1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2012 Milan Jurik. All rights reserved.
25 */
26
27/*
28 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29 */
30
31/*
32 * fork.c - safe forking for svc.startd
33 *
34 * fork_configd() and fork_sulogin() are related, special cases that handle the
35 * spawning of specific client processes for svc.startd.
36 */
37
38#include <sys/contract/process.h>
39#include <sys/corectl.h>
40#include <sys/ctfs.h>
41#include <sys/stat.h>
42#include <sys/types.h>
43#include <sys/uio.h>
44#include <sys/wait.h>
45#include <assert.h>
46#include <errno.h>
47#include <fcntl.h>
48#include <libcontract.h>
49#include <libcontract_priv.h>
50#include <libscf_priv.h>
51#include <limits.h>
52#include <poll.h>
53#include <port.h>
54#include <signal.h>
55#include <stdarg.h>
56#include <stdio.h>
57#include <stdlib.h>
58#include <string.h>
59#include <unistd.h>
60#include <utmpx.h>
61#include <spawn.h>
62
63#include "manifest_hash.h"
64#include "configd_exit.h"
65#include "protocol.h"
66#include "startd.h"
67
/*
 * Most recent entry returned by getutxent().  fork_sulogin() uses it while
 * scanning the utmpx database for the process that owns the console line.
 */
static	struct	utmpx	*utmpp;	/* pointer for getutxent() */
69
/*
 * pid_t startd_fork1(int *)
 *   fork1(2) wrapper that brackets the fork with the wait and utmpx
 *   prefork/postfork hooks.  Returns whatever fork1() returned.  When the
 *   fork fails and forkerr is non-NULL, the errno value from fork1() is
 *   stored through forkerr.
 */
pid_t
startd_fork1(int *forkerr)
{
	pid_t child;

	/* Prefork hooks: wait first, then utmpx. */
	wait_prefork();
	utmpx_prefork();

	child = fork1();

	/* Capture errno before any postfork hook has a chance to change it. */
	if (child == -1) {
		if (forkerr != NULL)
			*forkerr = errno;
	}

	/* Postfork hooks run in the opposite order, whatever fork1() did. */
	utmpx_postfork();
	wait_postfork(child);

	return (child);
}
94
95/*
96 * void fork_mount(char *, char *)
97 *   Run mount(1M) with the given options and mount point.  (mount(1M) has much
98 *   hidden knowledge; it's much less correct to reimplement that logic here to
99 *   save a fork(2)/exec(2) invocation.)
100 */
101int
102fork_mount(char *path, char *opts)
103{
104	pid_t pid;
105	uint_t tries = 0;
106	int status;
107
108	for (pid = fork1(); pid == -1; pid = fork1()) {
109		if (++tries > MAX_MOUNT_RETRIES)
110			return (-1);
111
112		(void) sleep(tries);
113	}
114
115	if (pid != 0) {
116		(void) waitpid(pid, &status, 0);
117
118		/*
119		 * If our mount(1M) invocation exited by peculiar means, or with
120		 * a non-zero status, our mount likelihood is low.
121		 */
122		if (!WIFEXITED(status) ||
123		    WEXITSTATUS(status) != 0)
124			return (-1);
125
126		return (0);
127	}
128
129	(void) execl("/sbin/mount", "mount", "-o", opts, path, NULL);
130
131	return (-1);
132}
133
134/*
135 * pid_t fork_common(...)
136 *   Common routine used by fork_sulogin, fork_emi, and fork_configd to
137 *   fork a process in a contract with the provided terms.  Invokes
138 *   fork_sulogin (with its no-fork argument set) on errors.
139 */
140static pid_t
141fork_common(const char *name, const char *svc_fmri, int retries, ctid_t *ctidp,
142    uint_t inf, uint_t crit, uint_t fatal, uint_t param, uint64_t cookie)
143{
144	uint_t tries = 0;
145	int ctfd, err;
146	pid_t pid;
147
148	/*
149	 * Establish process contract terms.
150	 */
151	if ((ctfd = open64(CTFS_ROOT "/process/template", O_RDWR)) == -1) {
152		fork_sulogin(B_TRUE, "Could not open process contract template "
153		    "for %s: %s\n", name, strerror(errno));
154		/* NOTREACHED */
155	}
156
157	err = ct_tmpl_set_critical(ctfd, crit);
158	err |= ct_pr_tmpl_set_fatal(ctfd, fatal);
159	err |= ct_tmpl_set_informative(ctfd, inf);
160	err |= ct_pr_tmpl_set_param(ctfd, param);
161	err |= ct_tmpl_set_cookie(ctfd, cookie);
162	err |= ct_pr_tmpl_set_svc_fmri(ctfd, svc_fmri);
163	err |= ct_pr_tmpl_set_svc_aux(ctfd, name);
164	if (err) {
165		(void) close(ctfd);
166		fork_sulogin(B_TRUE, "Could not set %s process contract "
167		    "terms\n", name);
168		/* NOTREACHED */
169	}
170
171	if (err = ct_tmpl_activate(ctfd)) {
172		(void) close(ctfd);
173		fork_sulogin(B_TRUE, "Could not activate %s process contract "
174		    "template: %s\n", name, strerror(err));
175		/* NOTREACHED */
176	}
177
178	utmpx_prefork();
179
180	/*
181	 * Attempt to fork "retries" times.
182	 */
183	for (pid = fork1(); pid == -1; pid = fork1()) {
184		if (++tries > retries) {
185			/*
186			 * When we exit the sulogin session, init(1M)
187			 * will restart svc.startd(1M).
188			 */
189			err = errno;
190			(void) ct_tmpl_clear(ctfd);
191			(void) close(ctfd);
192			utmpx_postfork();
193			fork_sulogin(B_TRUE, "Could not fork to start %s: %s\n",
194			    name, strerror(err));
195			/* NOTREACHED */
196		}
197		(void) sleep(tries);
198	}
199
200	utmpx_postfork();
201
202	/*
203	 * Clean up, return pid and ctid.
204	 */
205	if (pid != 0 && (errno = contract_latest(ctidp)) != 0)
206		uu_die("Could not get new contract id for %s\n", name);
207	(void) ct_tmpl_clear(ctfd);
208	(void) close(ctfd);
209
210	return (pid);
211}
212
213/*
214 * void fork_sulogin(boolean_t, const char *, ...)
215 *   When we are invoked with the -s flag from boot (or run into an unfixable
216 *   situation), we run a private copy of sulogin.  When the sulogin session
217 *   is ended, we continue.  This is the last fallback action for system
218 *   maintenance.
219 *
220 *   If immediate is true, fork_sulogin() executes sulogin(1M) directly, without
221 *   forking.
222 *
223 *   Because fork_sulogin() is needed potentially before we daemonize, we leave
224 *   it outside the wait_register() framework.
225 */
226/*PRINTFLIKE2*/
227void
228fork_sulogin(boolean_t immediate, const char *format, ...)
229{
230	va_list args;
231	int fd_console;
232
233	(void) printf("Requesting System Maintenance Mode\n");
234
235	if (!booting_to_single_user)
236		(void) printf("(See /lib/svc/share/README for more "
237		    "information.)\n");
238
239	va_start(args, format);
240	(void) vprintf(format, args);
241	va_end(args);
242
243	if (!immediate) {
244		ctid_t	ctid;
245		pid_t	pid;
246
247		pid = fork_common("sulogin", SVC_SULOGIN_FMRI,
248		    MAX_SULOGIN_RETRIES, &ctid, CT_PR_EV_HWERR, 0,
249		    CT_PR_EV_HWERR, CT_PR_PGRPONLY, SULOGIN_COOKIE);
250
251		if (pid != 0) {
252			(void) waitpid(pid, NULL, 0);
253			contract_abandon(ctid);
254			return;
255		}
256		/* close all inherited fds */
257		closefrom(0);
258	} else {
259		(void) printf("Directly executing sulogin.\n");
260		/*
261		 * Can't call closefrom() in this MT section
262		 * so safely close a minimum set of fds.
263		 */
264		(void) close(STDIN_FILENO);
265		(void) close(STDOUT_FILENO);
266		(void) close(STDERR_FILENO);
267	}
268
269	(void) setpgrp();
270
271	/* open the console for sulogin */
272	if ((fd_console = open("/dev/console", O_RDWR)) >= 0) {
273		if (fd_console != STDIN_FILENO)
274			while (dup2(fd_console, STDIN_FILENO) < 0 &&
275			    errno == EINTR)
276				;
277		if (fd_console != STDOUT_FILENO)
278			while (dup2(fd_console, STDOUT_FILENO) < 0 &&
279			    errno == EINTR)
280				;
281		if (fd_console != STDERR_FILENO)
282			while (dup2(fd_console, STDERR_FILENO) < 0 &&
283			    errno == EINTR)
284				;
285		if (fd_console > STDERR_FILENO)
286			(void) close(fd_console);
287	}
288
289	setutxent();
290	while ((utmpp = getutxent()) != NULL) {
291		if (strcmp(utmpp->ut_user, "LOGIN") != 0) {
292			if (strcmp(utmpp->ut_line, "console") == 0) {
293				(void) kill(utmpp->ut_pid, 9);
294				break;
295			}
296		}
297	}
298
299	(void) execl("/sbin/sulogin", "sulogin", NULL);
300
301	uu_warn("Could not exec() sulogin");
302
303	exit(1);
304}
305
/* Path of the svc.configd binary that fork_configd() executes in the child. */
#define	CONFIGD_PATH	"/lib/svc/bin/svc.configd"
307
/*
 * void fork_configd(int exitstatus)
 *   Start svc.configd in its own process contract and wait for the forked
 *   process to exit.  We are interested in exit events (since the parent's
 *   exiting means configd is ready to run and since the child's exiting
 *   indicates an error case) and in empty events.  This means we have a
 *   unique template for initiating configd.
 *
 *   exitstatus is the wait status of a previous svc.configd;
 *   fork_configd_thread() passes 0 on the initial start.  Failures drop the
 *   administrator into sulogin via fork_sulogin() and, for most of them, the
 *   start is retried afterwards.  In the parent the function returns after
 *   setting st_configd_lives and broadcasting st_configd_live_cv; the child
 *   either becomes svc.configd or exits with status 1.
 */
void
fork_configd(int exitstatus)
{
	pid_t pid;
	ctid_t ctid = -1;
	int err;
	char path[PATH_MAX];

	/*
	 * Checking the exitstatus for the potential failure of the
	 * daemonized svc.configd.  If this is not the first time
	 * through, but a call from the svc.configd monitoring thread
	 * after a failure this is the status that is expected.  Other
	 * failures are exposed during initialization or are fixed
	 * by a restart (e.g door closings).
	 *
	 * If this is on-disk database corruption it will also be
	 * caught by a restart but could be cleared before the restart.
	 *
	 * Or this could be internal database corruption due to a
	 * rogue service that needs to be cleared before restart.
	 */
	if (WEXITSTATUS(exitstatus) == CONFIGD_EXIT_DATABASE_BAD) {
		fork_sulogin(B_FALSE, "svc.configd exited with database "
		    "corrupt error after initialization of the repository\n");
	}

retry:
	log_framework(LOG_DEBUG, "fork_configd trying to start svc.configd\n");

	/*
	 * If we're retrying, we will have an old contract lying around
	 * from the failure.  Since we're going to be creating a new
	 * contract shortly, we abandon the old one now.
	 */
	if (ctid != -1)
		contract_abandon(ctid);
	ctid = -1;

	/* Only exit events are critical in configd's contract. */
	pid = fork_common("svc.configd", SCF_SERVICE_CONFIGD,
	    MAX_CONFIGD_RETRIES, &ctid, 0, CT_PR_EV_EXIT, 0,
	    CT_PR_INHERIT | CT_PR_REGENT, CONFIGD_COOKIE);

	if (pid != 0) {
		/* parent; note that this shadows the function parameter */
		int exitstatus;

		/*
		 * Remember the pid we forked so that fork_configd_thread()
		 * can tell its exit event apart from those of other
		 * processes in the contract.
		 */
		st->st_configd_pid = pid;

		if (waitpid(pid, &exitstatus, 0) == -1) {
			/*
			 * NOTE(review): once the sulogin session ends we fall
			 * through and announce configd as live without a
			 * valid exit status -- confirm this is intended.
			 */
			fork_sulogin(B_FALSE, "waitpid on svc.configd "
			    "failed: %s\n", strerror(errno));
		} else if (WIFEXITED(exitstatus)) {
			char *errstr;

			/*
			 * Examine exitstatus.  This will eventually get more
			 * complicated, as we will want to teach startd how to
			 * invoke configd with alternate repositories, etc.
			 *
			 * Note that exec(2) failure results in an exit status
			 * of 1, resulting in the default clause below.
			 */

			/*
			 * Assign readable strings to cases we don't handle, or
			 * have error outcomes that cannot be eliminated.
			 */
			switch (WEXITSTATUS(exitstatus)) {
			case CONFIGD_EXIT_BAD_ARGS:
				errstr = "bad arguments";
				break;

			case CONFIGD_EXIT_DATABASE_BAD:
				errstr = "database corrupt";
				break;

			case CONFIGD_EXIT_DATABASE_LOCKED:
				errstr = "database locked";
				break;
			case CONFIGD_EXIT_INIT_FAILED:
				errstr = "initialization failure";
				break;
			case CONFIGD_EXIT_DOOR_INIT_FAILED:
				errstr = "door initialization failure";
				break;
			case CONFIGD_EXIT_DATABASE_INIT_FAILED:
				errstr = "database initialization failure";
				break;
			case CONFIGD_EXIT_NO_THREADS:
				errstr = "no threads available";
				break;
			case CONFIGD_EXIT_LOST_MAIN_DOOR:
				errstr = "lost door server attachment";
				break;
			case 1:
				errstr = "execution failure";
				break;
			default:
				errstr = "unknown error";
				break;
			}

			/*
			 * Remedial actions for various configd failures.
			 */
			switch (WEXITSTATUS(exitstatus)) {
			case CONFIGD_EXIT_OKAY:
				break;

			case CONFIGD_EXIT_DATABASE_LOCKED:
				/* attempt remount of / read-write */
				if (fs_is_read_only("/", NULL) == 1) {
					if (fs_remount("/") == -1)
						fork_sulogin(B_FALSE,
						    "remount of root "
						    "filesystem failed\n");

					goto retry;
				}
				/*
				 * NOTE(review): when / is not read-only we
				 * break out and announce configd as live even
				 * though it reported a locked database --
				 * confirm this is intended.
				 */
				break;

			default:
				/* errstr is only consumed on this path. */
				fork_sulogin(B_FALSE, "svc.configd exited "
				    "with status %d (%s)\n",
				    WEXITSTATUS(exitstatus), errstr);
				goto retry;
			}
		} else if (WIFSIGNALED(exitstatus)) {
			char signame[SIG2STR_MAX];

			/* Fall back to the number when sig2str() fails. */
			if (sig2str(WTERMSIG(exitstatus), signame))
				(void) snprintf(signame, SIG2STR_MAX,
				    "signum %d", WTERMSIG(exitstatus));

			fork_sulogin(B_FALSE, "svc.configd signalled:"
			    " %s\n", signame);

			goto retry;
		} else {
			fork_sulogin(B_FALSE, "svc.configd non-exit "
			    "condition: 0x%x\n", exitstatus);

			goto retry;
		}

		/*
		 * Announce that we have a valid svc.configd status.
		 */
		MUTEX_LOCK(&st->st_configd_live_lock);
		st->st_configd_lives = 1;
		err = pthread_cond_broadcast(&st->st_configd_live_cv);
		assert(err == 0);
		MUTEX_UNLOCK(&st->st_configd_live_lock);

		log_framework(LOG_DEBUG, "fork_configd broadcasts configd is "
		    "live\n");
		return;
	}

	/* child */

	/*
	 * Set our per-process core file path to leave core files in
	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
	 */
	(void) snprintf(path, sizeof (path),
	    "/etc/svc/volatile/core.configd.%%p");

	(void) core_set_process_path(path, strlen(path) + 1, getpid());

	log_framework(LOG_DEBUG, "executing svc.configd\n");

	(void) execl(CONFIGD_PATH, CONFIGD_PATH, NULL);

	/*
	 * Status code is used above to identify configd exec failure.
	 */
	exit(1);
}
492
493void *
494fork_configd_thread(void *vctid)
495{
496	int fd, err;
497	ctid_t configd_ctid = (ctid_t)vctid;
498
499	if (configd_ctid == -1) {
500		log_framework(LOG_DEBUG,
501		    "fork_configd_thread starting svc.configd\n");
502		fork_configd(0);
503	} else {
504		/*
505		 * configd_ctid is known:  we broadcast and continue.
506		 * test contract for appropriate state by verifying that
507		 * there is one or more processes within it?
508		 */
509		log_framework(LOG_DEBUG,
510		    "fork_configd_thread accepting svc.configd with CTID %ld\n",
511		    configd_ctid);
512		MUTEX_LOCK(&st->st_configd_live_lock);
513		st->st_configd_lives = 1;
514		(void) pthread_cond_broadcast(&st->st_configd_live_cv);
515		MUTEX_UNLOCK(&st->st_configd_live_lock);
516	}
517
518	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
519	if (fd == -1)
520		uu_die("process bundle open failed");
521
522	/*
523	 * Make sure we get all events (including those generated by configd
524	 * before this thread was started).
525	 */
526	err = ct_event_reset(fd);
527	assert(err == 0);
528
529	for (;;) {
530		int efd, sfd;
531		ct_evthdl_t ev;
532		uint32_t type;
533		ctevid_t evid;
534		ct_stathdl_t status;
535		ctid_t ctid;
536		uint64_t cookie;
537		pid_t pid;
538
539		if (err = ct_event_read_critical(fd, &ev)) {
540			assert(err != EINVAL && err != EAGAIN);
541			log_error(LOG_WARNING,
542			    "Error reading next contract event: %s",
543			    strerror(err));
544			continue;
545		}
546
547		evid = ct_event_get_evid(ev);
548		ctid = ct_event_get_ctid(ev);
549		type = ct_event_get_type(ev);
550
551		/* Fetch cookie. */
552		sfd = contract_open(ctid, "process", "status", O_RDONLY);
553		if (sfd < 0) {
554			ct_event_free(ev);
555			continue;
556		}
557
558		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
559			log_framework(LOG_WARNING, "Could not get status for "
560			    "contract %ld: %s\n", ctid, strerror(err));
561
562			ct_event_free(ev);
563			startd_close(sfd);
564			continue;
565		}
566
567		cookie = ct_status_get_cookie(status);
568
569		ct_status_free(status);
570
571		startd_close(sfd);
572
573		/*
574		 * Don't process events from contracts we aren't interested in.
575		 */
576		if (cookie != CONFIGD_COOKIE) {
577			ct_event_free(ev);
578			continue;
579		}
580
581		if (type == CT_PR_EV_EXIT) {
582			int exitstatus;
583
584			(void) ct_pr_event_get_pid(ev, &pid);
585			(void) ct_pr_event_get_exitstatus(ev,
586			    &exitstatus);
587
588			if (st->st_configd_pid != pid) {
589				/*
590				 * This is the child exiting, so we
591				 * abandon the contract and restart
592				 * configd.
593				 */
594				contract_abandon(ctid);
595				fork_configd(exitstatus);
596			}
597		}
598
599		efd = contract_open(ctid, "process", "ctl", O_WRONLY);
600		if (efd != -1) {
601			(void) ct_ctl_ack(efd, evid);
602			startd_close(efd);
603		}
604
605		ct_event_free(ev);
606
607	}
608
609	/*NOTREACHED*/
610	return (NULL);
611}
612
613void
614fork_rc_script(char rl, const char *arg, boolean_t wait)
615{
616	pid_t pid;
617	int tmpl, err, stat;
618	char path[20] = "/sbin/rc.", log[20] = "rc..log", timebuf[20];
619	time_t now;
620	struct tm ltime;
621	size_t sz;
622	char *pathenv;
623	char **nenv;
624
625	path[8] = rl;
626
627	tmpl = open64(CTFS_ROOT "/process/template", O_RDWR);
628	if (tmpl >= 0) {
629		err = ct_tmpl_set_critical(tmpl, 0);
630		assert(err == 0);
631
632		err = ct_tmpl_set_informative(tmpl, 0);
633		assert(err == 0);
634
635		err = ct_pr_tmpl_set_fatal(tmpl, 0);
636		assert(err == 0);
637
638		err = ct_tmpl_activate(tmpl);
639		assert(err == 0);
640
641		err = close(tmpl);
642		assert(err == 0);
643	} else {
644		uu_warn("Could not create contract template for %s.\n", path);
645	}
646
647	pid = startd_fork1(NULL);
648	if (pid < 0) {
649		return;
650	} else if (pid != 0) {
651		/* parent */
652		if (wait) {
653			do
654				err = waitpid(pid, &stat, 0);
655			while (err != 0 && errno == EINTR)
656				;
657
658			if (!WIFEXITED(stat)) {
659				log_framework(LOG_INFO,
660				    "%s terminated with waitpid() status %d.\n",
661				    path, stat);
662			} else if (WEXITSTATUS(stat) != 0) {
663				log_framework(LOG_INFO,
664				    "%s failed with status %d.\n", path,
665				    WEXITSTATUS(stat));
666			}
667		}
668
669		return;
670	}
671
672	/* child */
673
674	log[2] = rl;
675
676	setlog(log);
677
678	now = time(NULL);
679	sz = strftime(timebuf, sizeof (timebuf), "%b %e %T",
680	    localtime_r(&now, &ltime));
681	assert(sz != 0);
682
683	(void) fprintf(stderr, "%s Executing %s %s\n", timebuf, path, arg);
684
685	if (rl == 'S')
686		pathenv = "PATH=/sbin:/usr/sbin:/usr/bin";
687	else
688		pathenv = "PATH=/usr/sbin:/usr/bin";
689
690	nenv = set_smf_env(NULL, 0, pathenv, NULL, NULL);
691
692	(void) execle(path, path, arg, 0, nenv);
693
694	perror("exec");
695	exit(0);
696}
697
/*
 * Paths used for Early Manifest Import:
 *   SVCCFG_PATH - fork_emi() checks that this exists before going further.
 *   EMI_MFST    - manifest whose presence emi_is_disabled() checks when the
 *                 service is missing from the repository.
 *   EMI_PATH    - method that fork_emi() executes in the child.
 */
#define	SVCCFG_PATH	"/usr/sbin/svccfg"
#define	EMI_MFST	"/lib/svc/manifest/system/early-manifest-import.xml"
#define	EMI_PATH	"/lib/svc/method/manifest-import"
701
702/*
703 * Set Early Manifest Import service's state and log file.
704 */
705static int
706emi_set_state(restarter_instance_state_t state, boolean_t setlog)
707{
708	int r, ret = 1;
709	instance_data_t idata;
710	scf_handle_t *hndl = NULL;
711	scf_instance_t *inst = NULL;
712
713retry:
714	if (hndl == NULL)
715		hndl = libscf_handle_create_bound(SCF_VERSION);
716
717	if (hndl == NULL) {
718		/*
719		 * In the case that we can't bind to the repository
720		 * (which should have been started), we need to allow
721		 * the user into maintenance mode to determine what's
722		 * failed.
723		 */
724		fork_sulogin(B_FALSE, "Unable to bind a new repository"
725		    " handle: %s\n", scf_strerror(scf_error()));
726		goto retry;
727	}
728
729	if (inst == NULL)
730		inst = safe_scf_instance_create(hndl);
731
732	if (scf_handle_decode_fmri(hndl, SCF_INSTANCE_EMI, NULL, NULL,
733	    inst, NULL, NULL, SCF_DECODE_FMRI_EXACT) == -1) {
734		switch (scf_error()) {
735		case SCF_ERROR_NOT_FOUND:
736			goto out;
737
738		case SCF_ERROR_CONNECTION_BROKEN:
739		case SCF_ERROR_NOT_BOUND:
740			libscf_handle_rebind(hndl);
741			goto retry;
742
743		default:
744			fork_sulogin(B_FALSE, "Couldn't fetch %s service: "
745			    "%s\n", SCF_INSTANCE_EMI,
746			    scf_strerror(scf_error()));
747			goto retry;
748		}
749	}
750
751	if (setlog) {
752		(void) libscf_note_method_log(inst, st->st_log_prefix, EMI_LOG);
753		log_framework(LOG_DEBUG,
754		    "Set logfile property for %s\n", SCF_INSTANCE_EMI);
755	}
756
757	idata.i_fmri = SCF_INSTANCE_EMI;
758	idata.i_state =  RESTARTER_STATE_NONE;
759	idata.i_next_state = RESTARTER_STATE_NONE;
760	switch (r = _restarter_commit_states(hndl, &idata, state,
761	    RESTARTER_STATE_NONE, NULL)) {
762	case 0:
763		break;
764
765	case ECONNABORTED:
766		libscf_handle_rebind(hndl);
767		goto retry;
768
769	case ENOMEM:
770	case ENOENT:
771	case EPERM:
772	case EACCES:
773	case EROFS:
774		fork_sulogin(B_FALSE, "Could not set state of "
775		    "%s: %s\n", SCF_INSTANCE_EMI, strerror(r));
776		goto retry;
777
778	case EINVAL:
779	default:
780		bad_error("_restarter_commit_states", r);
781	}
782	ret = 0;
783
784out:
785	scf_instance_destroy(inst);
786	scf_handle_destroy(hndl);
787	return (ret);
788}
789
790/*
791 * It is possible that the early-manifest-import service is disabled.  This
792 * would not be the normal case for Solaris, but it may happen on dedicated
793 * systems.  So this function checks the state of the general/enabled
794 * property for Early Manifest Import.
795 *
796 * It is also possible that the early-manifest-import service does not yet
797 * have a repository representation when this function runs.  This happens
798 * if non-Early Manifest Import system is upgraded to an Early Manifest
799 * Import based system.  Thus, the non-existence of general/enabled is not
800 * an error.
801 *
802 * Returns 1 if Early Manifest Import is disabled and 0 otherwise.
803 */
804static int
805emi_is_disabled()
806{
807	int disabled = 0;
808	int disconnected = 1;
809	int enabled;
810	scf_handle_t *hndl = NULL;
811	scf_instance_t *inst = NULL;
812	uchar_t stored_hash[MHASH_SIZE];
813	char *pname;
814	int hashash, r;
815
816	while (hndl == NULL) {
817		hndl = libscf_handle_create_bound(SCF_VERSION);
818
819		if (hndl == NULL) {
820			/*
821			 * In the case that we can't bind to the repository
822			 * (which should have been started), we need to
823			 * allow the user into maintenance mode to
824			 * determine what's failed.
825			 */
826			fork_sulogin(B_FALSE, "Unable to bind a new repository "
827			    "handle: %s\n", scf_strerror(scf_error()));
828		}
829	}
830
831	while (disconnected) {
832		r = libscf_fmri_get_instance(hndl, SCF_INSTANCE_EMI, &inst);
833		if (r != 0) {
834			switch (r) {
835			case ECONNABORTED:
836				libscf_handle_rebind(hndl);
837				continue;
838
839			case ENOENT:
840				/*
841				 * Early Manifest Import service is not in
842				 * the repository. Check the manifest file
843				 * and service's hash in smf/manifest to
844				 * figure out whether Early Manifest Import
845				 * service was deleted. If Early Manifest Import
846				 * service was deleted, treat that as a disable
847				 * and don't run early import.
848				 */
849
850				if (access(EMI_MFST, F_OK)) {
851					/*
852					 * Manifest isn't found, so service is
853					 * properly removed.
854					 */
855					disabled = 1;
856				} else {
857					/*
858					 * If manifest exists and we have the
859					 * hash, the service was improperly
860					 * deleted, generate a warning and treat
861					 * this as a disable.
862					 */
863
864					if ((pname = mhash_filename_to_propname(
865					    EMI_MFST, B_TRUE)) == NULL) {
866						/*
867						 * Treat failure to get propname
868						 * as a disable.
869						 */
870						disabled = 1;
871						uu_warn("Failed to get propname"
872						    " for %s.\n",
873						    SCF_INSTANCE_EMI);
874					} else {
875						hashash = mhash_retrieve_entry(
876						    hndl, pname,
877						    stored_hash,
878						    NULL) == 0;
879						uu_free(pname);
880
881						if (hashash) {
882							disabled = 1;
883							uu_warn("%s service is "
884							    "deleted \n",
885							    SCF_INSTANCE_EMI);
886						}
887					}
888
889				}
890
891				disconnected = 0;
892				continue;
893
894			default:
895				bad_error("libscf_fmri_get_instance",
896				    scf_error());
897			}
898		}
899		r = libscf_get_basic_instance_data(hndl, inst, SCF_INSTANCE_EMI,
900		    &enabled, NULL, NULL);
901		if (r == 0) {
902			/*
903			 * enabled can be returned as -1, which indicates
904			 * that the enabled property was not found.  To us
905			 * that means that the service was not disabled.
906			 */
907			if (enabled == 0)
908				disabled = 1;
909		} else {
910			switch (r) {
911			case ECONNABORTED:
912				libscf_handle_rebind(hndl);
913				continue;
914
915			case ECANCELED:
916			case ENOENT:
917				break;
918			default:
919				bad_error("libscf_get_basic_instance_data", r);
920			}
921		}
922		disconnected = 0;
923	}
924
925out:
926	if (inst != NULL)
927		scf_instance_destroy(inst);
928	scf_handle_destroy(hndl);
929	return (disabled);
930}
931
932void
933fork_emi()
934{
935	pid_t pid;
936	ctid_t ctid = -1;
937	char **envp, **np;
938	char *emipath;
939	char corepath[PATH_MAX];
940	char *svc_state;
941	int setemilog;
942	int sz;
943
944	if (emi_is_disabled()) {
945		log_framework(LOG_NOTICE, "%s is  disabled and will "
946		    "not be run.\n", SCF_INSTANCE_EMI);
947		return;
948	}
949
950	/*
951	 * Early Manifest Import should run only once, at boot. If svc.startd
952	 * is some how restarted, Early Manifest Import  should not run again.
953	 * Use the Early Manifest Import service's state to figure out whether
954	 * Early Manifest Import has successfully completed earlier and bail
955	 * out if it did.
956	 */
957	if (svc_state = smf_get_state(SCF_INSTANCE_EMI)) {
958		if (strcmp(svc_state, SCF_STATE_STRING_ONLINE) == 0) {
959			free(svc_state);
960			return;
961		}
962		free(svc_state);
963	}
964
965	/*
966	 * Attempt to set Early Manifest Import service's state and log file.
967	 * If emi_set_state fails, set log file again in the next call to
968	 * emi_set_state.
969	 */
970	setemilog = emi_set_state(RESTARTER_STATE_OFFLINE, B_TRUE);
971
972	/* Don't go further if /usr isn't available */
973	if (access(SVCCFG_PATH, F_OK)) {
974		log_framework(LOG_NOTICE, "Early Manifest Import is not "
975		    "supported on systems with a separate /usr filesystem.\n");
976		return;
977	}
978
979fork_retry:
980	log_framework(LOG_DEBUG, "Starting Early Manifest Import\n");
981
982	/*
983	 * If we're retrying, we will have an old contract lying around
984	 * from the failure.  Since we're going to be creating a new
985	 * contract shortly, we abandon the old one now.
986	 */
987	if (ctid != -1)
988		contract_abandon(ctid);
989	ctid = -1;
990
991	pid = fork_common(SCF_INSTANCE_EMI, SCF_INSTANCE_EMI,
992	    MAX_EMI_RETRIES, &ctid, 0, 0, 0, 0, EMI_COOKIE);
993
994	if (pid != 0) {
995		int exitstatus;
996
997		if (waitpid(pid, &exitstatus, 0) == -1) {
998			fork_sulogin(B_FALSE, "waitpid on %s failed: "
999			    "%s\n", SCF_INSTANCE_EMI, strerror(errno));
1000		} else if (WIFEXITED(exitstatus)) {
1001			if (WEXITSTATUS(exitstatus)) {
1002				fork_sulogin(B_FALSE, "%s exited with status "
1003				    "%d \n", SCF_INSTANCE_EMI,
1004				    WEXITSTATUS(exitstatus));
1005				goto fork_retry;
1006			}
1007		} else if (WIFSIGNALED(exitstatus)) {
1008			char signame[SIG2STR_MAX];
1009
1010			if (sig2str(WTERMSIG(exitstatus), signame))
1011				(void) snprintf(signame, SIG2STR_MAX,
1012				    "signum %d", WTERMSIG(exitstatus));
1013
1014			fork_sulogin(B_FALSE, "%s signalled: %s\n",
1015			    SCF_INSTANCE_EMI, signame);
1016			goto fork_retry;
1017		} else {
1018			fork_sulogin(B_FALSE, "%s non-exit condition: 0x%x\n",
1019			    SCF_INSTANCE_EMI, exitstatus);
1020			goto fork_retry;
1021		}
1022
1023		log_framework(LOG_DEBUG, "%s completed successfully\n",
1024		    SCF_INSTANCE_EMI);
1025
1026		/*
1027		 * Once Early Manifest Import completed, the Early Manifest
1028		 * Import service must have been imported so set log file and
1029		 * state properties. Since this information is required for
1030		 * late manifest import and common admin operations, failing to
1031		 * set these properties should result in su login so admin can
1032		 * correct the problem.
1033		 */
1034		(void) emi_set_state(RESTARTER_STATE_ONLINE,
1035		    setemilog ? B_TRUE : B_FALSE);
1036
1037		return;
1038	}
1039
1040	/* child */
1041
1042	/*
1043	 * Set our per-process core file path to leave core files in
1044	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
1045	 */
1046	(void) snprintf(corepath, sizeof (corepath),
1047	    "/etc/svc/volatile/core.emi.%%p");
1048	(void) core_set_process_path(corepath, strlen(corepath) + 1, getpid());
1049
1050	/*
1051	 * Similar to running legacy services, we need to manually set
1052	 * log files here and environment variables.
1053	 */
1054	setlog(EMI_LOG);
1055
1056	envp = startd_zalloc(sizeof (char *) * 3);
1057	np = envp;
1058
1059	sz = sizeof ("SMF_FMRI=") + strlen(SCF_INSTANCE_EMI);
1060	*np = startd_zalloc(sz);
1061	(void) strlcpy(*np, "SMF_FMRI=", sz);
1062	(void) strncat(*np, SCF_INSTANCE_EMI, sz);
1063	np++;
1064
1065	emipath = getenv("PATH");
1066	if (emipath == NULL)
1067		emipath = strdup("/usr/sbin:/usr/bin");
1068
1069	sz = sizeof ("PATH=") + strlen(emipath);
1070	*np = startd_zalloc(sz);
1071	(void) strlcpy(*np, "PATH=", sz);
1072	(void) strncat(*np, emipath, sz);
1073
1074	log_framework(LOG_DEBUG, "executing Early Manifest Import\n");
1075	(void) execle(EMI_PATH, EMI_PATH, NULL, envp);
1076
1077	/*
1078	 * Status code is used above to identify Early Manifest Import
1079	 * exec failure.
1080	 */
1081	exit(1);
1082}
1083
/*
 * Environment of this process; fork_with_timeout() passes it unchanged to
 * the shell it spawns.
 */
extern char **environ;
1085
1086/*
1087 * A local variation on system(3c) which accepts a timeout argument.  This
1088 * allows us to better ensure that the system will actually shut down.
1089 *
1090 * gracetime specifies an amount of time in seconds which the routine must wait
1091 * after the command exits, to allow for asynchronous effects (like sent
1092 * signals) to take effect.  This can be zero.
1093 */
1094void
1095fork_with_timeout(const char *cmd, uint_t gracetime, uint_t timeout)
1096{
1097	int err = 0;
1098	pid_t pid;
1099	char *argv[4];
1100	posix_spawnattr_t attr;
1101	posix_spawn_file_actions_t factions;
1102
1103	sigset_t mask, savemask;
1104	uint_t msec_timeout;
1105	uint_t msec_spent = 0;
1106	uint_t msec_gracetime;
1107	int status;
1108
1109	msec_timeout = timeout * 1000;
1110	msec_gracetime = gracetime * 1000;
1111
1112	/*
1113	 * See also system(3c) in libc.  This is very similar, except
1114	 * that we avoid some unneeded complexity.
1115	 */
1116	err = posix_spawnattr_init(&attr);
1117	if (err == 0)
1118		err = posix_spawnattr_setflags(&attr,
1119		    POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF |
1120		    POSIX_SPAWN_NOSIGCHLD_NP | POSIX_SPAWN_WAITPID_NP |
1121		    POSIX_SPAWN_NOEXECERR_NP);
1122
1123	/*
1124	 * We choose to close fd's above 2, a deviation from system.
1125	 */
1126	if (err == 0)
1127		err = posix_spawn_file_actions_init(&factions);
1128	if (err == 0)
1129		err = posix_spawn_file_actions_addclosefrom_np(&factions,
1130		    STDERR_FILENO + 1);
1131
1132	(void) sigemptyset(&mask);
1133	(void) sigaddset(&mask, SIGCHLD);
1134	(void) thr_sigsetmask(SIG_BLOCK, &mask, &savemask);
1135
1136	argv[0] = "/bin/sh";
1137	argv[1] = "-c";
1138	argv[2] = (char *)cmd;
1139	argv[3] = NULL;
1140
1141	if (err == 0)
1142		err = posix_spawn(&pid, "/bin/sh", &factions, &attr,
1143		    (char *const *)argv, (char *const *)environ);
1144
1145	(void) posix_spawnattr_destroy(&attr);
1146	(void) posix_spawn_file_actions_destroy(&factions);
1147
1148	if (err) {
1149		uu_warn("Failed to spawn %s: %s\n", cmd, strerror(err));
1150	} else {
1151		for (;;) {
1152			int w;
1153			w = waitpid(pid, &status, WNOHANG);
1154			if (w == -1 && errno != EINTR)
1155				break;
1156			if (w > 0) {
1157				/*
1158				 * Command succeeded, so give it gracetime
1159				 * seconds for it to have an effect.
1160				 */
1161				if (status == 0 && msec_gracetime != 0)
1162					(void) poll(NULL, 0, msec_gracetime);
1163				break;
1164			}
1165
1166			(void) poll(NULL, 0, 100);
1167			msec_spent += 100;
1168			/*
1169			 * If we timed out, kill off the process, then try to
1170			 * wait for it-- it's possible that we could accumulate
1171			 * a zombie here since we don't allow waitpid to hang,
1172			 * but it's better to let that happen and continue to
1173			 * make progress.
1174			 */
1175			if (msec_spent >= msec_timeout) {
1176				uu_warn("'%s' timed out after %d "
1177				    "seconds.  Killing.\n", cmd,
1178				    timeout);
1179				(void) kill(pid, SIGTERM);
1180				(void) poll(NULL, 0, 100);
1181				(void) kill(pid, SIGKILL);
1182				(void) poll(NULL, 0, 100);
1183				(void) waitpid(pid, &status, WNOHANG);
1184				break;
1185			}
1186		}
1187	}
1188	(void) thr_sigsetmask(SIG_BLOCK, &savemask, NULL);
1189}
1190