1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2020 Oxide Computer Company
24 * Copyright (c) 2013 Gary Mills
25 *
26 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27 */
28
29/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30/*	  All Rights Reserved  	*/
31
32/*
33 * University Copyright- Copyright (c) 1982, 1986, 1988
34 * The Regents of the University of California
35 * All Rights Reserved
36 *
37 * University Acknowledgment- Portions of this document are derived from
38 * software developed by the University of California, Berkeley, and its
39 * contributors.
40 */
41
42/*
43 * init(1M) is the general process spawning program.  Its primary job is to
44 * start and restart svc.startd for smf(5).  For backwards-compatibility it also
45 * spawns and respawns processes according to /etc/inittab and the current
 * run-level.  It reads /etc/default/init for general configuration.
47 *
48 * To change run-levels the system administrator runs init from the command
49 * line with a level name.  init signals svc.startd via libscf and directs the
50 * zone's init (pid 1 in the global zone) what to do by sending it a signal;
 * these signal numbers are commonly referred to in the code as 'states'.
 * Valid run-levels are [sS0123456].  Additionally, init can be given
 * directives [qQabc], which indicate actions to be taken pertaining to
 * /etc/inittab.
54 *
55 * When init processes inittab entries, it finds processes that are to be
56 * spawned at various run-levels.  inittab contains the set of the levels for
57 * which each inittab entry is valid.
58 *
59 * State File and Restartability
60 *   Premature exit by init(1M) is handled as a special case by the kernel:
61 *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
62 *   1 in the global zone.)  To track the processes it has previously spawned,
63 *   as well as other mutable state, init(1M) regularly updates a state file
64 *   such that its subsequent invocations have knowledge of its various
65 *   dependent processes and duties.
66 *
67 * Process Contracts
68 *   We start svc.startd(1M) in a contract and transfer inherited contracts when
69 *   restarting it.  Everything else is started using the legacy contract
70 *   template, and the created contracts are abandoned when they become empty.
71 *
72 * utmpx Entry Handling
73 *   Because init(1M) no longer governs the startup process, its knowledge of
74 *   when utmpx becomes writable is indirect.  However, spawned processes
75 *   expect to be constructed with valid utmpx entries.  As a result, attempts
76 *   to write normal entries will be retried until successful.
77 *
78 * Maintenance Mode
79 *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
80 *   which it invokes sulogin(1M) to allow the operator an opportunity to
81 *   repair the system.  Normally, this operation is performed as a
82 *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
83 *   diagnosis to be completed.  In the cases that fork(2) requests themselves
84 *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
85 *   restart init(1M) on exit from the operator session.
86 *
87 *   One scenario where init(1M) enters its maintenance mode is when
88 *   svc.startd(1M) begins to fail rapidly, defined as when the average time
89 *   between recent failures drops below a given threshold.
90 */
91
92#include <sys/contract/process.h>
93#include <sys/ctfs.h>
94#include <sys/stat.h>
95#include <sys/statvfs.h>
96#include <sys/stropts.h>
97#include <sys/systeminfo.h>
98#include <sys/time.h>
99#include <sys/termios.h>
100#include <sys/tty.h>
101#include <sys/types.h>
102#include <sys/utsname.h>
103#include <sys/bootbanner.h>
104
105#include <bsm/adt_event.h>
106#include <bsm/libbsm.h>
107#include <security/pam_appl.h>
108
109#include <assert.h>
110#include <ctype.h>
111#include <dirent.h>
112#include <errno.h>
113#include <fcntl.h>
114#include <libcontract.h>
115#include <libcontract_priv.h>
116#include <libintl.h>
117#include <libscf.h>
118#include <libscf_priv.h>
119#include <poll.h>
120#include <procfs.h>
121#include <signal.h>
122#include <stdarg.h>
123#include <stdio.h>
124#include <stdio_ext.h>
125#include <stdlib.h>
126#include <string.h>
127#include <strings.h>
128#include <syslog.h>
129#include <time.h>
130#include <ulimit.h>
131#include <unistd.h>
132#include <utmpx.h>
133#include <wait.h>
134#include <zone.h>
135#include <ucontext.h>
136
/* Drop any macro definition of sleep picked up from the headers above. */
#undef	sleep

/*
 * fioctl() issues an ioctl(2) on the descriptor underlying a stdio stream;
 * its second and third arguments are handed to ioctl(2) unchanged.
 *
 * min() evaluates each of its arguments more than once, so callers must not
 * pass expressions with side effects.
 */
#define	fioctl(p, request, arg)	ioctl(fileno(p), (request), (arg))
#define	min(a, b)		(((a) < (b)) ? (a) : (b))

/*
 * Generic truth values and the failure return code used throughout init.
 * FAILURE is parenthesized so that it expands safely inside any expression.
 */
#define	TRUE	1
#define	FALSE	0
#define	FAILURE	(-1)

#define	UT_USER_SZ	32	/* Size of a utmpx ut_user field */
#define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */
148
/*
 * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
 *		nothing else requires this "init" wakeup.
 */
#define	SLEEPTIME	(5 * 60)

/*
 * MAXCMDL	The maximum length of a command string in inittab.
 */
#define	MAXCMDL	512

/*
 * EXEC		The length of the prefix string added to all commands
 *		found in inittab.
 */
#define	EXEC	(sizeof ("exec ") - 1)

/*
 * TWARN	The number of seconds between the warning signal, SIGTERM,
 *		and the fatal kill signal, SIGKILL.
 */
#define	TWARN	5

/*
 * id_eq	Compare two four-character inittab ids.  The ids are fixed
 *		width and not NUL-terminated, so exactly four characters are
 *		compared.  Evaluates to 1 (TRUE) when they match and to
 *		0 (FALSE) otherwise.  Both arguments are parenthesized so
 *		that pointer expressions such as "buf + 1" may be passed;
 *		each argument is evaluated several times.
 */
#define	id_eq(x, y)	((x)[0] == (y)[0] && (x)[1] == (y)[1] &&	\
			(x)[2] == (y)[2] && (x)[3] == (y)[3])
174
/*
 * The kernel's default umask is 022 these days; since some processes inherit
 * their umask from init, init will set it from CMASK in /etc/default/init.
 * init gets the default umask from the kernel; it sets the umask to 022
 * whenever it wants to create a file and reverts to CMASK afterwards.
 */

static int cmask;	/* initialized in main() from the inherited umask */
183
184/*
185 * The following definitions, concluding with the 'lvls' array, provide a
186 * common mapping between level-name (like 'S'), signal number (state),
187 * run-level mask, and specific properties associated with a run-level.
188 * This array should be accessed using the routines lvlname_to_state(),
189 * lvlname_to_mask(), state_to_mask(), and state_to_flags().
190 */
191
/*
 * The signal that is delivered to init to request each action.
 */
#define	LVLQ		SIGHUP
#define	LVL0		SIGINT
#define	LVL1		SIGQUIT
#define	LVL2		SIGILL
#define	LVL3		SIGTRAP
#define	LVL4		SIGIOT
#define	LVL5		SIGEMT
#define	LVL6		SIGFPE
#define	SINGLE_USER	SIGBUS
#define	LVLa		SIGSEGV
#define	LVLb		SIGSYS
#define	LVLc		SIGPIPE

/*
 * One bit per level.  Used to determine legal levels.
 */
#define	MASK0	(1 << 0)
#define	MASK1	(1 << 1)
#define	MASK2	(1 << 2)
#define	MASK3	(1 << 3)
#define	MASK4	(1 << 4)
#define	MASK5	(1 << 5)
#define	MASK6	(1 << 6)
#define	MASKSU	(1 << 7)
#define	MASKa	(1 << 8)
#define	MASKb	(1 << 9)
#define	MASKc	(1 << 10)

/* Convenience sets: every numbered level, and every demand level. */
#define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
#define	MASK_abc (MASKa | MASKb | MASKc)

/*
 * Per-state property flags, stored in lvl_flags.
 */
#define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */

/*
 * One row of the level table: ties a level name to its signal number
 * (state), its level mask bit, and its property flags.
 */
typedef struct lvl {
	int	lvl_state;	/* signal number (LVL*, SINGLE_USER) */
	int	lvl_mask;	/* MASK* bit, or 0 if the name has none */
	char	lvl_name;	/* name as given on the command line */
	int	lvl_flags;	/* LSEL_* properties */
} lvl_t;

/* Row builder that names every field; only used for the table below. */
#define	LVL_ENTRY(state, mask, name, flags)				\
	{ .lvl_state = (state), .lvl_mask = (mask),			\
	    .lvl_name = (name), .lvl_flags = (flags) }

static lvl_t lvls[] = {
	LVL_ENTRY(LVLQ,		0,	'Q',	0),
	LVL_ENTRY(LVLQ,		0,	'q',	0),
	LVL_ENTRY(LVL0,		MASK0,	'0',	LSEL_RUNLEVEL),
	LVL_ENTRY(LVL1,		MASK1,	'1',	LSEL_RUNLEVEL),
	LVL_ENTRY(LVL2,		MASK2,	'2',	LSEL_RUNLEVEL),
	LVL_ENTRY(LVL3,		MASK3,	'3',	LSEL_RUNLEVEL),
	LVL_ENTRY(LVL4,		MASK4,	'4',	LSEL_RUNLEVEL),
	LVL_ENTRY(LVL5,		MASK5,	'5',	LSEL_RUNLEVEL),
	LVL_ENTRY(LVL6,		MASK6,	'6',	LSEL_RUNLEVEL),
	LVL_ENTRY(SINGLE_USER,	MASKSU,	'S',	LSEL_RUNLEVEL),
	LVL_ENTRY(SINGLE_USER,	MASKSU,	's',	LSEL_RUNLEVEL),
	LVL_ENTRY(LVLa,		MASKa,	'a',	0),
	LVL_ENTRY(LVLb,		MASKb,	'b',	0),
	LVL_ENTRY(LVLc,		MASKc,	'c',	0)
};

#undef	LVL_ENTRY

/* Number of rows in lvls[]. */
#define	LVL_NELEMS (sizeof (lvls) / sizeof (lvls[0]))
256
/*
 * Values for the action field of an inittab entry.
 */
#define	OFF		0	/* Kill process if on, else ignore */
#define	RESPAWN		1	/* Continuously restart process when it dies */
#define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
#define	ONCE		2	/* Start process, do not respawn when dead */
#define	WAIT		3	/* Perform once and wait to complete */
#define	BOOT		4	/* Start at boot time only */
#define	BOOTWAIT	5	/* Start at boot time and wait to complete */
#define	POWERFAIL	6	/* Start on powerfail */
#define	POWERWAIT	7	/* Start and wait for complete on powerfail */
#define	INITDEFAULT	8	/* Default level "init" should start at */
#define	SYSINIT		9	/* Actions performed before init speaks */

/*
 * The same actions as single-bit masks, so that sets of actions can be
 * combined and tested with bitwise operators.
 */
#define	M_OFF		(1 << 0)
#define	M_RESPAWN	(1 << 1)
#define	M_ONDEMAND	M_RESPAWN
#define	M_ONCE		(1 << 2)
#define	M_WAIT		(1 << 3)
#define	M_BOOT		(1 << 4)
#define	M_BOOTWAIT	(1 << 5)
#define	M_PF		(1 << 6)
#define	M_PWAIT		(1 << 7)
#define	M_INITDEFAULT	(1 << 8)
#define	M_SYSINIT	(1 << 9)

/* States for the inittab parser in getcmd(). */
#define	ID	1
#define	LEVELS	2
#define	ACTION	3
#define	COMMAND	4
#define	COMMENT	5
290
/*
 * Width of an inittab entry id, and the printf format that prints one.
 * The ids are not NUL-terminated, hence the explicit precision; keep the
 * precision in the format in step with INITTAB_ENTRY_ID_SIZE.
 */
#define	INITTAB_ENTRY_ID_SIZE 4
#define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"

/*
 * Init can be in any of three main states, "normal" mode where it is
 * processing entries for the lines file in a normal fashion, "boot" mode,
 * where it is only interested in the boot actions, and "powerfail" mode,
 * where it is only interested in powerfail related actions. The following
 * masks declare the legal actions for each mode.
 */
#define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
#define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
#define	PF_MODES	(M_PF | M_PWAIT)

/*
 * One slot of init's process table.
 */
struct PROC_TABLE {
	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
						/* process */
	pid_t	p_pid;		/* Process id */
	short	p_count;	/* How many respawns of this command in */
				/*   the current series */
	long	p_time;		/* Start time for a series of respawns */
	short	p_flags;	/* Bits defined below */
	short	p_exit;		/* Exit status of a process which died */
};

/*
 * Bits for the "p_flags" word of a PROC_TABLE entry:
 *
 *	OCCUPIED	This slot in init's proc table is in use.
 *
 *	LIVING		Process is alive.
 *
 *	NOCLEANUP	efork() is not allowed to cleanup this entry even
 *			if process is dead.
 *
 *	NAMED		This process has a name, i.e. came from inittab.
 *
 *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
 *			formed this way are respawnable and immune to level
 *			changes as long as their entry exists in inittab.
 *
 *	TOUCHED		Flag used by remv() to determine whether it has looked
 *			at an entry while checking for processes to be killed.
 *
 *	WARNED		Flag used by remv() to mark processes that have been
 *			sent the SIGTERM signal.  If they don't die in 5
 *			seconds, they are sent the SIGKILL signal.
 *
 *	KILLED		Flag used by remv() to mark procs that have been sent
 *			the SIGTERM and SIGKILL signals.
 *
 *	PF_MASK		Bitwise or of legal flags, for sanity checking.
 */
#define	OCCUPIED	(1 << 0)
#define	LIVING		(1 << 1)
#define	NOCLEANUP	(1 << 2)
#define	NAMED		(1 << 3)
#define	DEMANDREQUEST	(1 << 4)
#define	TOUCHED		(1 << 5)
#define	WARNED		(1 << 6)
#define	KILLED		(1 << 7)
#define	PF_MASK		(OCCUPIED | LIVING | NOCLEANUP | NAMED | \
			DEMANDREQUEST | TOUCHED | WARNED | KILLED)
358
/*
 * Throttling of entries that keep being respawned:
 *
 *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
 *			respawn a process SPAWN_LIMIT times before it gets mad.
 *
 *	SPAWN_LIMIT	The number of respawns "init" will attempt in
 *			SPAWN_INTERVAL seconds before it generates an
 *			error message and inhibits further tries for
 *			INHIBIT seconds.
 *
 *	INHIBIT		The number of seconds "init" ignores an entry it had
 *			trouble spawning unless a "telinit Q" is received.
 */
#define	SPAWN_INTERVAL	(2 * 60)
#define	SPAWN_LIMIT	10
#define	INHIBIT		(5 * 60)

/*
 * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
 */
#define	ID_MAX_STR_LEN	10

/*
 * Sentinel process-table pointers: NULLPROC is the null pointer and NO_ROOM
 * is FAILURE converted to a pointer.
 */
#define	NULLPROC	((struct PROC_TABLE *)(0))
#define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
385
/*
 * One inittab entry in parsed form; getcmd() fills this in.
 */
struct CMD_LINE {
	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
						/* process to be affected by */
						/* action */
	short c_levels;	/* Mask of legal levels for process */
	short c_action;	/* Mask for type of action required */
	char *c_command; /* Pointer to init command */
};

/*
 * A request to add or remove a pid.
 * NOTE(review): presumably the record read from the descriptor Pfd ("fd to
 * receive pids thru") -- confirm against the pipe-handling code.
 */
struct	pidrec {
	int	pd_type;	/* Command type */
	pid_t	pd_pid;		/* pid to add or remove */
};

/*
 * pd_type's
 */
#define	ADDPID	1
#define	REMPID	2

/*
 * Singly linked list node for a watched pid.  Plhead and Plfree are the
 * heads of two such lists.
 * NOTE(review): by their names, presumably the active list and a free list
 * of recycled nodes -- confirm against the code that manipulates them.
 */
static struct	pidlist {
	pid_t	pl_pid;		/* pid to watch for */
	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
	short	pl_exit;	/* Exit status of proc */
	struct	pidlist	*pl_next; /* Next in list */
} *Plhead, *Plfree;
412
/*
 * The following structure contains a set of modes for /dev/syscon
 * and should match the default contents of /etc/ioctl.syscon.
 *
 * Any c_cc slots beyond those listed are zero-filled by the initializer.
 */
static struct termios	dflt_termios = {
	.c_iflag = BRKINT|ICRNL|IXON|IMAXBEL,
	.c_oflag = OPOST|ONLCR|TAB3,
	.c_cflag = CS8|CREAD|B9600,
	.c_lflag = ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN,
	.c_cc = { CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
	    CSTART, CSTOP, CSWTCH, CDSUSP, CRPRNT, CFLUSH, CWERASE, CLNEXT,
	    CSTATUS, CERASE2, 0
	}
};

/*
 * NOTE(review): presumably the modes loaded from /etc/ioctl.syscon by
 * get_ioctl_syscon() -- confirm there.
 */
static struct termios	stored_syscon_termios;
/* Set in main() from the return value of get_ioctl_syscon(). */
static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
430
/*
 * Reasons for init to wake up.  The individual reasons are one-bit flags;
 * w_mask overlays them so that main() can test for "no reason at all"
 * (w_mask == 0) and clear every reason with a single store.
 */
static union WAKEUP {
	struct WAKEFLAGS {
		unsigned w_usersignal : 1;	/* User sent signal to "init" */
		unsigned w_childdeath : 1;	/* An "init" child died */
		unsigned w_powerhit : 1;	/* OS experienced powerfail */
	}	w_flags;
	int w_mask;
} wakeup;


/*
 * init's mutable state, reached through the global pointer g_state.
 *
 * NOTE(review): ist_proc_table is declared with one element but is indexed
 * up to ist_num_proc entries; presumably the structure is allocated larger
 * than sizeof (struct init_state) -- confirm in st_init() and
 * increase_proc_table_size().
 */
struct init_state {
	int			ist_runlevel;
	int			ist_num_proc;
	int			ist_utmpx_ok;
	struct PROC_TABLE	ist_proc_table[1];
};

/* Shorthand for the fields of *g_state; g_state must be set before use. */
#define	cur_state	(g_state->ist_runlevel)
#define	num_proc	(g_state->ist_num_proc)
#define	proc_table	(g_state->ist_proc_table)
#define	utmpx_ok	(g_state->ist_utmpx_ok)

/* Contract cookies. */
#define	ORDINARY_COOKIE		0
#define	STARTD_COOKIE		1
456
457
/*
 * bad_error(func, err)
 *   Abort after a library call fails with an error that should be impossible.
 *   Unless NDEBUG is defined, the source location, the name of the failing
 *   function (func, a string) and the error number (err, an int) are first
 *   written to stderr.
 *
 *   The debug variant is wrapped in do { } while (0) so that, like the NDEBUG
 *   variant, it is a single statement that takes a trailing semicolon and can
 *   be used as the unbraced body of an if/else.
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (0)
#else
#define	bad_error(func, err)	abort()
#endif
467
468
/*
 * Useful file and device names.
 */
static char *CONSOLE	  = "/dev/console";	/* Real system console */
/* INITPIPE lives in INITPIPE_DIR. */
static char *INITPIPE_DIR = "/var/run";
static char *INITPIPE	  = "/var/run/initpipe";

/*
 * Location of the state file (see "State File and Restartability" above).
 * NOTE(review): init_next_state_file is presumably a staging copy that is
 * written first and then moved over init_state_file -- confirm in
 * st_write().
 */
#define	INIT_STATE_DIR "/etc/svc/volatile"
static const char * const init_state_file = INIT_STATE_DIR "/init.state";
static const char * const init_next_state_file =
	INIT_STATE_DIR "/init-next.state";

static const int init_num_proc = 20;	/* Initial size of process table. */

static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
static char *SYSTTY	 = "/dev/systty";	/* System Console */
static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
static char *SH	= "/sbin/sh";		/* Standard shell */

/*
 * Default Path.  /sbin is included in path only during sysinit phase
 */
#define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
#define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
498
/*
 * Run-state bookkeeping used by main().  prior_state holds the state that
 * was current before the most recent level change; main() restores
 * cur_state from it after servicing a demand level (a, b or c).
 */
static int	prior_state;
static int	prev_state;	/* State "init" was in last time it woke */
static int	new_state;	/* State user wants "init" to go to. */
static int	lvlq_received;	/* Explicit request to examine state */
static int	op_modes = BOOT_MODES; /* Current state of "init" */
static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
				/*   childeath() and cleared in cleanaux() */
static int	Pfd = -1;	/* fd to receive pids thru */
/*
 * Counters maintained by main(): spawncnt is incremented after each pass
 * in which a respawn took place, pausecnt after each poll() wait.
 */
static unsigned int	spawncnt, pausecnt;
static int	rsflag;		/* Set if a respawn has taken place */
static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
				/* routine each time an alarm interrupt */
				/* takes place. */
static int	sflg = 0;	/* Set if we were booted -s to single user */
static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
static pid_t	init_pid;	/* PID of "one true" init for current zone */

/*
 * g_state points at init's state; it is accessed through the cur_state,
 * num_proc, proc_table and utmpx_ok macros.
 * NOTE(review): g_state_sz is presumably the allocated size of *g_state in
 * bytes -- confirm in st_init().
 */
static struct init_state *g_state = NULL;
static size_t	g_state_sz;
static int	booting = 1;	/* Set while we're booting. */

/*
 * Array for default global environment.
 */
#define	MAXENVENT	24	/* Max number of default env variables + 1 */
				/* init can use three itself, so this leaves */
				/* 20 for the administrator in ENVFILE. */
static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
static int	glob_envn;		/* Number of environment strings */


/*
 * Descriptor set for the poll() in main()'s loop.  While poll_nfds is 0,
 * poll() looks at no descriptors and merely waits for its timeout.
 */
static struct pollfd	poll_fds[1];
static int		poll_nfds = 0;	/* poll_fds is uninitialized */
533
/*
 * Contracts constants
 *
 * SVC_FMRI_SIZE is large enough for SVC_INIT_PREFIX followed by an inittab
 * id and a terminating NUL (the sizeof already counts the NUL);
 * SVC_AUX_SIZE holds an inittab id plus a terminating NUL.
 */
#define	SVC_INIT_PREFIX "init:/"
#define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
#define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)

static int	legacy_tmpl = -1;	/* fd for legacy contract template */
static int	startd_tmpl = -1;	/* fd for svc.startd's template */
static char	startd_svc_aux[SVC_AUX_SIZE];

static char	startd_cline[256] = "";	/* svc.startd's command line */
static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
static char	*smf_options = NULL;	/* Options to give to startd. */
static int	smf_debug = 0;		/* Messages for debugging smf(5) */
static time_t	init_boot_time;		/* Substitute for kernel boot time. */

/*
 * History used to detect svc.startd failing rapidly (see "Maintenance Mode"
 * above).
 * NOTE(review): startd_failure_time is presumably a ring of the most recent
 * failure timestamps indexed by startd_failure_index -- confirm in
 * startd_record_failure().
 */
#define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
#define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */

static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
static uint_t	startd_failure_index;
556
557
/*
 * Forward declarations for the file-local functions.
 *
 * NOTE(review): the declarations written with an empty parameter list "()"
 * are not prototypes, so calls to those functions are not argument-checked.
 * Converting them needs each definition's real parameter list.
 */
static char	*prog_name(char *);
static int	state_to_mask(int);
static int	lvlname_to_mask(char, int *);
static void	lscf_set_runlevel(char);
static int	state_to_flags(int);
static char	state_to_name(int);
static int	lvlname_to_state(char);
static int	getcmd(struct CMD_LINE *, char *);
static int	realcon();
static int	spawn_processes();
static int	get_ioctl_syscon();
static int	account(short, struct PROC_TABLE *, char *);
static void	alarmclk();
static void	childeath(int);
static void	cleanaux();
static void	clearent(pid_t, short);
static void	console(boolean_t, char *, ...);
static void	init_signals(void);
static void	setup_pipe();
static void	killproc(pid_t);
static void	init_env();
static void	boot_init();
static void	powerfail();
static void	remv();
static void	write_ioctl_syscon();
static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
static void	setimer(int);
static void	siglvl(int, siginfo_t *, ucontext_t *);
static void	sigpoll(int);
static void	enter_maintenance(void);
static void	timer(int);
static void	userinit(int, char **);
static void	notify_pam_dead(struct utmpx *);
static long	waitproc(struct PROC_TABLE *);
static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
static struct PROC_TABLE *findpslot(struct CMD_LINE *);
static void	increase_proc_table_size();
static void	st_init();
static void	st_write();
static void	contracts_init();
static void	contract_event(struct pollfd *);
static int	startd_run(const char *, int, ctid_t);
static void	startd_record_failure();
static int	startd_failure_rate_critical();
static char	*audit_boot_msg();
static int	audit_put_record(int, int, char *);
static void	update_boot_archive(int new_state);
static void	init_bootbanner_print(const char *, uint_t);
606
607int
608main(int argc, char *argv[])
609{
610	int	chg_lvl_flag = FALSE, print_banner = FALSE;
611	int	may_need_audit = 1;
612	int	c;
613	char	*msg;
614
615	/* Get a timestamp for use as boot time, if needed. */
616	(void) time(&init_boot_time);
617
618	/* Get the default umask */
619	cmask = umask(022);
620	(void) umask(cmask);
621
622	/* Parse the arguments to init. Check for single user */
623	opterr = 0;
624	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
625		switch (c) {
626		case 'b':
627			rflg = 0;
628			bflg = 1;
629			if (!sflg)
630				sflg++;
631			break;
632		case 'r':
633			bflg = 0;
634			rflg++;
635			break;
636		case 's':
637			if (!bflg)
638				sflg++;
639			break;
640		case 'm':
641			smf_options = optarg;
642			smf_debug = (strstr(smf_options, "debug") != NULL);
643			break;
644		}
645	}
646
647	/*
648	 * Determine if we are the main init, or a user invoked init, whose job
649	 * it is to inform init to change levels or perform some other action.
650	 */
651	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
652	    sizeof (init_pid)) != sizeof (init_pid)) {
653		(void) fprintf(stderr, "could not get pid for init\n");
654		return (1);
655	}
656
657	/*
658	 * If this PID is not the same as the "true" init for the zone, then we
659	 * must be in 'user' mode.
660	 */
661	if (getpid() != init_pid) {
662		userinit(argc, argv);
663	}
664
665	if (getzoneid() != GLOBAL_ZONEID) {
666		print_banner = TRUE;
667	}
668
669	/*
670	 * Initialize state (and set "booting").
671	 */
672	st_init();
673
674	if (booting && print_banner) {
675		/*
676		 * We want to print the boot banner as soon as
677		 * possible.  In the global zone, the kernel does it,
678		 * but we do not have that luxury in non-global zones,
679		 * so we will print it here.
680		 */
681#ifdef	LEGACY_BANNER
682		struct utsname un;
683		char buf[BUFSIZ];
684		const char *bits;
685		int r;
686
687		(void) uname(&un);
688		if ((r = sysinfo(SI_ADDRESS_WIDTH, buf, sizeof (buf))) > 0 &&
689		    r < sizeof (buf)) {
690			bits = buf;
691		} else {
692			bits = "64";
693		}
694
695		console(B_FALSE,
696		    "\n\n%s Release %s Version %s %s-bit\r\n",
697		    un.sysname, un.release, un.version, bits);
698		console(B_FALSE,
699		    "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
700		    " All rights reserved.\r\n");
701#else
702		bootbanner_print(init_bootbanner_print, 0);
703#endif
704	}
705
706	/*
707	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
708	 * so that it can be brought up in the state it was in when the
709	 * system went down; or set to defaults if ioctl.syscon isn't
710	 * valid.
711	 *
712	 * This needs to be done even if we're restarting so reset_modes()
713	 * will work in case we need to go down to single user mode.
714	 */
715	write_ioctl = get_ioctl_syscon();
716
717	/*
718	 * Set up all signals to be caught or ignored as appropriate.
719	 */
720	init_signals();
721
722	/* Load glob_envp from ENVFILE. */
723	init_env();
724
725	contracts_init();
726
727	if (!booting) {
728		/* cur_state should have been read in. */
729
730		op_modes = NORMAL_MODES;
731
732		/* Rewrite the ioctl file if it was bad. */
733		if (write_ioctl)
734			write_ioctl_syscon();
735	} else {
736		/*
737		 * It's fine to boot up with state as zero, because
738		 * startd will later tell us the real state.
739		 */
740		cur_state = 0;
741		op_modes = BOOT_MODES;
742
743		boot_init();
744	}
745
746	prev_state = prior_state = cur_state;
747
748	setup_pipe();
749
750	/*
751	 * Here is the beginning of the main process loop.
752	 */
753	for (;;) {
754		if (lvlq_received) {
755			setup_pipe();
756			lvlq_received = B_FALSE;
757		}
758
759		/*
760		 * Clean up any accounting records for dead "godchildren".
761		 */
762		if (Gchild)
763			cleanaux();
764
765		/*
766		 * If in "normal" mode, check all living processes and initiate
767		 * kill sequence on those that should not be there anymore.
768		 */
769		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
770		    cur_state != LVLb && cur_state != LVLc)
771			remv();
772
773		/*
774		 * If a change in run levels is the reason we awoke, now do
775		 * the accounting to report the change in the utmp file.
776		 * Also report the change on the system console.
777		 */
778		if (chg_lvl_flag) {
779			chg_lvl_flag = FALSE;
780
781			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
782				char rl = state_to_name(cur_state);
783
784				if (rl != -1)
785					lscf_set_runlevel(rl);
786			}
787
788			may_need_audit = 1;
789		}
790
791		/*
792		 * Scan the inittab file and spawn and respawn processes that
793		 * should be alive in the current state. If inittab does not
794		 * exist default to  single user mode.
795		 */
796		if (spawn_processes() == FAILURE) {
797			prior_state = prev_state;
798			cur_state = SINGLE_USER;
799		}
800
801		/* If any respawns occurred, take note. */
802		if (rsflag) {
803			rsflag = 0;
804			spawncnt++;
805		}
806
807		/*
808		 * If a powerfail signal was received during the last
809		 * sequence, set mode to powerfail.  When spawn_processes() is
810		 * entered the first thing it does is to check "powerhit".  If
811		 * it is in PF_MODES then it clears "powerhit" and does
812		 * a powerfail sequence.  If it is not in PF_MODES, then it
813		 * puts itself in PF_MODES and then clears "powerhit".  Should
814		 * "powerhit" get set again while spawn_processes() is working
815		 * on a powerfail sequence, the following code  will see that
816		 * spawn_processes() tries to execute the powerfail sequence
817		 * again.  This guarantees that the powerfail sequence will be
818		 * successfully completed before further processing takes
819		 * place.
820		 */
821		if (wakeup.w_flags.w_powerhit) {
822			op_modes = PF_MODES;
823			/*
824			 * Make sure that cur_state != prev_state so that
825			 * ONCE and WAIT types work.
826			 */
827			prev_state = 0;
828		} else if (op_modes != NORMAL_MODES) {
829			/*
830			 * If spawn_processes() was not just called while in
831			 * normal mode, we set the mode to normal and it will
832			 * be called again to check normal modes.  If we have
833			 * just finished a powerfail sequence with prev_state
834			 * equal to zero, we set prev_state equal to cur_state
835			 * before the next pass through.
836			 */
837			if (op_modes == PF_MODES)
838				prev_state = cur_state;
839			op_modes = NORMAL_MODES;
840		} else if (cur_state == LVLa || cur_state == LVLb ||
841		    cur_state == LVLc) {
842			/*
843			 * If it was a change of levels that awakened us and the
844			 * new level is one of the demand levels then reset
845			 * cur_state to the previous state and do another scan
846			 * to take care of the usual respawn actions.
847			 */
848			cur_state = prior_state;
849			prior_state = prev_state;
850			prev_state = cur_state;
851		} else {
852			prev_state = cur_state;
853
854			if (wakeup.w_mask == 0) {
855				int ret;
856
857				if (may_need_audit && (cur_state == LVL3)) {
858					msg = audit_boot_msg();
859
860					may_need_audit = 0;
861					(void) audit_put_record(ADT_SUCCESS,
862					    ADT_SUCCESS, msg);
863					free(msg);
864				}
865
866				/*
867				 * "init" is finished with all actions for
868				 * the current wakeup.
869				 */
870				ret = poll(poll_fds, poll_nfds,
871				    SLEEPTIME * MILLISEC);
872				pausecnt++;
873				if (ret > 0)
874					contract_event(&poll_fds[0]);
875				else if (ret < 0 && errno != EINTR)
876					console(B_TRUE, "poll() error: %s\n",
877					    strerror(errno));
878			}
879
880			if (wakeup.w_flags.w_usersignal) {
881				/*
882				 * Install the new level.  This could be a real
883				 * change in levels  or a telinit [Q|a|b|c] or
884				 * just a telinit to the same level at which
885				 * we are running.
886				 */
887				if (new_state != cur_state) {
888					if (new_state == LVLa ||
889					    new_state == LVLb ||
890					    new_state == LVLc) {
891						prev_state = prior_state;
892						prior_state = cur_state;
893						cur_state = new_state;
894					} else {
895						prev_state = cur_state;
896						if (cur_state >= 0)
897							prior_state = cur_state;
898						cur_state = new_state;
899						chg_lvl_flag = TRUE;
900					}
901				}
902
903				new_state = 0;
904			}
905
906			if (wakeup.w_flags.w_powerhit)
907				op_modes = PF_MODES;
908
909			/*
910			 * Clear all wakeup reasons.
911			 */
912			wakeup.w_mask = 0;
913		}
914	}
915
916	/*NOTREACHED*/
917}
918
919static void
920init_bootbanner_print(const char *line, uint_t num)
921{
922	const char *pfx = (num == 0) ? "\n\n" : "";
923
924	console(B_FALSE, "%s%s\r\n", pfx, line);
925}
926
927static void
928update_boot_archive(int new_state)
929{
930	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
931		return;
932
933	if (getzoneid() != GLOBAL_ZONEID)
934		return;
935
936	(void) system("/sbin/bootadm -ea update_all");
937}
938
939/*
940 * void enter_maintenance()
941 *   A simple invocation of sulogin(1M), with no baggage, in the case that we
942 *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
943 *   we wait for it to exit.
944 */
945static void
946enter_maintenance()
947{
948	struct PROC_TABLE	*su_process;
949
950	console(B_FALSE, "Requesting maintenance mode\n"
951	    "(See /lib/svc/share/README for additional information.)\n");
952	(void) sighold(SIGCLD);
953	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
954		(void) pause();
955	(void) sigrelse(SIGCLD);
956	if (su_process == NULLPROC) {
957		int fd;
958
959		(void) fclose(stdin);
960		(void) fclose(stdout);
961		(void) fclose(stderr);
962		closefrom(0);
963
964		fd = open(SYSCON, O_RDWR | O_NOCTTY);
965		if (fd >= 0) {
966			(void) dup2(fd, 1);
967			(void) dup2(fd, 2);
968		} else {
969			/*
970			 * Need to issue an error message somewhere.
971			 */
972			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
973			    getpid(), SYSCON, strerror(errno));
974		}
975
976		/*
977		 * Execute the "su" program.
978		 */
979		(void) execle(SU, SU, "-", (char *)0, glob_envp);
980		console(B_TRUE, "execle of %s failed: %s\n", SU,
981		    strerror(errno));
982		timer(5);
983		exit(1);
984	}
985
986	/*
987	 * If we are the parent, wait around for the child to die
988	 * or for "init" to be signaled to change levels.
989	 */
990	while (waitproc(su_process) == FAILURE) {
991		/*
992		 * All other reasons for waking are ignored when in
993		 * single-user mode.  The only child we are interested
994		 * in is being waited for explicitly by waitproc().
995		 */
996		wakeup.w_mask = 0;
997	}
998}
999
1000/*
1001 * remv() scans through "proc_table" and performs cleanup.  If
1002 * there is a process in the table, which shouldn't be here at
1003 * the current run level, then remv() kills the process.
1004 */
1005static void
1006remv()
1007{
1008	struct PROC_TABLE	*process;
1009	struct CMD_LINE		cmd;
1010	char			cmd_string[MAXCMDL];
1011	int			change_level;
1012
1013	change_level = (cur_state != prev_state ? TRUE : FALSE);
1014
1015	/*
1016	 * Clear the TOUCHED flag on all entries so that when we have
1017	 * finished scanning inittab, we will be able to tell if we
1018	 * have any processes for which there is no entry in inittab.
1019	 */
1020	for (process = proc_table;
1021	    (process < proc_table + num_proc); process++) {
1022		process->p_flags &= ~TOUCHED;
1023	}
1024
1025	/*
1026	 * Scan all inittab entries.
1027	 */
1028	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1029		/* Scan for process which goes with this entry in inittab. */
1030		for (process = proc_table;
1031		    (process < proc_table + num_proc); process++) {
1032			if ((process->p_flags & OCCUPIED) == 0 ||
1033			    !id_eq(process->p_id, cmd.c_id))
1034				continue;
1035
1036			/*
1037			 * This slot contains the process we are looking for.
1038			 */
1039
1040			/*
1041			 * Is the cur_state SINGLE_USER or is this process
1042			 * marked as "off" or was this proc started by some
1043			 * mechanism other than LVL{a|b|c} and the current level
1044			 * does not support this process?
1045			 */
1046			if (cur_state == SINGLE_USER ||
1047			    cmd.c_action == M_OFF ||
1048			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1049			    (process->p_flags & DEMANDREQUEST) == 0)) {
1050				if (process->p_flags & LIVING) {
1051					/*
1052					 * Touch this entry so we know we have
1053					 * treated it.  Note that procs which
1054					 * are already dead at this point and
1055					 * should not be restarted are left
1056					 * untouched.  This causes their slot to
1057					 * be freed later after dead accounting
1058					 * is done.
1059					 */
1060					process->p_flags |= TOUCHED;
1061
1062					if ((process->p_flags & KILLED) == 0) {
1063						if (change_level) {
1064							process->p_flags
1065							    |= WARNED;
1066							(void) kill(
1067							    process->p_pid,
1068							    SIGTERM);
1069						} else {
1070							/*
1071							 * Fork a killing proc
1072							 * so "init" can
1073							 * continue without
1074							 * having to pause for
1075							 * TWARN seconds.
1076							 */
1077							killproc(
1078							    process->p_pid);
1079						}
1080						process->p_flags |= KILLED;
1081					}
1082				}
1083			} else {
1084				/*
1085				 * Process can exist at current level.  If it is
1086				 * still alive or a DEMANDREQUEST we touch it so
1087				 * it will be left alone.  Otherwise we leave it
1088				 * untouched so it will be accounted for and
1089				 * cleaned up later in remv().  Dead
1090				 * DEMANDREQUESTs will be accounted but not
1091				 * freed.
1092				 */
1093				if (process->p_flags &
1094				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1095					process->p_flags |= TOUCHED;
1096			}
1097
1098			break;
1099		}
1100	}
1101
1102	st_write();
1103
1104	/*
1105	 * If this was a change of levels call, scan through the
1106	 * process table for processes that were warned to die.  If any
1107	 * are found that haven't left yet, sleep for TWARN seconds and
1108	 * then send final terminations to any that haven't died yet.
1109	 */
1110	if (change_level) {
1111
1112		/*
1113		 * Set the alarm for TWARN seconds on the assumption
1114		 * that there will be some that need to be waited for.
1115		 * This won't harm anything except we are guaranteed to
1116		 * wakeup in TWARN seconds whether we need to or not.
1117		 */
1118		setimer(TWARN);
1119
1120		/*
1121		 * Scan for processes which should be dying.  We hope they
1122		 * will die without having to be sent a SIGKILL signal.
1123		 */
1124		for (process = proc_table;
1125		    (process < proc_table + num_proc); process++) {
1126			/*
1127			 * If this process should die, hasn't yet, and the
1128			 * TWARN time hasn't expired yet, wait for process
1129			 * to die or for timer to expire.
1130			 */
1131			while (time_up == FALSE &&
1132			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1133			    (WARNED|LIVING|OCCUPIED))
1134				(void) pause();
1135
1136			if (time_up == TRUE)
1137				break;
1138		}
1139
1140		/*
1141		 * If we reached the end of the table without the timer
1142		 * expiring, then there are no procs which will have to be
1143		 * sent the SIGKILL signal.  If the timer has expired, then
1144		 * it is necessary to scan the table again and send signals
1145		 * to all processes which aren't going away nicely.
1146		 */
1147		if (time_up == TRUE) {
1148			for (process = proc_table;
1149			    (process < proc_table + num_proc); process++) {
1150				if ((process->p_flags &
1151				    (WARNED|LIVING|OCCUPIED)) ==
1152				    (WARNED|LIVING|OCCUPIED))
1153					(void) kill(process->p_pid, SIGKILL);
1154			}
1155		}
1156		setimer(0);
1157	}
1158
1159	/*
1160	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1161	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1162	 * by the above scanning), and haven't been sent kill signals, and
1163	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1164	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1165	 */
1166	for (process = proc_table;
1167	    (process < proc_table + num_proc); process++) {
1168		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1169		    == (LIVING|NAMED|OCCUPIED)) {
1170			killproc(process->p_pid);
1171			process->p_flags |= KILLED;
1172		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1173		    (NAMED|OCCUPIED)) {
1174			(void) account(DEAD_PROCESS, process, NULL);
1175			/*
1176			 * If this named proc hasn't been TOUCHED, then free the
1177			 * space. It has either died of it's own accord, but
1178			 * isn't respawnable or it was killed because it
1179			 * shouldn't exist at this level.
1180			 */
1181			if ((process->p_flags & TOUCHED) == 0)
1182				process->p_flags = 0;
1183		}
1184	}
1185
1186	st_write();
1187}
1188
1189/*
1190 * Extract the svc.startd command line and whether to restart it from its
1191 * inittab entry.
1192 */
1193/*ARGSUSED*/
1194static void
1195process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1196{
1197	size_t sz;
1198
1199	/* Save the command line. */
1200	if (sflg || rflg) {
1201		/* Also append -r or -s. */
1202		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1203		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1204		if (sflg)
1205			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1206		if (rflg)
1207			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1208	} else {
1209		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1210	}
1211
1212	if (sz >= sizeof (startd_cline)) {
1213		console(B_TRUE,
1214		    "svc.startd command line too long.  Ignoring.\n");
1215		startd_cline[0] = '\0';
1216		return;
1217	}
1218}
1219
1220/*
1221 * spawn_processes() scans inittab for entries which should be run at this
1222 * mode.  Processes which should be running but are not, are started.
1223 */
1224static int
1225spawn_processes()
1226{
1227	struct PROC_TABLE		*pp;
1228	struct CMD_LINE			cmd;
1229	char				cmd_string[MAXCMDL];
1230	short				lvl_mask;
1231	int				status;
1232
1233	/*
1234	 * First check the "powerhit" flag.  If it is set, make sure the modes
1235	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1236	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1237	 * between the test of the powerhit flag and the clearing of it.
1238	 */
1239	if (wakeup.w_flags.w_powerhit) {
1240		wakeup.w_flags.w_powerhit = 0;
1241		op_modes = PF_MODES;
1242	}
1243	lvl_mask = state_to_mask(cur_state);
1244
1245	/*
1246	 * Scan through all the entries in inittab.
1247	 */
1248	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1249		if (id_eq(cmd.c_id, "smf")) {
1250			process_startd_line(&cmd, cmd_string);
1251			continue;
1252		}
1253
1254retry_for_proc_slot:
1255
1256		/*
1257		 * Find out if there is a process slot for this entry already.
1258		 */
1259		if ((pp = findpslot(&cmd)) == NULLPROC) {
1260			/*
1261			 * we've run out of proc table entries
1262			 * increase proc_table.
1263			 */
1264			increase_proc_table_size();
1265
1266			/*
1267			 * Retry now as we have an empty proc slot.
1268			 * In case increase_proc_table_size() fails,
1269			 * we will keep retrying.
1270			 */
1271			goto retry_for_proc_slot;
1272		}
1273
1274		/*
1275		 * If there is an entry, and it is marked as DEMANDREQUEST,
1276		 * one of the levels a, b, or c is in its levels mask, and
1277		 * the action field is ONDEMAND and ONDEMAND is a permissable
1278		 * mode, and the process is dead, then respawn it.
1279		 */
1280		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1281		    (cmd.c_levels & MASK_abc) &&
1282		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1283			spawn(pp, &cmd);
1284			continue;
1285		}
1286
1287		/*
1288		 * If the action is not an action we are interested in,
1289		 * skip the entry.
1290		 */
1291		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1292		    (cmd.c_levels & lvl_mask) == 0)
1293			continue;
1294
1295		/*
1296		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1297		 * ONDEMAND) and the action field is either OFF or the action
1298		 * field is ONCE or WAIT and the current level is the same as
1299		 * the last level, then skip this entry.  ONCE and WAIT only
1300		 * get run when the level changes.
1301		 */
1302		if (op_modes == NORMAL_MODES &&
1303		    (cmd.c_action == M_OFF ||
1304		    (cmd.c_action & (M_ONCE|M_WAIT)) &&
1305		    cur_state == prev_state))
1306			continue;
1307
1308		/*
1309		 * At this point we are interested in performing the action for
1310		 * this entry.  Actions fall into two categories, spinning off
1311		 * a process and not waiting, and spinning off a process and
1312		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1313		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1314		 * to die, for all other actions we do wait.
1315		 */
1316		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1317			spawn(pp, &cmd);
1318
1319		} else {
1320			spawn(pp, &cmd);
1321			while (waitproc(pp) == FAILURE)
1322				;
1323			(void) account(DEAD_PROCESS, pp, NULL);
1324			pp->p_flags = 0;
1325		}
1326	}
1327	return (status);
1328}
1329
1330/*
1331 * spawn() spawns a shell, inserts the information about the process
1332 * process into the proc_table, and does the startup accounting.
1333 */
1334static void
1335spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1336{
1337	int		i;
1338	int		modes, maxfiles;
1339	time_t		now;
1340	struct PROC_TABLE tmproc, *oprocess;
1341
1342	/*
1343	 * The modes to be sent to efork() are 0 unless we are
1344	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1345	 * waiting for the death of the child before continuing.
1346	 */
1347	modes = NAMED;
1348	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1349	    cur_state == LVLb || cur_state == LVLc)
1350		modes |= DEMANDREQUEST;
1351	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1352		modes |= NOCLEANUP;
1353
1354	/*
1355	 * If this is a respawnable process, check the threshold
1356	 * information to avoid excessive respawns.
1357	 */
1358	if (cmd->c_action & M_RESPAWN) {
1359		/*
1360		 * Add NOCLEANUP to all respawnable commands so that the
1361		 * information about the frequency of respawns isn't lost.
1362		 */
1363		modes |= NOCLEANUP;
1364		(void) time(&now);
1365
1366		/*
1367		 * If no time is assigned, then this is the first time
1368		 * this command is being processed in this series.  Assign
1369		 * the current time.
1370		 */
1371		if (process->p_time == 0L)
1372			process->p_time = now;
1373
1374		if (process->p_count++ == SPAWN_LIMIT) {
1375
1376			if ((now - process->p_time) < SPAWN_INTERVAL) {
1377				/*
1378				 * Process is respawning too rapidly.  Print
1379				 * message and refuse to respawn it for now.
1380				 */
1381				console(B_TRUE, "Command is respawning too "
1382				    "rapidly. Check for possible errors.\n"
1383				    "id:%4s \"%s\"\n",
1384				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1385				return;
1386			}
1387			process->p_time = now;
1388			process->p_count = 0;
1389
1390		} else if (process->p_count > SPAWN_LIMIT) {
1391			/*
1392			 * If process has been respawning too rapidly and
1393			 * the inhibit time limit hasn't expired yet, we
1394			 * refuse to respawn.
1395			 */
1396			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1397				return;
1398			process->p_time = now;
1399			process->p_count = 0;
1400		}
1401		rsflag = TRUE;
1402	}
1403
1404	/*
1405	 * Spawn a child process to execute this command.
1406	 */
1407	(void) sighold(SIGCLD);
1408	oprocess = process;
1409	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1410		(void) pause();
1411
1412	if (process == NULLPROC) {
1413
1414		/*
1415		 * We are the child.  We must make sure we get a different
1416		 * file pointer for our references to utmpx.  Otherwise our
1417		 * seeks and reads will compete with those of the parent.
1418		 */
1419		endutxent();
1420
1421		/*
1422		 * Perform the accounting for the beginning of a process.
1423		 * Note that all processes are initially "INIT_PROCESS"es.
1424		 */
1425		tmproc.p_id[0] = cmd->c_id[0];
1426		tmproc.p_id[1] = cmd->c_id[1];
1427		tmproc.p_id[2] = cmd->c_id[2];
1428		tmproc.p_id[3] = cmd->c_id[3];
1429		tmproc.p_pid = getpid();
1430		tmproc.p_exit = 0;
1431		(void) account(INIT_PROCESS, &tmproc,
1432		    prog_name(&cmd->c_command[EXEC]));
1433		maxfiles = ulimit(UL_GDESLIM, 0);
1434		for (i = 0; i < maxfiles; i++)
1435			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1436
1437		/*
1438		 * Now exec a shell with the -c option and the command
1439		 * from inittab.
1440		 */
1441		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1442		    glob_envp);
1443		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1444		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1445
1446		/*
1447		 * Don't come back so quickly that "init" doesn't have a
1448		 * chance to finish putting this child in "proc_table".
1449		 */
1450		timer(20);
1451		exit(1);
1452
1453	}
1454
1455	/*
1456	 * We are the parent.  Insert the necessary
1457	 * information in the proc_table.
1458	 */
1459	process->p_id[0] = cmd->c_id[0];
1460	process->p_id[1] = cmd->c_id[1];
1461	process->p_id[2] = cmd->c_id[2];
1462	process->p_id[3] = cmd->c_id[3];
1463
1464	st_write();
1465
1466	(void) sigrelse(SIGCLD);
1467}
1468
1469/*
1470 * findpslot() finds the old slot in the process table for the
1471 * command with the same id, or it finds an empty slot.
1472 */
1473static struct PROC_TABLE *
1474findpslot(struct CMD_LINE *cmd)
1475{
1476	struct PROC_TABLE	*process;
1477	struct PROC_TABLE	*empty = NULLPROC;
1478
1479	for (process = proc_table;
1480	    (process < proc_table + num_proc); process++) {
1481		if (process->p_flags & OCCUPIED &&
1482		    id_eq(process->p_id, cmd->c_id))
1483			break;
1484
1485		/*
1486		 * If the entry is totally empty and "empty" is still 0,
1487		 * remember where this hole is and make sure the slot is
1488		 * zeroed out.
1489		 */
1490		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1491			empty = process;
1492			process->p_id[0] = '\0';
1493			process->p_id[1] = '\0';
1494			process->p_id[2] = '\0';
1495			process->p_id[3] = '\0';
1496			process->p_pid = 0;
1497			process->p_time = 0L;
1498			process->p_count = 0;
1499			process->p_flags = 0;
1500			process->p_exit = 0;
1501		}
1502	}
1503
1504	/*
1505	 * If there is no entry for this slot, then there should be an
1506	 * empty slot.  If there is no empty slot, then we've run out
1507	 * of proc_table space.  If the latter is true, empty will be
1508	 * NULL and the caller will have to complain.
1509	 */
1510	if (process == (proc_table + num_proc))
1511		process = empty;
1512
1513	return (process);
1514}
1515
1516/*
1517 * getcmd() parses lines from inittab.  Each time it finds a command line
1518 * it will return TRUE as well as fill the passed CMD_LINE structure and
1519 * the shell command string.  When the end of inittab is reached, FALSE
1520 * is returned inittab is automatically opened if it is not currently open
1521 * and is closed when the end of the file is reached.
1522 */
1523static FILE *fp_inittab = NULL;
1524
1525static int
1526getcmd(struct CMD_LINE *cmd, char *shcmd)
1527{
1528	char	*ptr;
1529	int	c, lastc, state;
1530	char 	*ptr1;
1531	int	answer, i, proceed;
1532	struct	stat	sbuf;
1533	static char *actions[] = {
1534		"off", "respawn", "ondemand", "once", "wait", "boot",
1535		"bootwait", "powerfail", "powerwait", "initdefault",
1536		"sysinit",
1537	};
1538	static short act_masks[] = {
1539		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1540		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1541	};
1542	/*
1543	 * Only these actions will be allowed for entries which
1544	 * are specified for single-user mode.
1545	 */
1546	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1547
1548	if (fp_inittab == NULL) {
1549		/*
1550		 * Before attempting to open inittab we stat it to make
1551		 * sure it currently exists and is not empty.  We try
1552		 * several times because someone may have temporarily
1553		 * unlinked or truncated the file.
1554		 */
1555		for (i = 0; i < 3; i++) {
1556			if (stat(INITTAB, &sbuf) == -1) {
1557				if (i == 2) {
1558					console(B_TRUE,
1559					    "Cannot stat %s, errno: %d\n",
1560					    INITTAB, errno);
1561					return (FAILURE);
1562				} else {
1563					timer(3);
1564				}
1565			} else if (sbuf.st_size < 10) {
1566				if (i == 2) {
1567					console(B_TRUE,
1568					    "%s truncated or corrupted\n",
1569					    INITTAB);
1570					return (FAILURE);
1571				} else {
1572					timer(3);
1573				}
1574			} else {
1575				break;
1576			}
1577		}
1578
1579		/*
1580		 * If unable to open inittab, print error message and
1581		 * return FAILURE to caller.
1582		 */
1583		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1584			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1585			    errno);
1586			return (FAILURE);
1587		}
1588	}
1589
1590	/*
1591	 * Keep getting commands from inittab until you find a
1592	 * good one or run out of file.
1593	 */
1594	for (answer = FALSE; answer == FALSE; ) {
1595		/*
1596		 * Zero out the cmd itself before trying next line.
1597		 */
1598		bzero(cmd, sizeof (struct CMD_LINE));
1599
1600		/*
1601		 * Read in lines of inittab, parsing at colons, until a line is
1602		 * read in which doesn't end with a backslash.  Do not start if
1603		 * the first character read is an EOF.  Note that this means
1604		 * that lines which don't end in a newline are still processed,
1605		 * since the "for" will terminate normally once started,
1606		 * regardless of whether line terminates with a newline or EOF.
1607		 */
1608		state = FAILURE;
1609		if ((c = fgetc(fp_inittab)) == EOF) {
1610			answer = FALSE;
1611			(void) fclose(fp_inittab);
1612			fp_inittab = NULL;
1613			break;
1614		}
1615
1616		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1617		    proceed && c != EOF;
1618		    lastc = c, c = fgetc(fp_inittab)) {
1619			/* If we're not in the FAILURE state and haven't */
1620			/* yet reached the shell command field, process	 */
1621			/* the line, otherwise just look for a real end	 */
1622			/* of line.					 */
1623			if (state != FAILURE && state != COMMAND) {
1624			/*
1625			 * Squeeze out spaces and tabs.
1626			 */
1627			if (c == ' ' || c == '\t')
1628				continue;
1629
1630			/*
1631			 * Ignore characters in a comment, except for the \n.
1632			 */
1633			if (state == COMMENT) {
1634				if (c == '\n') {
1635					lastc = ' ';
1636					break;
1637				} else {
1638					continue;
1639				}
1640			}
1641
1642			/*
1643			 * Detect comments (lines whose first non-whitespace
1644			 * character is '#') by checking that we're at the
1645			 * beginning of a line, have seen a '#', and haven't
1646			 * yet accumulated any characters.
1647			 */
1648			if (state == ID && c == '#' && ptr == shcmd) {
1649				state = COMMENT;
1650				continue;
1651			}
1652
1653			/*
1654			 * If the character is a ':', then check the
1655			 * previous field for correctness and advance
1656			 * to the next field.
1657			 */
1658			if (c == ':') {
1659				switch (state) {
1660
1661				case ID :
1662				/*
1663				 * Check to see that there are only
1664				 * 1 to 4 characters for the id.
1665				 */
1666				if ((i = ptr - shcmd) < 1 || i > 4) {
1667					state = FAILURE;
1668				} else {
1669					bcopy(shcmd, &cmd->c_id[0], i);
1670					ptr = shcmd;
1671					state = LEVELS;
1672				}
1673				break;
1674
1675				case LEVELS :
1676				/*
1677				 * Build a mask for all the levels for
1678				 * which this command will be legal.
1679				 */
1680				for (cmd->c_levels = 0, ptr1 = shcmd;
1681				    ptr1 < ptr; ptr1++) {
1682					int mask;
1683					if (lvlname_to_mask(*ptr1,
1684					    &mask) == -1) {
1685						state = FAILURE;
1686						break;
1687					}
1688					cmd->c_levels |= mask;
1689				}
1690				if (state != FAILURE) {
1691					state = ACTION;
1692					ptr = shcmd;	/* Reset the buffer */
1693				}
1694				break;
1695
1696				case ACTION :
1697				/*
1698				 * Null terminate the string in shcmd buffer and
1699				 * then try to match against legal actions.  If
1700				 * the field is of length 0, then the default of
1701				 * "RESPAWN" is used if the id is numeric,
1702				 * otherwise the default is "OFF".
1703				 */
1704				if (ptr == shcmd) {
1705					if (isdigit(cmd->c_id[0]) &&
1706					    (cmd->c_id[1] == '\0' ||
1707					    isdigit(cmd->c_id[1])) &&
1708					    (cmd->c_id[2] == '\0' ||
1709					    isdigit(cmd->c_id[2])) &&
1710					    (cmd->c_id[3] == '\0' ||
1711					    isdigit(cmd->c_id[3])))
1712						cmd->c_action = M_RESPAWN;
1713					else
1714						cmd->c_action = M_OFF;
1715				} else {
1716					for (cmd->c_action = 0, i = 0,
1717					    *ptr = '\0';
1718					    i <
1719					    sizeof (actions)/sizeof (char *);
1720					    i++) {
1721					if (strcmp(shcmd, actions[i]) == 0) {
1722						if ((cmd->c_levels & MASKSU) &&
1723						    !(act_masks[i] & su_acts))
1724							cmd->c_action = 0;
1725						else
1726							cmd->c_action =
1727							    act_masks[i];
1728						break;
1729					}
1730					}
1731				}
1732
1733				/*
1734				 * If the action didn't match any legal action,
1735				 * set state to FAILURE.
1736				 */
1737				if (cmd->c_action == 0) {
1738					state = FAILURE;
1739				} else {
1740					state = COMMAND;
1741					(void) strcpy(shcmd, "exec ");
1742				}
1743				ptr = shcmd + EXEC;
1744				break;
1745				}
1746				continue;
1747			}
1748		}
1749
1750		/* If the character is a '\n', then this is the end of a */
1751		/* line.  If the '\n' wasn't preceded by a backslash, */
1752		/* it is also the end of an inittab command.  If it was */
1753		/* preceded by a backslash then the next line is a */
1754		/* continuation.  Note that the continuation '\n' falls */
1755		/* through and is treated like other characters and is */
1756		/* stored in the shell command line. */
1757		if (c == '\n' && lastc != '\\') {
1758			proceed = FALSE;
1759			*ptr = '\0';
1760			break;
1761		}
1762
1763		/* For all other characters just stuff them into the */
1764		/* command as long as there aren't too many of them. */
1765		/* Make sure there is room for a terminating '\0' also. */
1766		if (ptr >= shcmd + MAXCMDL - 1)
1767			state = FAILURE;
1768		else
1769			*ptr++ = (char)c;
1770
1771		/* If the character we just stored was a quoted	*/
1772		/* backslash, then change "c" to '\0', so that this	*/
1773		/* backslash will not cause a subsequent '\n' to appear */
1774		/* quoted.  In otherwords '\' '\' '\n' is the real end */
1775		/* of a command, while '\' '\n' is a continuation. */
1776		if (c == '\\' && lastc == '\\')
1777			c = '\0';
1778		}
1779
1780		/*
1781		 * Make sure all the fields are properly specified
1782		 * for a good command line.
1783		 */
1784		if (state == COMMAND) {
1785			answer = TRUE;
1786			cmd->c_command = shcmd;
1787
1788			/*
1789			 * If no default level was supplied, insert
1790			 * all numerical levels.
1791			 */
1792			if (cmd->c_levels == 0)
1793				cmd->c_levels = MASK_NUMERIC;
1794
1795			/*
1796			 * If no action has been supplied, declare this
1797			 * entry to be OFF.
1798			 */
1799			if (cmd->c_action == 0)
1800				cmd->c_action = M_OFF;
1801
1802			/*
1803			 * If no shell command has been supplied, make sure
1804			 * there is a null string in the command field.
1805			 */
1806			if (ptr == shcmd + EXEC)
1807				*shcmd = '\0';
1808		} else
1809			answer = FALSE;
1810
1811		/*
1812		 * If we have reached the end of inittab, then close it
1813		 * and quit trying to find a good command line.
1814		 */
1815		if (c == EOF) {
1816			(void) fclose(fp_inittab);
1817			fp_inittab = NULL;
1818			break;
1819		}
1820	}
1821	return (answer);
1822}
1823
1824/*
1825 * lvlname_to_state(): convert the character name of a state to its level
1826 * (its corresponding signal number).
1827 */
1828static int
1829lvlname_to_state(char name)
1830{
1831	int i;
1832	for (i = 0; i < LVL_NELEMS; i++) {
1833		if (lvls[i].lvl_name == name)
1834			return (lvls[i].lvl_state);
1835	}
1836	return (-1);
1837}
1838
1839/*
1840 * state_to_name(): convert the level to the character name.
1841 */
1842static char
1843state_to_name(int state)
1844{
1845	int i;
1846	for (i = 0; i < LVL_NELEMS; i++) {
1847		if (lvls[i].lvl_state == state)
1848			return (lvls[i].lvl_name);
1849	}
1850	return (-1);
1851}
1852
1853/*
1854 * state_to_mask(): return the mask corresponding to a signal number
1855 */
1856static int
1857state_to_mask(int state)
1858{
1859	int i;
1860	for (i = 0; i < LVL_NELEMS; i++) {
1861		if (lvls[i].lvl_state == state)
1862			return (lvls[i].lvl_mask);
1863	}
1864	return (0);	/* return 0, since that represents an empty mask */
1865}
1866
1867/*
1868 * lvlname_to_mask(): return the mask corresponding to a levels character name
1869 */
1870static int
1871lvlname_to_mask(char name, int *mask)
1872{
1873	int i;
1874	for (i = 0; i < LVL_NELEMS; i++) {
1875		if (lvls[i].lvl_name == name) {
1876			*mask = lvls[i].lvl_mask;
1877			return (0);
1878		}
1879	}
1880	return (-1);
1881}
1882
1883/*
1884 * state_to_flags(): return the flags corresponding to a runlevel.  These
1885 * indicate properties of that runlevel.
1886 */
1887static int
1888state_to_flags(int state)
1889{
1890	int i;
1891	for (i = 0; i < LVL_NELEMS; i++) {
1892		if (lvls[i].lvl_state == state)
1893			return (lvls[i].lvl_flags);
1894	}
1895	return (0);
1896}
1897
1898/*
1899 * killproc() creates a child which kills the process specified by pid.
1900 */
1901void
1902killproc(pid_t pid)
1903{
1904	struct PROC_TABLE	*process;
1905
1906	(void) sighold(SIGCLD);
1907	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1908		(void) pause();
1909	(void) sigrelse(SIGCLD);
1910
1911	if (process == NULLPROC) {
1912		/*
1913		 * efork() sets all signal handlers to the default, so reset
1914		 * the ALRM handler to make timer() work as expected.
1915		 */
1916		(void) sigset(SIGALRM, alarmclk);
1917
1918		/*
1919		 * We are the child.  Try to terminate the process nicely
1920		 * first using SIGTERM and if it refuses to die in TWARN
1921		 * seconds kill it with SIGKILL.
1922		 */
1923		(void) kill(pid, SIGTERM);
1924		(void) timer(TWARN);
1925		(void) kill(pid, SIGKILL);
1926		(void) exit(0);
1927	}
1928}
1929
1930/*
1931 * Set up the default environment for all procs to be forked from init.
1932 * Read the values from the /etc/default/init file, except for PATH.  If
1933 * there's not enough room in the environment array, the environment
1934 * lines that don't fit are silently discarded.
1935 */
1936void
1937init_env()
1938{
1939	char	line[MAXCMDL];
1940	FILE	*fp;
1941	int	inquotes, length, wslength;
1942	char	*tokp, *cp1, *cp2;
1943
1944	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1945	(void) strcpy(glob_envp[0], DEF_PATH);
1946	glob_envn = 1;
1947
1948	if (rflg) {
1949		glob_envp[1] =
1950		    malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1951		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1952		++glob_envn;
1953	} else if (bflg == 1) {
1954		glob_envp[1] =
1955		    malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1956		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1957		++glob_envn;
1958	}
1959
1960	if ((fp = fopen(ENVFILE, "r")) == NULL) {
1961		console(B_TRUE,
1962		    "Cannot open %s. Environment not initialized.\n",
1963		    ENVFILE);
1964	} else {
1965		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1966		    glob_envn < MAXENVENT - 2) {
1967			/*
1968			 * Toss newline
1969			 */
1970			length = strlen(line);
1971			if (line[length - 1] == '\n')
1972				line[length - 1] = '\0';
1973
1974			/*
1975			 * Ignore blank or comment lines.
1976			 */
1977			if (line[0] == '#' || line[0] == '\0' ||
1978			    (wslength = strspn(line, " \t\n")) ==
1979			    strlen(line) ||
1980			    strchr(line, '#') == line + wslength)
1981				continue;
1982
1983			/*
1984			 * First make a pass through the line and change
1985			 * any non-quoted semi-colons to blanks so they
1986			 * will be treated as token separators below.
1987			 */
1988			inquotes = 0;
1989			for (cp1 = line; *cp1 != '\0'; cp1++) {
1990				if (*cp1 == '"') {
1991					if (inquotes == 0)
1992						inquotes = 1;
1993					else
1994						inquotes = 0;
1995				} else if (*cp1 == ';') {
1996					if (inquotes == 0)
1997						*cp1 = ' ';
1998				}
1999			}
2000
2001			/*
2002			 * Tokens within the line are separated by blanks
2003			 *  and tabs.  For each token in the line which
2004			 * contains a '=' we strip out any quotes and then
2005			 * stick the token in the environment array.
2006			 */
2007			if ((tokp = strtok(line, " \t")) == NULL)
2008				continue;
2009			do {
2010				if (strchr(tokp, '=') == NULL)
2011					continue;
2012				length = strlen(tokp);
2013				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
2014					for (cp2 = cp1;
2015					    cp2 < &tokp[length]; cp2++)
2016						*cp2 = *(cp2 + 1);
2017					length--;
2018				}
2019
2020				if (strncmp(tokp, "CMASK=",
2021				    sizeof ("CMASK=") - 1) == 0) {
2022					long t;
2023
2024					/* We know there's an = */
2025					t = strtol(strchr(tokp, '=') + 1, NULL,
2026					    8);
2027
2028					/* Sanity */
2029					if (t <= 077 && t >= 0)
2030						cmask = (int)t;
2031					(void) umask(cmask);
2032					continue;
2033				}
2034				glob_envp[glob_envn] =
2035				    malloc((unsigned)(length + 1));
2036				(void) strcpy(glob_envp[glob_envn], tokp);
2037				if (++glob_envn >= MAXENVENT - 1)
2038					break;
2039			} while ((tokp = strtok(NULL, " \t")) != NULL);
2040		}
2041
2042		/*
2043		 * Append a null pointer to the environment array
2044		 * to mark its end.
2045		 */
2046		glob_envp[glob_envn] = NULL;
2047		(void) fclose(fp);
2048	}
2049}
2050
2051/*
2052 * boot_init(): Do initialization things that should be done at boot.
2053 */
2054void
2055boot_init()
2056{
2057	int i;
2058	struct PROC_TABLE *process, *oprocess;
2059	struct CMD_LINE	cmd;
2060	char	line[MAXCMDL];
2061	char	svc_aux[SVC_AUX_SIZE];
2062	char	init_svc_fmri[SVC_FMRI_SIZE];
2063	char *old_path;
2064	int maxfiles;
2065
2066	/* Use INIT_PATH for sysinit cmds */
2067	old_path = glob_envp[0];
2068	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2069	(void) strcpy(glob_envp[0], INIT_PATH);
2070
2071	/*
2072	 * Scan inittab(4) and process the special svc.startd entry, initdefault
2073	 * and sysinit entries.
2074	 */
2075	while (getcmd(&cmd, &line[0]) == TRUE) {
2076		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2077			process_startd_line(&cmd, line);
2078			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2079			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2080		} else if (cmd.c_action == M_INITDEFAULT) {
2081			/*
2082			 * initdefault is no longer meaningful, as the SMF
2083			 * milestone controls what (legacy) run level we
2084			 * boot to.
2085			 */
2086			console(B_TRUE,
2087			    "Ignoring legacy \"initdefault\" entry.\n");
2088		} else if (cmd.c_action == M_SYSINIT) {
2089			/*
2090			 * Execute the "sysinit" entry and wait for it to
2091			 * complete.  No bookkeeping is performed on these
2092			 * entries because we avoid writing to the file system
2093			 * until after there has been an chance to check it.
2094			 */
2095			if (process = findpslot(&cmd)) {
2096				(void) sighold(SIGCLD);
2097				(void) snprintf(svc_aux, SVC_AUX_SIZE,
2098				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2099				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2100				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2101				    cmd.c_id);
2102				if (legacy_tmpl >= 0) {
2103					(void) ct_pr_tmpl_set_svc_fmri(
2104					    legacy_tmpl, init_svc_fmri);
2105					(void) ct_pr_tmpl_set_svc_aux(
2106					    legacy_tmpl, svc_aux);
2107				}
2108
2109				for (oprocess = process;
2110				    (process = efork(M_OFF, oprocess,
2111				    (NAMED|NOCLEANUP))) == NO_ROOM;
2112				    /* CSTYLED */)
2113					;
2114				(void) sigrelse(SIGCLD);
2115
2116				if (process == NULLPROC) {
2117					maxfiles = ulimit(UL_GDESLIM, 0);
2118
2119					for (i = 0; i < maxfiles; i++)
2120						(void) fcntl(i, F_SETFD,
2121						    FD_CLOEXEC);
2122					(void) execle(SH, "INITSH", "-c",
2123					    cmd.c_command,
2124					    (char *)0, glob_envp);
2125					console(B_TRUE,
2126"Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2127					    cmd.c_command, errno);
2128					exit(1);
2129				} else
2130					while (waitproc(process) == FAILURE)
2131						;
2132				process->p_flags = 0;
2133				st_write();
2134			}
2135		}
2136	}
2137
2138	/* Restore the path. */
2139	free(glob_envp[0]);
2140	glob_envp[0] = old_path;
2141
2142	/*
2143	 * This will enable st_write() to complain about init_state_file.
2144	 */
2145	booting = 0;
2146
2147	/*
2148	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2149	 * out a correct version.
2150	 */
2151	if (write_ioctl)
2152		write_ioctl_syscon();
2153
2154	/*
2155	 * Start svc.startd(1M), which does most of the work.
2156	 */
2157	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2158		/* Start svc.startd. */
2159		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2160			cur_state = SINGLE_USER;
2161	} else {
2162		console(B_TRUE, "Absent svc.startd entry or bad "
2163		    "contract template.  Not starting svc.startd.\n");
2164		enter_maintenance();
2165	}
2166}
2167
2168/*
2169 * init_signals(): Initialize all signals to either be caught or ignored.
2170 */
2171void
2172init_signals(void)
2173{
2174	struct sigaction act;
2175	int i;
2176
2177	/*
2178	 * Start by ignoring all signals, then selectively re-enable some.
2179	 * The SIG_IGN disposition will only affect asynchronous signals:
2180	 * any signal that we trigger synchronously that doesn't end up
2181	 * being handled by siglvl() will be forcibly delivered by the kernel.
2182	 */
2183	for (i = SIGHUP; i <= SIGRTMAX; i++)
2184		(void) sigset(i, SIG_IGN);
2185
2186	/*
2187	 * Handle all level-changing signals using siglvl() and set sa_mask so
2188	 * that all level-changing signals are blocked while in siglvl().
2189	 */
2190	act.sa_handler = siglvl;
2191	act.sa_flags = SA_SIGINFO;
2192	(void) sigemptyset(&act.sa_mask);
2193
2194	(void) sigaddset(&act.sa_mask, LVLQ);
2195	(void) sigaddset(&act.sa_mask, LVL0);
2196	(void) sigaddset(&act.sa_mask, LVL1);
2197	(void) sigaddset(&act.sa_mask, LVL2);
2198	(void) sigaddset(&act.sa_mask, LVL3);
2199	(void) sigaddset(&act.sa_mask, LVL4);
2200	(void) sigaddset(&act.sa_mask, LVL5);
2201	(void) sigaddset(&act.sa_mask, LVL6);
2202	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2203	(void) sigaddset(&act.sa_mask, LVLa);
2204	(void) sigaddset(&act.sa_mask, LVLb);
2205	(void) sigaddset(&act.sa_mask, LVLc);
2206
2207	(void) sigaction(LVLQ, &act, NULL);
2208	(void) sigaction(LVL0, &act, NULL);
2209	(void) sigaction(LVL1, &act, NULL);
2210	(void) sigaction(LVL2, &act, NULL);
2211	(void) sigaction(LVL3, &act, NULL);
2212	(void) sigaction(LVL4, &act, NULL);
2213	(void) sigaction(LVL5, &act, NULL);
2214	(void) sigaction(LVL6, &act, NULL);
2215	(void) sigaction(SINGLE_USER, &act, NULL);
2216	(void) sigaction(LVLa, &act, NULL);
2217	(void) sigaction(LVLb, &act, NULL);
2218	(void) sigaction(LVLc, &act, NULL);
2219
2220	(void) sigset(SIGALRM, alarmclk);
2221	alarmclk();
2222
2223	(void) sigset(SIGCLD, childeath);
2224	(void) sigset(SIGPWR, powerfail);
2225}
2226
2227/*
2228 * Set up pipe for "godchildren". If the file exists and is a pipe just open
2229 * it. Else, if the file system is r/w create it.  Otherwise, defer its
2230 * creation and open until after /var/run has been mounted.  This function is
2231 * only called on startup and when explicitly requested via LVLQ.
2232 */
2233void
2234setup_pipe()
2235{
2236	struct stat stat_buf;
2237	struct statvfs statvfs_buf;
2238	struct sigaction act;
2239
2240	/*
2241	 * Always close the previous pipe descriptor as the mounted filesystems
2242	 * may have changed.
2243	 */
2244	if (Pfd >= 0)
2245		(void) close(Pfd);
2246
2247	if ((stat(INITPIPE, &stat_buf) == 0) &&
2248	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2249		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2250	else
2251		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2252		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2253			(void) unlink(INITPIPE);
2254			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2255			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2256		}
2257
2258	if (Pfd >= 0) {
2259		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2260		/*
2261		 * Read pipe in message discard mode.
2262		 */
2263		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2264
2265		act.sa_handler = sigpoll;
2266		act.sa_flags = 0;
2267		(void) sigemptyset(&act.sa_mask);
2268		(void) sigaddset(&act.sa_mask, SIGCLD);
2269		(void) sigaction(SIGPOLL, &act, NULL);
2270	}
2271}
2272
2273/*
2274 * siglvl - handle an asynchronous signal from init(1M) telling us that we
2275 * should change the current run level.  We set new_state accordingly.
2276 */
2277void
2278siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2279{
2280	struct PROC_TABLE *process;
2281	struct sigaction act;
2282
2283	/*
2284	 * If the signal was from the kernel (rather than init(1M)) then init
2285	 * itself tripped the signal.  That is, we might have a bug and tripped
2286	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2287	 * such a case we reset the disposition to SIG_DFL, block all signals
2288	 * in uc_mask but the current one, and return to the interrupted ucp
2289	 * to effect an appropriate death.  The kernel will then restart us.
2290	 *
2291	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2292	 * the kernel can send us when it wants to effect an orderly reboot.
2293	 * For this case we must also verify si_code is zero, rather than a
2294	 * code such as FPE_INTDIV which a bug might have triggered.
2295	 */
2296	if (sip != NULL && SI_FROMKERNEL(sip) &&
2297	    (sig != SIGFPE || sip->si_code == 0)) {
2298
2299		(void) sigemptyset(&act.sa_mask);
2300		act.sa_handler = SIG_DFL;
2301		act.sa_flags = 0;
2302		(void) sigaction(sig, &act, NULL);
2303
2304		(void) sigfillset(&ucp->uc_sigmask);
2305		(void) sigdelset(&ucp->uc_sigmask, sig);
2306		ucp->uc_flags |= UC_SIGMASK;
2307
2308		(void) setcontext(ucp);
2309	}
2310
2311	/*
2312	 * If the signal received is a LVLQ signal, do not really
2313	 * change levels, just restate the current level.  If the
2314	 * signal is not a LVLQ, set the new level to the signal
2315	 * received.
2316	 */
2317	if (sig == LVLQ) {
2318		new_state = cur_state;
2319		lvlq_received = B_TRUE;
2320	} else {
2321		new_state = sig;
2322	}
2323
2324	/*
2325	 * Clear all times and repeat counts in the process table
2326	 * since either the level is changing or the user has editted
2327	 * the inittab file and wants us to look at it again.
2328	 * If the user has fixed a typo, we don't want residual timing
2329	 * data preventing the fixed command line from executing.
2330	 */
2331	for (process = proc_table;
2332	    (process < proc_table + num_proc); process++) {
2333		process->p_time = 0L;
2334		process->p_count = 0;
2335	}
2336
2337	/*
2338	 * Set the flag to indicate that a "user signal" was received.
2339	 */
2340	wakeup.w_flags.w_usersignal = 1;
2341}
2342
2343
/*
 * alarmclk(): record that the timer has expired by setting time_up to TRUE.
 *
 * Installed as the SIGALRM handler by init_signals(), and also called
 * directly by init_signals() and setimer().
 */
static void
alarmclk()
{
	time_up = TRUE;
}
2352
2353/*
2354 * childeath_single():
2355 *
2356 * This used to be the SIGCLD handler and it was set with signal()
2357 * (as opposed to sigset()).  When a child exited we'd come to the
2358 * handler, wait for the child, and reenable the handler with
2359 * signal() just before returning.  The implementation of signal()
2360 * checks with waitid() for waitable children and sends a SIGCLD
2361 * if there are some.  If children are exiting faster than the
2362 * handler can run we keep sending signals and the handler never
2363 * gets to return and eventually the stack runs out and init dies.
2364 * To prevent that we set the handler with sigset() so the handler
2365 * doesn't need to be reset, and in childeath() (see below) we
2366 * call childeath_single() as long as there are children to be
2367 * waited for.  If a child exits while init is in the handler a
2368 * SIGCLD will be pending and delivered on return from the handler.
2369 * If the child was already waited for the handler will have nothing
2370 * to do and return, otherwise the child will be waited for.
2371 */
2372static void
2373childeath_single(pid_t pid, int status)
2374{
2375	struct PROC_TABLE	*process;
2376	struct pidlist		*pp;
2377
2378	/*
2379	 * Scan the process table to see if we are interested in this process.
2380	 */
2381	for (process = proc_table;
2382	    (process < proc_table + num_proc); process++) {
2383		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2384		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2385
2386			/*
2387			 * Mark this process as having died and store the exit
2388			 * status.  Also set the wakeup flag for a dead child
2389			 * and break out of the loop.
2390			 */
2391			process->p_flags &= ~LIVING;
2392			process->p_exit = (short)status;
2393			wakeup.w_flags.w_childdeath = 1;
2394
2395			return;
2396		}
2397	}
2398
2399	/*
2400	 * No process was found above, look through auxiliary list.
2401	 */
2402	(void) sighold(SIGPOLL);
2403	pp = Plhead;
2404	while (pp) {
2405		if (pid > pp->pl_pid) {
2406			/*
2407			 * Keep on looking.
2408			 */
2409			pp = pp->pl_next;
2410			continue;
2411		} else if (pid < pp->pl_pid) {
2412			/*
2413			 * Not in the list.
2414			 */
2415			break;
2416		} else {
2417			/*
2418			 * This is a dead "godchild".
2419			 */
2420			pp->pl_dflag = 1;
2421			pp->pl_exit = (short)status;
2422			wakeup.w_flags.w_childdeath = 1;
2423			Gchild = 1;	/* Notice to call cleanaux(). */
2424			break;
2425		}
2426	}
2427
2428	(void) sigrelse(SIGPOLL);
2429}
2430
/*
 * childeath(): SIGCLD handler (installed by init_signals()).
 *
 * Collects every child that can be waited for without blocking and hands
 * each pid/status pair to childeath_single().  The signal number is not
 * used.
 *
 * errno is saved on entry and restored on exit: the waitpid() loop always
 * ends with a call that finds nothing to reap, and that call (or the calls
 * made by childeath_single()) may change errno underneath whatever code the
 * signal interrupted.
 */
/* ARGSUSED */
static void
childeath(int signo)
{
	int saved_errno = errno;
	pid_t pid;
	int status;

	while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
		childeath_single(pid, status);

	errno = saved_errno;
}
2441
/*
 * powerfail(): installed by init_signals() as the SIGPWR handler.  Lowers
 * our nice value by 19 and sets the w_powerhit wakeup flag.
 */
static void
powerfail()
{
	(void) nice(-19);
	wakeup.w_flags.w_powerhit = 1;
}
2448
2449/*
2450 * efork() forks a child and the parent inserts the process in its table
2451 * of processes that are directly a result of forks that it has performed.
2452 * The child just changes the "global" with the process id for this process
2453 * to it's new value.
2454 * If efork() is called with a pointer into the proc_table it uses that slot,
2455 * otherwise it searches for a free slot.  Regardless of how it was called,
2456 * it returns the pointer to the proc_table entry
2457 *
2458 * The SIGCLD signal is blocked (held) before calling efork()
2459 * and is unblocked (released) after efork() returns.
2460 *
2461 * Ideally, this should be rewritten to use modern signal semantics.
2462 */
2463static struct PROC_TABLE *
2464efork(int action, struct PROC_TABLE *process, int modes)
2465{
2466	pid_t	childpid;
2467	struct PROC_TABLE *proc;
2468	int		i;
2469	/*
2470	 * Freshen up the proc_table, removing any entries for dead processes
2471	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2472	 */
2473	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2474		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2475		    (OCCUPIED)) {
2476			/*
2477			 * Is this a named process?
2478			 * If so, do the necessary bookkeeping.
2479			 */
2480			if (proc->p_flags & NAMED)
2481				(void) account(DEAD_PROCESS, proc, NULL);
2482
2483			/*
2484			 * Free this entry for new usage.
2485			 */
2486			proc->p_flags = 0;
2487		}
2488	}
2489
2490	while ((childpid = fork()) == FAILURE) {
2491		/*
2492		 * Shorten the alarm timer in case someone else's child dies
2493		 * and free up a slot in the process table.
2494		 */
2495		setimer(5);
2496
2497		/*
2498		 * Wait for some children to die.  Since efork()
2499		 * is always called with SIGCLD blocked, unblock
2500		 * it here so that child death signals can come in.
2501		 */
2502		(void) sigrelse(SIGCLD);
2503		(void) pause();
2504		(void) sighold(SIGCLD);
2505		setimer(0);
2506	}
2507
2508	if (childpid != 0) {
2509
2510		if (process == NULLPROC) {
2511			/*
2512			 * No proc table pointer specified so search
2513			 * for a free slot.
2514			 */
2515			for (process = proc_table;  process->p_flags != 0 &&
2516			    (process < proc_table + num_proc); process++)
2517					;
2518
2519			if (process == (proc_table + num_proc)) {
2520				int old_proc_table_size = num_proc;
2521
2522				/* Increase the process table size */
2523				increase_proc_table_size();
2524				if (old_proc_table_size == num_proc) {
2525					/* didn't grow: memory failure */
2526					return (NO_ROOM);
2527				} else {
2528					process =
2529					    proc_table + old_proc_table_size;
2530				}
2531			}
2532
2533			process->p_time = 0L;
2534			process->p_count = 0;
2535		}
2536		process->p_id[0] = '\0';
2537		process->p_id[1] = '\0';
2538		process->p_id[2] = '\0';
2539		process->p_id[3] = '\0';
2540		process->p_pid = childpid;
2541		process->p_flags = (LIVING | OCCUPIED | modes);
2542		process->p_exit = 0;
2543
2544		st_write();
2545	} else {
2546		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2547			(void) setpgrp();
2548
2549		process = NULLPROC;
2550
2551		/*
2552		 * Reset all signals to the system defaults.
2553		 */
2554		for (i = SIGHUP; i <= SIGRTMAX; i++)
2555			(void) sigset(i, SIG_DFL);
2556
2557		/*
2558		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2559		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2560		 *
2561		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2562		 * for backward compatibility.
2563		 */
2564		(void) sigset(SIGTTIN, SIG_IGN);
2565		(void) sigset(SIGTTOU, SIG_IGN);
2566		(void) sigset(SIGTSTP, SIG_IGN);
2567		(void) sigset(SIGXCPU, SIG_IGN);
2568		(void) sigset(SIGXFSZ, SIG_IGN);
2569	}
2570	return (process);
2571}
2572
2573
2574/*
2575 * waitproc() waits for a specified process to die.  For this function to
2576 * work, the specified process must already in the proc_table.  waitproc()
2577 * returns the exit status of the specified process when it dies.
2578 */
2579static long
2580waitproc(struct PROC_TABLE *process)
2581{
2582	int		answer;
2583	sigset_t	oldmask, newmask, zeromask;
2584
2585	(void) sigemptyset(&zeromask);
2586	(void) sigemptyset(&newmask);
2587
2588	(void) sigaddset(&newmask, SIGCLD);
2589
2590	/* Block SIGCLD and save the current signal mask */
2591	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2592		perror("SIG_BLOCK error");
2593
2594	/*
2595	 * Wait around until the process dies.
2596	 */
2597	if (process->p_flags & LIVING)
2598		(void) sigsuspend(&zeromask);
2599
2600	/* Reset signal mask to unblock SIGCLD */
2601	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2602		perror("SIG_SETMASK error");
2603
2604	if (process->p_flags & LIVING)
2605		return (FAILURE);
2606
2607	/*
2608	 * Make sure to only return 16 bits so that answer will always
2609	 * be positive whenever the process of interest really died.
2610	 */
2611	answer = (process->p_exit & 0xffff);
2612
2613	/*
2614	 * Free the slot in the proc_table.
2615	 */
2616	process->p_flags = 0;
2617	return (answer);
2618}
2619
2620/*
2621 * notify_pam_dead(): calls into the PAM framework to close the given session.
2622 */
2623static void
2624notify_pam_dead(struct utmpx *up)
2625{
2626	pam_handle_t *pamh;
2627	char user[sizeof (up->ut_user) + 1];
2628	char ttyn[sizeof (up->ut_line) + 1];
2629	char host[sizeof (up->ut_host) + 1];
2630
2631	/*
2632	 * PAM does not take care of updating utmpx/wtmpx.
2633	 */
2634	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2635	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2636	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2637
2638	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2639		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2640		(void) pam_set_item(pamh, PAM_RHOST, host);
2641		(void) pam_close_session(pamh, 0);
2642		(void) pam_end(pamh, PAM_SUCCESS);
2643	}
2644}
2645
2646/*
2647 * Check you can access utmpx (As / may be read-only and
2648 * /var may not be mounted yet).
2649 */
2650static int
2651access_utmpx(void)
2652{
2653	do {
2654		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2655	} while (!utmpx_ok && errno == EINTR);
2656
2657	return (utmpx_ok);
2658}
2659
2660/*
2661 * account() updates entries in utmpx and appends new entries to the end of
2662 * wtmpx (assuming they exist).  The program argument indicates the name of
2663 * program if INIT_PROCESS, otherwise should be NULL.
2664 *
2665 * account() only blocks for INIT_PROCESS requests.
2666 *
2667 * Returns non-zero if write failed.
2668 */
2669static int
2670account(short state, struct PROC_TABLE *process, char *program)
2671{
2672	struct utmpx utmpbuf, *u, *oldu;
2673	int tmplen;
2674	char fail_buf[UT_LINE_SZ];
2675	sigset_t block, unblock;
2676
2677	if (!utmpx_ok && !access_utmpx()) {
2678		return (-1);
2679	}
2680
2681	/*
2682	 * Set up the prototype for the utmp structure we want to write.
2683	 */
2684	u = &utmpbuf;
2685	(void) memset(u, 0, sizeof (struct utmpx));
2686
2687	/*
2688	 * Fill in the various fields of the utmp structure.
2689	 */
2690	u->ut_id[0] = process->p_id[0];
2691	u->ut_id[1] = process->p_id[1];
2692	u->ut_id[2] = process->p_id[2];
2693	u->ut_id[3] = process->p_id[3];
2694	u->ut_pid = process->p_pid;
2695
2696	/*
2697	 * Fill the "ut_exit" structure.
2698	 */
2699	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2700	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2701	u->ut_type = state;
2702
2703	(void) time(&u->ut_tv.tv_sec);
2704
2705	/*
2706	 * Block signals for utmp update.
2707	 */
2708	(void) sigfillset(&block);
2709	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2710
2711	/*
2712	 * See if there already is such an entry in the "utmpx" file.
2713	 */
2714	setutxent();	/* Start at beginning of utmpx file. */
2715
2716	if ((oldu = getutxid(u)) != NULL) {
2717		/*
2718		 * Copy in the old "user", "line" and "host" fields
2719		 * to our new structure.
2720		 */
2721		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2722		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2723		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2724		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2725		    min(tmplen + 1, sizeof (u->ut_host)) : 0;
2726
2727		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2728			notify_pam_dead(oldu);
2729		}
2730	}
2731
2732	/*
2733	 * Perform special accounting. Insert the special string into the
2734	 * ut_line array. For INIT_PROCESSes put in the name of the
2735	 * program in the "ut_user" field.
2736	 */
2737	switch (state) {
2738	case INIT_PROCESS:
2739		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2740		(void) strcpy(fail_buf, "INIT_PROCESS");
2741		break;
2742
2743	default:
2744		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2745		break;
2746	}
2747
2748	/*
2749	 * Write out the updated entry to utmpx file.
2750	 */
2751	if (pututxline(u) == NULL) {
2752		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2753		    fail_buf, strerror(errno));
2754		endutxent();
2755		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2756		return (-1);
2757	}
2758
2759	/*
2760	 * If we're able to write to utmpx, then attempt to add to the
2761	 * end of the wtmpx file.
2762	 */
2763	updwtmpx(WTMPX, u);
2764
2765	endutxent();
2766
2767	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2768
2769	return (0);
2770}
2771
2772static void
2773clearent(pid_t pid, short status)
2774{
2775	struct utmpx *up;
2776	sigset_t block, unblock;
2777
2778	/*
2779	 * Block signals for utmp update.
2780	 */
2781	(void) sigfillset(&block);
2782	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2783
2784	/*
2785	 * No error checking for now.
2786	 */
2787
2788	setutxent();
2789	while (up = getutxent()) {
2790		if (up->ut_pid == pid) {
2791			if (up->ut_type == DEAD_PROCESS) {
2792				/*
2793				 * Cleaned up elsewhere.
2794				 */
2795				continue;
2796			}
2797
2798			notify_pam_dead(up);
2799
2800			up->ut_type = DEAD_PROCESS;
2801			up->ut_exit.e_termination = WTERMSIG(status);
2802			up->ut_exit.e_exit = WEXITSTATUS(status);
2803			(void) time(&up->ut_tv.tv_sec);
2804
2805			(void) pututxline(up);
2806			/*
2807			 * Now attempt to add to the end of the
2808			 * wtmp and wtmpx files.  Do not create
2809			 * if they don't already exist.
2810			 */
2811			updwtmpx(WTMPX, up);
2812
2813			break;
2814		}
2815	}
2816
2817	endutxent();
2818	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2819}
2820
/*
 * prog_name() searches for the first word or unix path name in "string"
 * and returns a pointer to the last element of that pathname.
 *
 * The result is held in a static buffer that is overwritten by the next
 * call, and is cut off after UT_USER_SZ characters.  An empty string is
 * returned when the first non-blank character cannot begin a word.
 */
static char *
prog_name(char *string)
{
	static char word[UT_USER_SZ + 1];
	char	*base, *end, *scan;
	size_t	len;
	char	first;

	/*
	 * Step over leading spaces and tabs.
	 */
	string += strspn(string, " \t");

	/*
	 * A word may only begin with '.', '/', '_', a letter or a digit;
	 * anything else (including the end of the string) yields "".
	 */
	first = *string;
	if (!(first == '.' || first == '/' || first == '_' ||
	    (first >= 'a' && first <= 'z') ||
	    (first >= 'A' && first <= 'Z') ||
	    (first >= '0' && first <= '9')))
		return ("");

	/*
	 * The word runs up to the first ' ', '\t', '\n' or '\0'.  Within
	 * it, the last path element begins just after the final '/'.
	 */
	end = string + strcspn(string, " \t\n");
	base = string;
	for (scan = string; scan < end; scan++) {
		if (*scan == '/')
			base = scan + 1;
	}

	/*
	 * Copy out at most UT_USER_SZ characters of that element and
	 * null terminate the result.
	 */
	len = (size_t)(end - base);
	if (len > UT_USER_SZ)
		len = UT_USER_SZ;
	(void) memcpy(word, base, len);
	word[len] = '\0';

	return (&word[0]);
}
2871
2872
2873/*
2874 * realcon() returns a nonzero value if there is a character device
2875 * associated with SYSCON that has the same device number as CONSOLE.
2876 */
2877static int
2878realcon()
2879{
2880	struct stat sconbuf, conbuf;
2881
2882	if (stat(SYSCON, &sconbuf) != -1 &&
2883	    stat(CONSOLE, &conbuf) != -1 &&
2884	    S_ISCHR(sconbuf.st_mode) &&
2885	    S_ISCHR(conbuf.st_mode) &&
2886	    sconbuf.st_rdev == conbuf.st_rdev) {
2887		return (1);
2888	} else {
2889		return (0);
2890	}
2891}
2892
2893
2894/*
2895 * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2896 * Returns true if the IOCTLSYSCON file needs to be written (with
2897 * write_ioctl_syscon() below)
2898 */
2899static int
2900get_ioctl_syscon()
2901{
2902	FILE	*fp;
2903	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2904	int		i, valid_format = 0;
2905
2906	/*
2907	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2908	 */
2909	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2910		stored_syscon_termios = dflt_termios;
2911		console(B_TRUE,
2912		    "warning:%s does not exist, default settings assumed\n",
2913		    IOCTLSYSCON);
2914	} else {
2915
2916		i = fscanf(fp,
2917	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2918		    &iflags, &oflags, &cflags, &lflags,
2919		    &cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2920		    &cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2921		    &cc[14], &cc[15], &cc[16], &cc[17]);
2922
2923		if (i == 22) {
2924			stored_syscon_termios.c_iflag = iflags;
2925			stored_syscon_termios.c_oflag = oflags;
2926			stored_syscon_termios.c_cflag = cflags;
2927			stored_syscon_termios.c_lflag = lflags;
2928			for (i = 0; i < 18; i++)
2929				stored_syscon_termios.c_cc[i] = (char)cc[i];
2930			valid_format = 1;
2931		} else if (i == 13) {
2932		rewind(fp);
2933		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2934		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2935		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2936
2937		/*
2938		 * If the file is formatted properly, use the values to
2939		 * initialize the console terminal condition.
2940		 */
2941		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2942		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2943		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2944		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2945		for (i = 0; i < 8; i++)
2946			stored_syscon_termios.c_cc[i] = (char)cc[i];
2947		valid_format = 1;
2948		}
2949		(void) fclose(fp);
2950
2951		/* If the file is badly formatted, use the default settings. */
2952		if (!valid_format)
2953			stored_syscon_termios = dflt_termios;
2954	}
2955
2956	/* If the file had a bad format, rewrite it later. */
2957	return (!valid_format);
2958}
2959
2960
2961static void
2962write_ioctl_syscon()
2963{
2964	FILE *fp;
2965	int i;
2966
2967	(void) unlink(SYSCON);
2968	(void) link(SYSTTY, SYSCON);
2969	(void) umask(022);
2970	fp = fopen(IOCTLSYSCON, "w");
2971
2972	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2973	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2974	    stored_syscon_termios.c_lflag);
2975	for (i = 0; i < 8; ++i)
2976		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2977	(void) putc('\n', fp);
2978
2979	(void) fflush(fp);
2980	(void) fsync(fileno(fp));
2981	(void) fclose(fp);
2982	(void) umask(cmask);
2983}
2984
2985
2986/*
2987 * void console(boolean_t, char *, ...)
2988 *   Outputs the requested message to the system console.  Note that the number
2989 *   of arguments passed to console() should be determined by the print format.
2990 *
2991 *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2992 *   message.
2993 *
2994 *   To make sure we write to the console in a sane fashion, we use the modes
2995 *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2996 *   Afterwards we restore whatever modes were already there.
2997 */
2998/* PRINTFLIKE2 */
2999static void
3000console(boolean_t prefix, char *format, ...)
3001{
3002	char	outbuf[BUFSIZ];
3003	va_list	args;
3004	int fd, getret;
3005	struct termios old_syscon_termios;
3006	FILE *f;
3007
3008	/*
3009	 * We open SYSCON anew each time in case it has changed (see
3010	 * userinit()).
3011	 */
3012	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
3013	    (f = fdopen(fd, "r+")) == NULL) {
3014		if (prefix)
3015			syslog(LOG_WARNING, "INIT: ");
3016		va_start(args, format);
3017		vsyslog(LOG_WARNING, format, args);
3018		va_end(args);
3019		if (fd >= 0)
3020			(void) close(fd);
3021		return;
3022	}
3023	setbuf(f, &outbuf[0]);
3024
3025	getret = tcgetattr(fd, &old_syscon_termios);
3026	old_syscon_termios.c_cflag &= ~HUPCL;
3027	if (realcon())
3028		/* Don't overwrite cflag of real console. */
3029		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3030
3031	stored_syscon_termios.c_cflag &= ~HUPCL;
3032
3033	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3034
3035	if (prefix)
3036		(void) fprintf(f, "\nINIT: ");
3037	va_start(args, format);
3038	(void) vfprintf(f, format, args);
3039	va_end(args);
3040
3041	if (getret == 0)
3042		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3043
3044	(void) fclose(f);
3045}
3046
3047/*
3048 * timer() is a substitute for sleep() which uses alarm() and pause().
3049 */
3050static void
3051timer(int waitime)
3052{
3053	setimer(waitime);
3054	while (time_up == FALSE)
3055		(void) pause();
3056}
3057
3058static void
3059setimer(int timelimit)
3060{
3061	alarmclk();
3062	(void) alarm(timelimit);
3063	time_up = (timelimit ? FALSE : TRUE);
3064}
3065
3066/*
3067 * Fails with
3068 *   ENOMEM - out of memory
3069 *   ECONNABORTED - repository connection broken
3070 *   EPERM - permission denied
3071 *   EACCES - backend access denied
3072 *   EROFS - backend readonly
3073 */
static int
get_or_add_startd(scf_instance_t *inst)
{
	scf_handle_t *h;
	scf_scope_t *scope = NULL;
	scf_service_t *svc = NULL;
	int ret = 0;

	/* Use the repository handle that inst is associated with. */
	h = scf_instance_handle(inst);

	/* Fast path: if the FMRI decodes directly into inst, we are done. */
	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
		return (0);

	switch (scf_error()) {
	case SCF_ERROR_CONNECTION_BROKEN:
		return (ECONNABORTED);

	case SCF_ERROR_NOT_FOUND:
		/* Something is missing; look up / add each piece below. */
		break;

	case SCF_ERROR_HANDLE_MISMATCH:
	case SCF_ERROR_INVALID_ARGUMENT:
	case SCF_ERROR_CONSTRAINT_VIOLATED:
	default:
		bad_error("scf_handle_decode_fmri", scf_error());
	}

	/* Make sure we're right, since we're adding piece-by-piece. */
	assert(strcmp(SCF_SERVICE_STARTD,
	    "svc:/system/svc/restarter:default") == 0);

	/*
	 * Any failure to create the temporary scope or service object is
	 * reported to the caller as ENOMEM.
	 */
	if ((scope = scf_scope_create(h)) == NULL ||
	    (svc = scf_service_create(h)) == NULL) {
		ret = ENOMEM;
		goto out;
	}

	/*
	 * The labels below form a retry ladder: when a later step reports
	 * that an object was deleted or already exists, control jumps back
	 * to the step that (re)fetches or (re)creates it.
	 */
get_scope:
	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			ret = ECONNABORTED;
			goto out;

		case SCF_ERROR_NOT_FOUND:
			/* No local scope: give up and exit the process. */
			(void) fputs(gettext(
			    "smf(5) repository missing local scope.\n"),
			    stderr);
			exit(1);
			/* NOTREACHED */

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		default:
			bad_error("scf_handle_get_scope", scf_error());
		}
	}

get_svc:
	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			ret = ECONNABORTED;
			goto out;

		case SCF_ERROR_DELETED:
			/* The scope was reported deleted; fetch it again. */
			goto get_scope;

		case SCF_ERROR_NOT_FOUND:
			/* Service does not exist yet; add it below. */
			break;

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		case SCF_ERROR_NOT_SET:
		default:
			bad_error("scf_scope_get_service", scf_error());
		}

add_svc:
		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
		    0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
				ret = ECONNABORTED;
				goto out;

			case SCF_ERROR_EXISTS:
				/* It exists after all; go look it up. */
				goto get_svc;

			case SCF_ERROR_PERMISSION_DENIED:
				ret = EPERM;
				goto out;

			case SCF_ERROR_BACKEND_ACCESS:
				ret = EACCES;
				goto out;

			case SCF_ERROR_BACKEND_READONLY:
				ret = EROFS;
				goto out;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_scope_add_service", scf_error());
			}
		}
	}

get_inst:
	if (scf_service_get_instance(svc, "default", inst) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			ret = ECONNABORTED;
			goto out;

		case SCF_ERROR_DELETED:
			/* The service was reported deleted; add it again. */
			goto add_svc;

		case SCF_ERROR_NOT_FOUND:
			/* Instance does not exist yet; add it below. */
			break;

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		case SCF_ERROR_NOT_SET:
		default:
			bad_error("scf_service_get_instance", scf_error());
		}

		if (scf_service_add_instance(svc, "default", inst) !=
		    0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
				ret = ECONNABORTED;
				goto out;

			case SCF_ERROR_DELETED:
				/* The service was reported deleted; re-add. */
				goto add_svc;

			case SCF_ERROR_EXISTS:
				/* It exists after all; go look it up. */
				goto get_inst;

			case SCF_ERROR_PERMISSION_DENIED:
				ret = EPERM;
				goto out;

			case SCF_ERROR_BACKEND_ACCESS:
				ret = EACCES;
				goto out;

			case SCF_ERROR_BACKEND_READONLY:
				ret = EROFS;
				goto out;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_service_add_instance",
				    scf_error());
			}
		}
	}

	/* inst now refers to the "default" instance of the service. */
	ret = 0;

out:
	/* Only the temporary scope and service objects are released here. */
	scf_service_destroy(svc);
	scf_scope_destroy(scope);
	return (ret);
}
3247
3248/*
3249 * Fails with
3250 *   ECONNABORTED - repository connection broken
3251 *   ECANCELED - the transaction's property group was deleted
3252 */
3253static int
3254transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3255    const char *pname, scf_type_t type)
3256{
3257change_type:
3258	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3259		return (0);
3260
3261	switch (scf_error()) {
3262	case SCF_ERROR_CONNECTION_BROKEN:
3263		return (ECONNABORTED);
3264
3265	case SCF_ERROR_DELETED:
3266		return (ECANCELED);
3267
3268	case SCF_ERROR_NOT_FOUND:
3269		goto new;
3270
3271	case SCF_ERROR_HANDLE_MISMATCH:
3272	case SCF_ERROR_INVALID_ARGUMENT:
3273	case SCF_ERROR_NOT_BOUND:
3274	case SCF_ERROR_NOT_SET:
3275	default:
3276		bad_error("scf_transaction_property_change_type", scf_error());
3277	}
3278
3279new:
3280	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3281		return (0);
3282
3283	switch (scf_error()) {
3284	case SCF_ERROR_CONNECTION_BROKEN:
3285		return (ECONNABORTED);
3286
3287	case SCF_ERROR_DELETED:
3288		return (ECANCELED);
3289
3290	case SCF_ERROR_EXISTS:
3291		goto change_type;
3292
3293	case SCF_ERROR_HANDLE_MISMATCH:
3294	case SCF_ERROR_INVALID_ARGUMENT:
3295	case SCF_ERROR_NOT_BOUND:
3296	case SCF_ERROR_NOT_SET:
3297	default:
3298		bad_error("scf_transaction_property_new", scf_error());
3299		/* NOTREACHED */
3300	}
3301}
3302
3303static void
3304scferr(void)
3305{
3306	switch (scf_error()) {
3307	case SCF_ERROR_NO_MEMORY:
3308		console(B_TRUE, gettext("Out of memory.\n"));
3309		break;
3310
3311	case SCF_ERROR_CONNECTION_BROKEN:
3312		console(B_TRUE, gettext(
3313		    "Connection to smf(5) repository server broken.\n"));
3314		break;
3315
3316	case SCF_ERROR_NO_RESOURCES:
3317		console(B_TRUE, gettext(
3318		    "smf(5) repository server is out of memory.\n"));
3319		break;
3320
3321	case SCF_ERROR_PERMISSION_DENIED:
3322		console(B_TRUE, gettext("Insufficient privileges.\n"));
3323		break;
3324
3325	default:
3326		console(B_TRUE, gettext("libscf error: %s\n"),
3327		    scf_strerror(scf_error()));
3328	}
3329}
3330
/*
 * Store run-level character rl in the smf(5) repository.
 *
 * The value is written as a one-character astring to the "runlevel"
 * property of property group SCF_PG_OPTIONS_OVR on the instance obtained
 * from get_or_add_startd(); the property group is created if it does not
 * exist.  Objects that are reported deleted while we work are re-fetched or
 * re-created by jumping back to the appropriate label.
 *
 * Nothing is returned: failures are reported on the console (mostly via
 * scferr()) and the function simply cleans up and returns.
 */
static void
lscf_set_runlevel(char rl)
{
	scf_handle_t *h;
	scf_instance_t *inst = NULL;
	scf_propertygroup_t *pg = NULL;
	scf_transaction_t *tx = NULL;
	scf_transaction_entry_t *ent = NULL;
	scf_value_t *val = NULL;
	char buf[2];
	int r;

	h = scf_handle_create(SCF_VERSION);
	if (h == NULL) {
		scferr();
		return;
	}

	if (scf_handle_bind(h) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_NO_SERVER:
			console(B_TRUE,
			    gettext("smf(5) repository server not running.\n"));
			goto bail;

		default:
			scferr();
			goto bail;
		}
	}

	/*
	 * Allocate every libscf object up front.  The pointers were
	 * initialized to NULL, so a partial failure is still handed to the
	 * common cleanup code at bail.
	 */
	if ((inst = scf_instance_create(h)) == NULL ||
	    (pg = scf_pg_create(h)) == NULL ||
	    (val = scf_value_create(h)) == NULL ||
	    (tx = scf_transaction_create(h)) == NULL ||
	    (ent = scf_entry_create(h)) == NULL) {
		scferr();
		goto bail;
	}

get_inst:
	r = get_or_add_startd(inst);
	switch (r) {
	case 0:
		break;

	case ENOMEM:
	case ECONNABORTED:
	case EPERM:
	case EACCES:
	case EROFS:
		scferr();
		goto bail;
	default:
		bad_error("get_or_add_startd", r);
	}

get_pg:
	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			scferr();
			goto bail;

		case SCF_ERROR_DELETED:
			/* The instance was reported deleted; get it again. */
			goto get_inst;

		case SCF_ERROR_NOT_FOUND:
			/* No such property group yet; add it below. */
			break;

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		case SCF_ERROR_NOT_SET:
		default:
			bad_error("scf_instance_get_pg", scf_error());
		}

add_pg:
		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
		    0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
			case SCF_ERROR_PERMISSION_DENIED:
			case SCF_ERROR_BACKEND_ACCESS:
				scferr();
				goto bail;

			case SCF_ERROR_DELETED:
				goto get_inst;

			case SCF_ERROR_EXISTS:
				/* It exists after all; go look it up. */
				goto get_pg;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_instance_add_pg", scf_error());
			}
		}
	}

	/* Build the one-character, NUL-terminated value to store. */
	buf[0] = rl;
	buf[1] = '\0';
	r = scf_value_set_astring(val, buf);
	assert(r == 0);

	/*
	 * Transaction loop: start, add the property, commit.  A commit
	 * result of 1 ends the loop; a result of 0 causes the transaction to
	 * be reset and pg to be refreshed before trying again.
	 */
	for (;;) {
		if (scf_transaction_start(tx, pg) != 0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
			case SCF_ERROR_PERMISSION_DENIED:
			case SCF_ERROR_BACKEND_ACCESS:
				scferr();
				goto bail;

			case SCF_ERROR_DELETED:
				/* The pg was reported deleted; re-create it. */
				goto add_pg;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_NOT_BOUND:
			case SCF_ERROR_IN_USE:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_transaction_start", scf_error());
			}
		}

		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
		switch (r) {
		case 0:
			break;

		case ECONNABORTED:
			scferr();
			goto bail;

		case ECANCELED:
			/* pg deleted mid-transaction: reset and re-create. */
			scf_transaction_reset(tx);
			goto add_pg;

		default:
			bad_error("transaction_add_set", r);
		}

		r = scf_entry_add_value(ent, val);
		assert(r == 0);

		r = scf_transaction_commit(tx);
		if (r == 1)
			break;

		if (r != 0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
			case SCF_ERROR_PERMISSION_DENIED:
			case SCF_ERROR_BACKEND_ACCESS:
			case SCF_ERROR_BACKEND_READONLY:
				scferr();
				goto bail;

			case SCF_ERROR_DELETED:
				scf_transaction_reset(tx);
				goto add_pg;

			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_BOUND:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_transaction_commit",
				    scf_error());
			}
		}

		/* r == 0: refresh our view of pg and try the commit again. */
		scf_transaction_reset(tx);
		(void) scf_pg_update(pg);
	}

bail:
	/* Common exit path for success and failure alike. */
	scf_transaction_destroy(tx);
	scf_entry_destroy(ent);
	scf_value_destroy(val);
	scf_pg_destroy(pg);
	scf_instance_destroy(inst);

	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
}
3520
3521/*
3522 * Function to handle requests from users to main init running as process 1.
3523 */
3524static void
3525userinit(int argc, char **argv)
3526{
3527	FILE	*fp;
3528	char	*ln;
3529	int	init_signal;
3530	struct stat	sconbuf, conbuf;
3531	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3532
3533	/*
3534	 * We are a user invoked init.  Is there an argument and is it
3535	 * a single character?  If not, print usage message and quit.
3536	 */
3537	if (argc != 2 || argv[1][1] != '\0') {
3538		(void) fprintf(stderr, usage_msg);
3539		exit(0);
3540	}
3541
3542	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3543		(void) fprintf(stderr, usage_msg);
3544		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3545		    argv[1]);
3546		exit(1);
3547	}
3548
3549	if (init_signal == SINGLE_USER) {
3550		/*
3551		 * Make sure this process is talking to a legal tty line
3552		 * and that /dev/syscon is linked to this line.
3553		 */
3554		ln = ttyname(0);	/* Get the name of tty */
3555		if (ln == NULL) {
3556			(void) fprintf(stderr,
3557			    "Standard input not a tty line\n");
3558			(void) audit_put_record(ADT_FAILURE,
3559			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3560			exit(1);
3561		}
3562
3563		if ((stat(ln, &sconbuf) != -1) &&
3564		    (stat(SYSCON, &conbuf) == -1 ||
3565		    sconbuf.st_rdev != conbuf.st_rdev)) {
3566			/*
3567			 * /dev/syscon needs to change.
3568			 * Unlink /dev/syscon and relink it to the current line.
3569			 */
3570			if (lstat(SYSCON, &conbuf) != -1 &&
3571			    unlink(SYSCON) == FAILURE) {
3572				perror("Can't unlink /dev/syscon");
3573				(void) fprintf(stderr,
3574				    "Run command on the system console.\n");
3575				(void) audit_put_record(ADT_FAILURE,
3576				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3577				exit(1);
3578			}
3579			if (symlink(ln, SYSCON) == FAILURE) {
3580				(void) fprintf(stderr,
3581				    "Can't symlink /dev/syscon to %s: %s", ln,
3582				    strerror(errno));
3583
3584				/* Try to leave a syscon */
3585				(void) link(SYSTTY, SYSCON);
3586				(void) audit_put_record(ADT_FAILURE,
3587				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3588				exit(1);
3589			}
3590
3591			/*
3592			 * Try to leave a message on system console saying where
3593			 * /dev/syscon is currently connected.
3594			 */
3595			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3596				(void) fprintf(fp,
3597				    "\n****	SYSCON CHANGED TO %s	****\n",
3598				    ln);
3599				(void) fclose(fp);
3600			}
3601		}
3602	}
3603
3604	update_boot_archive(init_signal);
3605
3606	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3607
3608	/*
3609	 * Signal init; init will take care of telling svc.startd.
3610	 */
3611	if (kill(init_pid, init_signal) == FAILURE) {
3612		(void) fprintf(stderr, "Must be super-user\n");
3613		(void) audit_put_record(ADT_FAILURE,
3614		    ADT_FAIL_VALUE_AUTH, argv[1]);
3615		exit(1);
3616	}
3617
3618	exit(0);
3619}
3620
3621
#define	DELTA	25	/* Number of pidlist elements to allocate at a time */

/*
 * Drain pidrec messages from Pfd and apply them to the pid list.
 * (Presumably installed as the SIGPOLL handler -- confirm at the sigset()
 * call site; the argument n is unused.)
 *
 * Each message is one struct pidrec.  ADDPID inserts pd_pid into the list
 * headed by Plhead, which is kept sorted by increasing pid; REMPID removes
 * it.  List nodes come from, and are returned to, the free list Plfree,
 * which is refilled DELTA nodes at a time.  The function returns as soon as
 * a read() does not deliver a complete record.
 */
/* ARGSUSED */
void
sigpoll(int n)
{
	struct pidrec prec;
	struct pidrec *p = &prec;
	struct pidlist *plp;
	struct pidlist *tp, *savetp;
	int i;

	/* Nothing to read from if the pipe is not open. */
	if (Pfd < 0) {
		return;
	}

	for (;;) {
		/*
		 * Important Note: Either read will really fail (in which case
		 * return is all we can do) or will get EAGAIN (Pfd was opened
		 * O_NDELAY), in which case we also want to return.
		 * Always return from here!
		 */
		if (read(Pfd, p, sizeof (struct pidrec)) !=
						sizeof (struct pidrec)) {
			return;
		}
		switch (p->pd_type) {

		case ADDPID:
			/*
			 * New "godchild", add to list.
			 */
			if (Plfree == NULL) {
				plp = (struct pidlist *)calloc(DELTA,
				    sizeof (struct pidlist));
				if (plp == NULL) {
					/* Can't save pid */
					break;
				}
				/*
				 * Point at 2nd record allocated, we'll use plp.
				 */
				tp = plp + 1;
				/*
				 * Link them into a chain.  The last element's
				 * pl_next stays zeroed from calloc(), which
				 * terminates the free list.
				 */
				Plfree = tp;
				for (i = 0; i < DELTA - 2; i++) {
					tp->pl_next = tp + 1;
					tp++;
				}
			} else {
				/* Take the first node off the free list. */
				plp = Plfree;
				Plfree = plp->pl_next;
			}
			plp->pl_pid = p->pd_pid;
			plp->pl_dflag = 0;
			plp->pl_next = NULL;
			/*
			 * Note - pid list is kept in increasing order of pids.
			 */
			if (Plhead == NULL) {
				Plhead = plp;
				/* Back up to read next record */
				break;
			} else {
				/* savetp trails tp by one node. */
				savetp = tp = Plhead;
				while (tp) {
					if (plp->pl_pid > tp->pl_pid) {
						savetp = tp;
						tp = tp->pl_next;
						continue;
					} else if (plp->pl_pid < tp->pl_pid) {
						/* Insert in front of tp. */
						if (tp == Plhead) {
							plp->pl_next = Plhead;
							Plhead = plp;
						} else {
							plp->pl_next =
							    savetp->pl_next;
							savetp->pl_next = plp;
						}
						break;
					} else {
						/* Already in list! */
						plp->pl_next = Plfree;
						Plfree = plp;
						break;
					}
				}
				/*
				 * tp is NULL only if the loop ran off the end
				 * without inserting or discarding plp.
				 */
				if (tp == NULL) {
					/* Add to end of list */
					savetp->pl_next = plp;
				}
			}
			/* Back up to read next record. */
			break;

		case REMPID:
			/*
			 * This one was handled by someone else,
			 * purge it from the list.
			 */
			if (Plhead == NULL) {
				/* Back up to read next record. */
				break;
			}
			savetp = tp = Plhead;
			while (tp) {
				if (p->pd_pid > tp->pl_pid) {
					/* Keep on looking. */
					savetp = tp;
					tp = tp->pl_next;
					continue;
				} else if (p->pd_pid < tp->pl_pid) {
					/* Not in list. */
					break;
				} else {
					/* Found it. */
					if (tp == Plhead)
						Plhead = tp->pl_next;
					else
						savetp->pl_next = tp->pl_next;
					/* Return the node to the free list. */
					tp->pl_next = Plfree;
					Plfree = tp;
					break;
				}
			}
			/* Back up to read next record. */
			break;
		default:
			console(B_TRUE, "Bad message on initpipe\n");
			break;
		}
	}
}
3758
3759
3760static void
3761cleanaux()
3762{
3763	struct pidlist *savep, *p;
3764	pid_t	pid;
3765	short	status;
3766
3767	(void) sighold(SIGCLD);
3768	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3769	(void) sighold(SIGPOLL);
3770	savep = p = Plhead;
3771	while (p) {
3772		if (p->pl_dflag) {
3773			/*
3774			 * Found an entry to delete,
3775			 * remove it from list first.
3776			 */
3777			pid = p->pl_pid;
3778			status = p->pl_exit;
3779			if (p == Plhead) {
3780				Plhead = p->pl_next;
3781				p->pl_next = Plfree;
3782				Plfree = p;
3783				savep = p = Plhead;
3784			} else {
3785				savep->pl_next = p->pl_next;
3786				p->pl_next = Plfree;
3787				Plfree = p;
3788				p = savep->pl_next;
3789			}
3790			clearent(pid, status);
3791			continue;
3792		}
3793		savep = p;
3794		p = p->pl_next;
3795	}
3796	(void) sigrelse(SIGPOLL);
3797	(void) sigrelse(SIGCLD);
3798}
3799
3800
3801/*
3802 * /etc/inittab has more entries and we have run out of room in the proc_table
3803 * array. Double the size of proc_table to accomodate the extra entries.
3804 */
3805static void
3806increase_proc_table_size()
3807{
3808	sigset_t block, unblock;
3809	void *ptr;
3810	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3811
3812
3813	/*
3814	 * Block signals for realloc.
3815	 */
3816	(void) sigfillset(&block);
3817	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3818
3819
3820	/*
3821	 * On failure we just return because callers of this function check
3822	 * for failure.
3823	 */
3824	do
3825		ptr = realloc(g_state, g_state_sz + delta);
3826	while (ptr == NULL && errno == EAGAIN)
3827		;
3828
3829	if (ptr != NULL) {
3830		/* ensure that the new part is initialized to zero */
3831		bzero((caddr_t)ptr + g_state_sz, delta);
3832
3833		g_state = ptr;
3834		g_state_sz += delta;
3835		num_proc <<= 1;
3836	}
3837
3838
3839	/* unblock our signals before returning */
3840	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3841}
3842
3843
3844
3845/*
3846 * Sanity check g_state.
3847 */
3848static int
3849st_sane()
3850{
3851	int i;
3852	struct PROC_TABLE *ptp;
3853
3854
3855	/* Note: cur_state is encoded as a signal number */
3856	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3857		return (0);
3858
3859	/* Check num_proc */
3860	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3861	    sizeof (struct PROC_TABLE))
3862		return (0);
3863
3864	/* Check proc_table */
3865	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3866		/* skip unoccupied entries */
3867		if (!(ptp->p_flags & OCCUPIED))
3868			continue;
3869
3870		/* p_flags has no bits outside of PF_MASK */
3871		if (ptp->p_flags & ~(PF_MASK))
3872			return (0);
3873
3874		/* 5 <= pid <= MAXPID */
3875		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3876			return (0);
3877
3878		/* p_count >= 0 */
3879		if (ptp->p_count < 0)
3880			return (0);
3881
3882		/* p_time >= 0 */
3883		if (ptp->p_time < 0)
3884			return (0);
3885	}
3886
3887	return (1);
3888}
3889
3890/*
3891 * Initialize our state.
3892 *
3893 * If the system just booted, then init_state_file, which is located on an
3894 * everpresent tmpfs filesystem, should not exist.
3895 *
3896 * If we were restarted, then init_state_file should exist, in
3897 * which case we'll read it in, sanity check it, and use it.
3898 *
3899 * Note: You can't call console() until proc_table is ready.
3900 */
3901void
3902st_init()
3903{
3904	struct stat stb;
3905	int ret, st_fd, insane = 0;
3906	size_t to_be_read;
3907	char *ptr;
3908
3909
3910	booting = 1;
3911
3912	do {
3913		/*
3914		 * If we can exclusively create the file, then we're the
3915		 * initial invocation of init(1M).
3916		 */
3917		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3918		    S_IRUSR | S_IWUSR);
3919	} while (st_fd == -1 && errno == EINTR);
3920	if (st_fd != -1)
3921		goto new_state;
3922
3923	booting = 0;
3924
3925	do {
3926		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3927	} while (st_fd == -1 && errno == EINTR);
3928	if (st_fd == -1)
3929		goto new_state;
3930
3931	/* Get the size of the file. */
3932	do
3933		ret = fstat(st_fd, &stb);
3934	while (ret == -1 && errno == EINTR)
3935		;
3936	if (ret == -1)
3937		goto new_state;
3938
3939	do
3940		g_state = malloc(stb.st_size);
3941	while (g_state == NULL && errno == EAGAIN)
3942		;
3943	if (g_state == NULL)
3944		goto new_state;
3945
3946	to_be_read = stb.st_size;
3947	ptr = (char *)g_state;
3948	while (to_be_read > 0) {
3949		ssize_t read_ret;
3950
3951		read_ret = read(st_fd, ptr, to_be_read);
3952		if (read_ret < 0) {
3953			if (errno == EINTR)
3954				continue;
3955
3956			goto new_state;
3957		}
3958
3959		to_be_read -= read_ret;
3960		ptr += read_ret;
3961	}
3962
3963	(void) close(st_fd);
3964
3965	g_state_sz = stb.st_size;
3966
3967	if (st_sane()) {
3968		console(B_TRUE, "Restarting.\n");
3969		return;
3970	}
3971
3972	insane = 1;
3973
3974new_state:
3975	if (st_fd >= 0)
3976		(void) close(st_fd);
3977	else
3978		(void) unlink(init_state_file);
3979
3980	if (g_state != NULL)
3981		free(g_state);
3982
3983	/* Something went wrong, so allocate new state. */
3984	g_state_sz = sizeof (struct init_state) +
3985	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3986	do
3987		g_state = calloc(1, g_state_sz);
3988	while (g_state == NULL && errno == EAGAIN)
3989		;
3990	if (g_state == NULL) {
3991		/* Fatal error! */
3992		exit(errno);
3993	}
3994
3995	g_state->ist_runlevel = -1;
3996	num_proc = init_num_proc;
3997
3998	if (!booting) {
3999		console(B_TRUE, "Restarting.\n");
4000
4001		/* Overwrite the bad state file. */
4002		st_write();
4003
4004		if (!insane) {
4005			console(B_TRUE,
4006			    "Error accessing persistent state file `%s'.  "
4007			    "Ignored.\n", init_state_file);
4008		} else {
4009			console(B_TRUE,
4010			    "Persistent state file `%s' is invalid and was "
4011			    "ignored.\n", init_state_file);
4012		}
4013	}
4014}
4015
4016/*
4017 * Write g_state out to the state file.
4018 */
4019void
4020st_write()
4021{
4022	static int complained = 0;
4023
4024	int st_fd;
4025	char *cp;
4026	size_t sz;
4027	ssize_t ret;
4028
4029
4030	do {
4031		st_fd = open(init_next_state_file,
4032		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4033	} while (st_fd < 0 && errno == EINTR);
4034	if (st_fd < 0)
4035		goto err;
4036
4037	cp = (char *)g_state;
4038	sz = g_state_sz;
4039	while (sz > 0) {
4040		ret = write(st_fd, cp, sz);
4041		if (ret < 0) {
4042			if (errno == EINTR)
4043				continue;
4044
4045			goto err;
4046		}
4047
4048		sz -= ret;
4049		cp += ret;
4050	}
4051
4052	(void) close(st_fd);
4053	st_fd = -1;
4054	if (rename(init_next_state_file, init_state_file)) {
4055		(void) unlink(init_next_state_file);
4056		goto err;
4057	}
4058	complained = 0;
4059
4060	return;
4061
4062err:
4063	if (st_fd >= 0)
4064		(void) close(st_fd);
4065
4066	if (!booting && !complained) {
4067		/*
4068		 * Only complain after the filesystem should have come up.
4069		 * And only do it once so we don't loop between console()
4070		 * & efork().
4071		 */
4072		complained = 1;
4073		if (st_fd)
4074			console(B_TRUE, "Couldn't write persistent state "
4075			    "file `%s'.\n", init_state_file);
4076		else
4077			console(B_TRUE, "Couldn't move persistent state "
4078			    "file `%s' to `%s'.\n", init_next_state_file,
4079			    init_state_file);
4080	}
4081}
4082
4083/*
4084 * Create a contract with these parameters.
4085 */
4086static int
4087contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4088    uint64_t cookie)
4089{
4090	int fd, err;
4091
4092	char *ioctl_tset_emsg =
4093	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4094
4095	do
4096		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4097	while (fd < 0 && errno == EINTR)
4098		;
4099	if (fd < 0) {
4100		console(B_TRUE, "Couldn't create process template: %s.\n",
4101		    strerror(errno));
4102		return (-1);
4103	}
4104
4105	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4106		console(B_TRUE, "Contract set template inherit, regent "
4107		    "failed: %s.\n", strerror(err));
4108
4109	/*
4110	 * These errors result in a misconfigured template, which is better
4111	 * than no template at all, so warn but don't abort.
4112	 */
4113	if (err = ct_tmpl_set_informative(fd, info))
4114		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4115
4116	if (err = ct_tmpl_set_critical(fd, critical))
4117		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4118
4119	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4120		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4121
4122	if (err = ct_tmpl_set_cookie(fd, cookie))
4123		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4124
4125	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4126
4127	return (fd);
4128}
4129
4130/*
4131 * Create the templates and open an event file descriptor.  We use dup2(2) to
4132 * get these descriptors away from the stdin/stdout/stderr group.
4133 */
4134static void
4135contracts_init()
4136{
4137	int err, fd;
4138
4139	/*
4140	 * Create & configure a legacy template.  We only want empty events so
4141	 * we know when to abandon them.
4142	 */
4143	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4144	    ORDINARY_COOKIE);
4145	if (legacy_tmpl >= 0) {
4146		err = ct_tmpl_activate(legacy_tmpl);
4147		if (err != 0) {
4148			(void) close(legacy_tmpl);
4149			legacy_tmpl = -1;
4150			console(B_TRUE,
4151			    "Couldn't activate legacy template (%s); "
4152			    "legacy services will be in init's contract.\n",
4153			    strerror(err));
4154		}
4155	} else
4156		console(B_TRUE,
4157		    "Legacy services will be in init's contract.\n");
4158
4159	if (dup2(legacy_tmpl, 255) == -1) {
4160		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4161		    strerror(errno));
4162	} else {
4163		(void) close(legacy_tmpl);
4164		legacy_tmpl = 255;
4165	}
4166
4167	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4168
4169	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4170	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4171
4172	if (dup2(startd_tmpl, 254) == -1) {
4173		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4174		    strerror(errno));
4175	} else {
4176		(void) close(startd_tmpl);
4177		startd_tmpl = 254;
4178	}
4179
4180	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4181
4182	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4183		/* The creation errors have already been reported. */
4184		console(B_TRUE,
4185		    "Ignoring contract events.  Core smf(5) services will not "
4186		    "be restarted.\n");
4187		return;
4188	}
4189
4190	/*
4191	 * Open an event endpoint.
4192	 */
4193	do
4194		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4195	while (fd < 0 && errno == EINTR)
4196		;
4197	if (fd < 0) {
4198		console(B_TRUE,
4199		    "Couldn't open process pbundle: %s.  Core smf(5) services "
4200		    "will not be restarted.\n", strerror(errno));
4201		return;
4202	}
4203
4204	if (dup2(fd, 253) == -1) {
4205		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4206		    strerror(errno));
4207	} else {
4208		(void) close(fd);
4209		fd = 253;
4210	}
4211
4212	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4213
4214	/* Reset in case we've been restarted. */
4215	(void) ct_event_reset(fd);
4216
4217	poll_fds[0].fd = fd;
4218	poll_fds[0].events = POLLIN;
4219	poll_nfds = 1;
4220}
4221
4222static int
4223contract_getfile(ctid_t id, const char *name, int oflag)
4224{
4225	int fd;
4226
4227	do
4228		fd = contract_open(id, "process", name, oflag);
4229	while (fd < 0 && errno == EINTR)
4230		;
4231
4232	if (fd < 0)
4233		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4234		    name, id, strerror(errno));
4235
4236	return (fd);
4237}
4238
4239static int
4240contract_cookie(ctid_t id, uint64_t *cp)
4241{
4242	int fd, err;
4243	ct_stathdl_t sh;
4244
4245	fd = contract_getfile(id, "status", O_RDONLY);
4246	if (fd < 0)
4247		return (-1);
4248
4249	err = ct_status_read(fd, CTD_COMMON, &sh);
4250	if (err != 0) {
4251		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4252		    id, strerror(err));
4253		(void) close(fd);
4254		return (-1);
4255	}
4256
4257	(void) close(fd);
4258
4259	*cp = ct_status_get_cookie(sh);
4260
4261	ct_status_free(sh);
4262	return (0);
4263}
4264
4265static void
4266contract_ack(ct_evthdl_t e)
4267{
4268	int fd;
4269
4270	if (ct_event_get_flags(e) & CTE_INFO)
4271		return;
4272
4273	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4274	if (fd < 0)
4275		return;
4276
4277	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4278	(void) close(fd);
4279}
4280
4281/*
4282 * Process a contract event.
4283 */
4284static void
4285contract_event(struct pollfd *poll)
4286{
4287	ct_evthdl_t e;
4288	int err;
4289	ctid_t ctid;
4290
4291	if (!(poll->revents & POLLIN)) {
4292		if (poll->revents & POLLERR)
4293			console(B_TRUE,
4294			    "Unknown poll error on my process contract "
4295			    "pbundle.\n");
4296		return;
4297	}
4298
4299	err = ct_event_read(poll->fd, &e);
4300	if (err != 0) {
4301		console(B_TRUE, "Error retrieving contract event: %s.\n",
4302		    strerror(err));
4303		return;
4304	}
4305
4306	ctid = ct_event_get_ctid(e);
4307
4308	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4309		uint64_t cookie;
4310		int ret, abandon = 1;
4311
4312		/* If it's svc.startd, restart it.  Else, abandon. */
4313		ret = contract_cookie(ctid, &cookie);
4314
4315		if (ret == 0) {
4316			if (cookie == STARTD_COOKIE &&
4317			    do_restart_startd) {
4318				if (smf_debug)
4319					console(B_TRUE, "Restarting "
4320					    "svc.startd.\n");
4321
4322				/*
4323				 * Account for the failure.  If the failure rate
4324				 * exceeds a threshold, then drop to maintenance
4325				 * mode.
4326				 */
4327				startd_record_failure();
4328				if (startd_failure_rate_critical())
4329					enter_maintenance();
4330
4331				if (startd_tmpl < 0)
4332					console(B_TRUE,
4333					    "Restarting svc.startd in "
4334					    "improper contract (bad "
4335					    "template).\n");
4336
4337				(void) startd_run(startd_cline, startd_tmpl,
4338				    ctid);
4339
4340				abandon = 0;
4341			}
4342		}
4343
4344		if (abandon && (err = contract_abandon_id(ctid))) {
4345			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4346			    ctid, strerror(err));
4347		}
4348
4349		/*
4350		 * No need to acknowledge the event since either way the
4351		 * originating contract should be abandoned.
4352		 */
4353	} else {
4354		console(B_TRUE,
4355		    "Received contract event of unexpected type %d from "
4356		    "contract %ld.\n", ct_event_get_type(e), ctid);
4357
4358		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4359			/* Allow unexpected critical events to be released. */
4360			contract_ack(e);
4361	}
4362
4363	ct_event_free(e);
4364}
4365
4366/*
4367 * svc.startd(1M) Management
4368 */
4369
/*
 * (Re)start svc.startd(1M) by running cline through the shell in a child
 * process.  tmpl is the contract template to activate for the child, and
 * old_ctid should be the contract ID of the old contract, or 0 if we're
 * starting it for the first time.
 *
 * Returns -1 without forking if cline is empty, or if
 * /etc/svc/volatile/resetting exists (in which case we pause() first).
 * Otherwise returns 0 in the parent once the child has been forked; the
 * child's exit status is not collected here.  If fork() fails with EPERM
 * the process exits; other fork() failures are retried once per second.
 */
static int
startd_run(const char *cline, int tmpl, ctid_t old_ctid)
{
	int err, i, ret, did_activate;
	pid_t pid;
	struct stat sb;

	/* Nothing to run. */
	if (cline[0] == '\0')
		return (-1);

	/*
	 * Don't restart startd if the system is rebooting or shutting down.
	 */
	do {
		ret = stat("/etc/svc/volatile/resetting", &sb);
	} while (ret == -1 && errno == EINTR);

	if (ret == 0) {
		if (smf_debug)
			console(B_TRUE, "Quiescing for reboot.\n");
		(void) pause();
		return (-1);
	}

	/*
	 * Ask for old_ctid to be transferred to the new contract.  On
	 * EINVAL the template is rebuilt (which also replaces the global
	 * startd_tmpl) and the transfer is attempted once more.
	 */
	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
	if (err == EINVAL) {
		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
		    CT_PR_EV_HWERR, STARTD_COOKIE);

		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
	}
	if (err != 0) {
		console(B_TRUE,
		    "Couldn't set transfer parameter of contract template: "
		    "%s.\n", strerror(err));
	}

	/* Note: these two calls operate on startd_tmpl, not on tmpl. */
	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
	    SCF_SERVICE_STARTD)) != 0)
		console(B_TRUE,
		    "Can not set svc_fmri in contract template: %s\n",
		    strerror(err));
	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
	    startd_svc_aux)) != 0)
		console(B_TRUE,
		    "Can not set svc_aux in contract template: %s\n",
		    strerror(err));
	/* Activation failure is reported but does not stop the start. */
	did_activate = !(ct_tmpl_activate(tmpl));
	if (!did_activate)
		console(B_TRUE,
		    "Template activation failed; not starting \"%s\" in "
		    "proper contract.\n", cline);

	/* Hold SIGCLD so we can wait if necessary. */
	(void) sighold(SIGCLD);

	while ((pid = fork()) < 0) {
		if (errno == EPERM) {
			console(B_TRUE, "Insufficient permission to fork.\n");

			/* Now that's a doozy. */
			exit(1);
		}

		console(B_TRUE,
		    "fork() for svc.startd failed: %s.  Will retry in 1 "
		    "second...\n", strerror(errno));

		(void) sleep(1);

		/* Eventually give up? */
	}

	if (pid == 0) {
		/* child */

		/*
		 * See the comment in efork().  Every signal is reset to its
		 * default disposition except the job-control stop signals,
		 * which are ignored.
		 */
		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
				(void) sigset(i, SIG_IGN);
			else
				(void) sigset(i, SIG_DFL);
		}

		if (smf_options != NULL) {
			/* Put smf_options in the environment. */
			glob_envp[glob_envn] =
			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
			    strlen(smf_options) + 1);

			if (glob_envp[glob_envn] != NULL) {
				/* LINTED */
				(void) sprintf(glob_envp[glob_envn],
				    "SMF_OPTIONS=%s", smf_options);
				glob_envp[glob_envn+1] = NULL;
			} else {
				console(B_TRUE,
				    "Could not set SMF_OPTIONS (%s).\n",
				    strerror(errno));
			}
		}

		if (smf_debug)
			console(B_TRUE, "Executing svc.startd\n");

		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);

		/* Only reached if the exec failed. */
		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
		    strerror(errno));

		exit(1);
	}

	/* parent */

	/*
	 * Switch the active template back to the legacy one; if that is not
	 * possible, clear the active template instead.
	 */
	if (did_activate) {
		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
			(void) ct_tmpl_clear(tmpl);
	}

	/* Clear the old_ctid reference so the kernel can reclaim it. */
	if (old_ctid != 0)
		(void) ct_pr_tmpl_set_transfer(tmpl, 0);

	(void) sigrelse(SIGCLD);

	return (0);
}
4504
4505/*
4506 * void startd_record_failure(void)
4507 *   Place the current time in our circular array of svc.startd failures.
4508 */
4509void
4510startd_record_failure()
4511{
4512	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4513
4514	startd_failure_time[index] = gethrtime();
4515}
4516
4517/*
4518 * int startd_failure_rate_critical(void)
4519 *   Return true if the average failure interval is less than the permitted
4520 *   interval.  Implicit success if insufficient measurements for an average
4521 *   exist.
4522 */
4523int
4524startd_failure_rate_critical()
4525{
4526	int n = startd_failure_index;
4527	hrtime_t avg_ns = 0;
4528
4529	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4530		return (0);
4531
4532	avg_ns =
4533	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4534	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4535	    NSTARTD_FAILURE_TIMES;
4536
4537	return (avg_ns < STARTD_FAILURE_RATE_NS);
4538}
4539
4540/*
4541 * returns string that must be free'd
4542 */
4543
4544static char
4545*audit_boot_msg()
4546{
4547	char		*b, *p;
4548	char		desc[] = "booted";
4549	zoneid_t	zid = getzoneid();
4550
4551	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4552	if (b == NULL)
4553		return (b);
4554
4555	p = b;
4556	p += strlcpy(p, desc, sizeof (desc));
4557	if (zid != GLOBAL_ZONEID) {
4558		p += strlcpy(p, ": ", 3);
4559		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4560	}
4561	return (b);
4562}
4563
4564/*
4565 * Generate AUE_init_solaris audit record.  Return 1 if
4566 * auditing is enabled in case the caller cares.
4567 *
4568 * In the case of userint() or a local zone invocation of
4569 * one_true_init, the process initially contains the audit
4570 * characteristics of the process that invoked init.  The first pass
4571 * through here uses those characteristics then for the case of
4572 * one_true_init in a local zone, clears them so subsequent system
4573 * state changes won't be attributed to the person who booted the
4574 * zone.
4575 */
4576static int
4577audit_put_record(int pass_fail, int status, char *msg)
4578{
4579	adt_session_data_t	*ah;
4580	adt_event_data_t	*event;
4581
4582	if (!adt_audit_enabled())
4583		return (0);
4584
4585	/*
4586	 * the PROC_DATA picks up the context to tell whether this is
4587	 * an attributed record (auid = -2 is unattributed)
4588	 */
4589	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4590		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4591		return (1);
4592	}
4593	event = adt_alloc_event(ah, ADT_init_solaris);
4594	if (event == NULL) {
4595		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4596		(void) adt_end_session(ah);
4597		return (1);
4598	}
4599	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4600
4601	if (adt_put_event(event, pass_fail, status)) {
4602		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4603		(void) adt_end_session(ah);
4604		return (1);
4605	}
4606	adt_free_event(event);
4607
4608	(void) adt_end_session(ah);
4609
4610	return (1);
4611}
4612