xref: /illumos-gate/usr/src/cmd/init/init.c (revision 861a9162)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 /*
40  * init(1M) is the general process spawning program.  Its primary job is to
41  * start and restart svc.startd for smf(5).  For backwards-compatibility it also
42  * spawns and respawns processes according to /etc/inittab and the current
43  * run-level.  It reads /etc/default/inittab for general configuration.
44  *
45  * To change run-levels the system administrator runs init from the command
46  * line with a level name.  init signals svc.startd via libscf and directs the
47  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
48  * these signal numbers are commonly referred to in the code as 'states'.  Valid
49  * run-levels are [sS0123456].  Additionally, init can be given directives
50  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
51  *
52  * When init processes inittab entries, it finds processes that are to be
53  * spawned at various run-levels.  inittab contains the set of the levels for
54  * which each inittab entry is valid.
55  *
56  * State File and Restartability
57  *   Premature exit by init(1M) is handled as a special case by the kernel:
58  *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
59  *   1 in the global zone.)  To track the processes it has previously spawned,
60  *   as well as other mutable state, init(1M) regularly updates a state file
61  *   such that its subsequent invocations have knowledge of its various
62  *   dependent processes and duties.
63  *
64  * Process Contracts
65  *   We start svc.startd(1M) in a contract and transfer inherited contracts when
66  *   restarting it.  Everything else is started using the legacy contract
67  *   template, and the created contracts are abandoned when they become empty.
68  *
69  * utmpx Entry Handling
70  *   Because init(1M) no longer governs the startup process, its knowledge of
71  *   when utmpx becomes writable is indirect.  However, spawned processes
72  *   expect to be constructed with valid utmpx entries.  As a result, attempts
73  *   to write normal entries will be retried until successful.
74  *
75  * Maintenance Mode
76  *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
77  *   which it invokes sulogin(1M) to allow the operator an opportunity to
78  *   repair the system.  Normally, this operation is performed as a
79  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
80  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
81  *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
82  *   restart init(1M) on exit from the operator session.
83  *
84  *   One scenario where init(1M) enters its maintenance mode is when
85  *   svc.startd(1M) begins to fail rapidly, defined as when the average time
86  *   between recent failures drops below a given threshold.
87  */
88 
89 #include <sys/contract/process.h>
90 #include <sys/ctfs.h>
91 #include <sys/stat.h>
92 #include <sys/statvfs.h>
93 #include <sys/stropts.h>
94 #include <sys/systeminfo.h>
95 #include <sys/time.h>
96 #include <sys/termios.h>
97 #include <sys/tty.h>
98 #include <sys/types.h>
99 #include <sys/utsname.h>
100 
101 #include <bsm/adt_event.h>
102 #include <bsm/libbsm.h>
103 #include <security/pam_appl.h>
104 
105 #include <assert.h>
106 #include <ctype.h>
107 #include <dirent.h>
108 #include <errno.h>
109 #include <fcntl.h>
110 #include <libcontract.h>
111 #include <libcontract_priv.h>
112 #include <libintl.h>
113 #include <libscf.h>
114 #include <libscf_priv.h>
115 #include <poll.h>
116 #include <procfs.h>
117 #include <signal.h>
118 #include <stdarg.h>
119 #include <stdio.h>
120 #include <stdio_ext.h>
121 #include <stdlib.h>
122 #include <string.h>
123 #include <strings.h>
124 #include <syslog.h>
125 #include <time.h>
126 #include <ulimit.h>
127 #include <unistd.h>
128 #include <utmpx.h>
129 #include <wait.h>
130 #include <zone.h>
131 #include <ucontext.h>
132 
/*
 * NOTE(review): presumably discards a macro definition of sleep picked up
 * from one of the headers included above -- confirm which header supplies it.
 */
133 #undef	sleep
134 
/* fioctl(): issue an ioctl() on the descriptor underlying stdio stream p. */
135 #define	fioctl(p, sptr, cmd)	ioctl(fileno(p), sptr, cmd)
/* min(): the smaller of a and b; either argument may be evaluated twice. */
136 #define	min(a, b)		(((a) < (b)) ? (a) : (b))
137 
138 #define	TRUE	1
139 #define	FALSE	0
140 #define	FAILURE	-1
141 
142 #define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */
143 
144 /*
145  * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
146  *		nothing else requires this "init" wakeup.
147  */
148 #define	SLEEPTIME	(5 * 60)
149 
150 /*
151  * MAXCMDL	The maximum length of a command string in inittab.
152  */
153 #define	MAXCMDL	512
154 
155 /*
156  * EXEC		The length of the prefix string added to all commands
157  *		found in inittab.
158  */
159 #define	EXEC	(sizeof ("exec ") - 1)
160 
161 /*
162  * TWARN	The amount of time between warning signal, SIGTERM,
163  *		and the fatal kill signal, SIGKILL.
164  */
165 #define	TWARN	5
166 
/*
 * id_eq(x, y)	Compare two four-character inittab entry ids.  Evaluates to
 *		TRUE when the first four characters of x and y are equal and
 *		to FALSE otherwise.  Both arguments are parenthesized so that
 *		pointer expressions (e.g. "buf + 1") are indexed correctly.
 *		Each argument may be evaluated up to four times, so it must
 *		not have side effects.
 */
#define	id_eq(x, y)	(((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
			(x)[2] == (y)[2] && (x)[3] == (y)[3]) ? TRUE : FALSE)
169 
170 /*
171  * The kernel's default umask is 022 these days; since some processes inherit
172  * their umask from init, init will set it from CMASK in /etc/default/init.
173  * init gets the default umask from the kernel, it sets it to 022 whenever
174  * it wants to create a file and reverts to CMASK afterwards.
175  */
176 
177 static int cmask;
178 
179 /*
180  * The following definitions, concluding with the 'lvls' array, provide a
181  * common mapping between level-name (like 'S'), signal number (state),
182  * run-level mask, and specific properties associated with a run-level.
183  * This array should be accessed using the routines lvlname_to_state(),
184  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
185  */
186 
187 /*
188  * Correspondence of signals to init actions.
189  */
190 #define	LVLQ		SIGHUP
191 #define	LVL0		SIGINT
192 #define	LVL1		SIGQUIT
193 #define	LVL2		SIGILL
194 #define	LVL3		SIGTRAP
195 #define	LVL4		SIGIOT
196 #define	LVL5		SIGEMT
197 #define	LVL6		SIGFPE
198 #define	SINGLE_USER	SIGBUS
199 #define	LVLa		SIGSEGV
200 #define	LVLb		SIGSYS
201 #define	LVLc		SIGPIPE
202 
203 /*
204  * Bit Mask for each level.  Used to determine legal levels.
205  */
206 #define	MASK0	0x0001
207 #define	MASK1	0x0002
208 #define	MASK2	0x0004
209 #define	MASK3	0x0008
210 #define	MASK4	0x0010
211 #define	MASK5	0x0020
212 #define	MASK6	0x0040
213 #define	MASKSU	0x0080
214 #define	MASKa	0x0100
215 #define	MASKb	0x0200
216 #define	MASKc	0x0400
217 
218 #define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
219 #define	MASK_abc (MASKa | MASKb | MASKc)
220 
221 /*
222  * Flags to indicate properties of various states.
223  */
224 #define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */
225 
/*
 * One entry of the level table: ties a level name to its signal number
 * ("state"), its run-level mask bit and its property flags.
 */
226 typedef struct lvl {
227 	int	lvl_state;	/* signal number for this level (LVL*, etc.) */
228 	int	lvl_mask;	/* MASK* bit for this level, or 0 */
229 	char	lvl_name;	/* level name character, e.g. 'S' */
230 	int	lvl_flags;	/* LSEL_* properties of this level */
231 } lvl_t;
232 
/*
 * The level table itself.  'Q'/'q' and 'S'/'s' are upper/lower-case aliases
 * that share a state and a mask.  Only the numeric levels and single-user
 * carry LSEL_RUNLEVEL; Q and a, b, c do not.
 */
233 static lvl_t lvls[] = {
234 	{ LVLQ,		0,	'Q', 0					},
235 	{ LVLQ,		0,	'q', 0					},
236 	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL			},
237 	{ LVL1, 	MASK1,	'1', LSEL_RUNLEVEL			},
238 	{ LVL2, 	MASK2,	'2', LSEL_RUNLEVEL			},
239 	{ LVL3, 	MASK3,	'3', LSEL_RUNLEVEL			},
240 	{ LVL4, 	MASK4,	'4', LSEL_RUNLEVEL			},
241 	{ LVL5, 	MASK5,	'5', LSEL_RUNLEVEL			},
242 	{ LVL6, 	MASK6, 	'6', LSEL_RUNLEVEL			},
243 	{ SINGLE_USER, 	MASKSU, 'S', LSEL_RUNLEVEL			},
244 	{ SINGLE_USER, 	MASKSU, 's', LSEL_RUNLEVEL			},
245 	{ LVLa,		MASKa,	'a', 0					},
246 	{ LVLb,		MASKb,	'b', 0					},
247 	{ LVLc,		MASKc,	'c', 0					}
248 };
249 
/* Number of entries in lvls[]. */
250 #define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
251 
252 /*
253  * Legal action field values.
254  */
255 #define	OFF		0	/* Kill process if on, else ignore */
256 #define	RESPAWN		1	/* Continuously restart process when it dies */
257 #define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
258 #define	ONCE		2	/* Start process, do not respawn when dead */
259 #define	WAIT		3	/* Perform once and wait to complete */
260 #define	BOOT		4	/* Start at boot time only */
261 #define	BOOTWAIT	5	/* Start at boot time and wait to complete */
262 #define	POWERFAIL	6	/* Start on powerfail */
263 #define	POWERWAIT	7	/* Start and wait for complete on powerfail */
264 #define	INITDEFAULT	8	/* Default level "init" should start at */
265 #define	SYSINIT		9	/* Actions performed before init speaks */
266 
267 #define	M_OFF		0001
268 #define	M_RESPAWN	0002
269 #define	M_ONDEMAND	M_RESPAWN
270 #define	M_ONCE		0004
271 #define	M_WAIT		0010
272 #define	M_BOOT		0020
273 #define	M_BOOTWAIT	0040
274 #define	M_PF		0100
275 #define	M_PWAIT		0200
276 #define	M_INITDEFAULT	0400
277 #define	M_SYSINIT	01000
278 
279 /* States for the inittab parser in getcmd(). */
280 #define	ID	1
281 #define	LEVELS	2
282 #define	ACTION	3
283 #define	COMMAND	4
284 #define	COMMENT	5
285 
286 /*
287  * inittab entry id constants
288  */
289 #define	INITTAB_ENTRY_ID_SIZE 4
290 #define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"	/* if INITTAB_ENTRY_ID_SIZE */
291 						/* changes, this should */
292 						/* change accordingly */
293 
294 /*
295  * Init can be in any of three main states, "normal" mode where it is
296  * processing entries for the lines file in a normal fashion, "boot" mode,
297  * where it is only interested in the boot actions, and "powerfail" mode,
298  * where it is only interested in powerfail related actions. The following
299  * masks declare the legal actions for each mode.
300  */
301 #define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
302 #define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
303 #define	PF_MODES	(M_PF | M_PWAIT)
304 
/*
 * One slot of init's process table (see proc_table).  The meaning of the
 * bits in p_flags is described in the comment that follows this structure.
 */
305 struct PROC_TABLE {
306 	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
307 						/* process */
308 	pid_t	p_pid;		/* Process id */
309 	short	p_count;	/* How many respawns of this command in */
310 				/*   the current series */
311 	long	p_time;		/* Start time for a series of respawns */
312 	short	p_flags;	/* Bitwise or of OCCUPIED, LIVING, etc. */
313 	short	p_exit;		/* Exit status of a process which died */
314 };
315 
316 /*
317  * Flags for the "p_flags" word of a PROC_TABLE entry:
318  *
319  *	OCCUPIED	This slot in init's proc table is in use.
320  *
321  *	LIVING		Process is alive.
322  *
323  *	NOCLEANUP	efork() is not allowed to cleanup this entry even
324  *			if process is dead.
325  *
326  *	NAMED		This process has a name, i.e. came from inittab.
327  *
328  *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
329  *			formed this way are respawnable and immune to level
330  *			changes as long as their entry exists in inittab.
331  *
332  *	TOUCHED		Flag used by remv() to determine whether it has looked
333  *			at an entry while checking for processes to be killed.
334  *
335  *	WARNED		Flag used by remv() to mark processes that have been
336  *			sent the SIGTERM signal.  If they don't die in 5
337  *			seconds, they are sent the SIGKILL signal.
338  *
339  *	KILLED		Flag used by remv() to mark procs that have been sent
340  *			the SIGTERM and SIGKILL signals.
341  *
342  *	PF_MASK		Bitwise or of legal flags, for sanity checking.
343  */
344 #define	OCCUPIED	01
345 #define	LIVING		02
346 #define	NOCLEANUP	04
347 #define	NAMED		010
348 #define	DEMANDREQUEST	020
349 #define	TOUCHED		040
350 #define	WARNED		0100
351 #define	KILLED		0200
352 #define	PF_MASK		0377
353 
354 /*
355  * Respawn limits for processes that are to be respawned:
356  *
357  *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
358  *			respawn a process SPAWN_LIMIT times before it gets mad.
359  *
360  *	SPAWN_LIMIT	The number of respawns "init" will attempt in
361  *			SPAWN_INTERVAL seconds before it generates an
362  *			error message and inhibits further tries for
363  *			INHIBIT seconds.
364  *
365  *	INHIBIT		The number of seconds "init" ignores an entry it had
366  *			trouble spawning unless a "telinit Q" is received.
367  */
368 
369 #define	SPAWN_INTERVAL	(2*60)
370 #define	SPAWN_LIMIT	10
371 #define	INHIBIT		(5*60)
372 
373 /*
374  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
375  */
376 #define	ID_MAX_STR_LEN	10
377 
378 #define	NULLPROC	((struct PROC_TABLE *)(0))
379 #define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
380 
381 struct CMD_LINE {
382 	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
383 						/* process to be affected by */
384 						/* action */
385 	short c_levels;	/* Mask of legal levels for process */
386 	short c_action;	/* Mask for type of action required */
387 	char *c_command; /* Pointer to init command */
388 };
389 
390 struct	pidrec {
391 	int	pd_type;	/* Command type */
392 	pid_t	pd_pid;		/* pid to add or remove */
393 };
394 
395 /*
396  * pd_type's
397  */
398 #define	ADDPID	1
399 #define	REMPID	2
400 
401 static struct	pidlist {
402 	pid_t	pl_pid;		/* pid to watch for */
403 	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
404 	short	pl_exit;	/* Exit status of proc */
405 	struct	pidlist	*pl_next; /* Next in list */
406 } *Plhead, *Plfree;
407 
408 /*
409  * The following structure contains a set of modes for /dev/syscon
410  * and should match the default contents of /etc/ioctl.syscon.  It should also
411  * be kept in-sync with base_termios in uts/common/io/ttcompat.c.
412  */
413 static struct termios	dflt_termios = {
414 	BRKINT|ICRNL|IXON|IMAXBEL,			/* iflag */
415 	OPOST|ONLCR|TAB3,				/* oflag */
416 	CS8|CREAD|B9600,				/* cflag */
417 	ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN, /* lflag */
418 	CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
419 	0, 0, 0, 0, 0, 0, 0, 0,
420 	0, 0, 0
421 };
422 
423 static struct termios	stored_syscon_termios;
424 static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
425 
/*
 * Reasons for which init has been woken up.  Each reason is a one-bit field
 * of w_flags; w_mask overlays the same storage so that main() can test for
 * "no reason pending" (w_mask == 0) and clear every reason with a single
 * assignment.
 */
426 static union WAKEUP {
427 	struct WAKEFLAGS {
428 		unsigned w_usersignal : 1;	/* User sent signal to "init" */
429 		unsigned w_childdeath : 1;	/* An "init" child died */
430 		unsigned w_powerhit : 1;	/* OS experienced powerfail */
431 	}	w_flags;
432 	int w_mask;	/* All of the above flags viewed as one integer */
433 } wakeup;
434 
435 
/*
 * init's mutable state, reached through g_state.
 * NOTE(review): presumably this is the data kept in the state file described
 * in the block comment at the top of this file (see st_init()/st_write()) --
 * confirm against those routines.
 */
436 struct init_state {
437 	int			ist_runlevel;	/* see cur_state */
438 	int			ist_num_proc;	/* see num_proc; bounds the */
						/* scans of proc_table */
439 	int			ist_utmpx_ok;	/* see utmpx_ok */
	/*
	 * NOTE(review): declared with a single element, but code scans
	 * num_proc entries, so the allocation presumably extends past the
	 * end of this structure -- confirm in increase_proc_table_size().
	 */
440 	struct PROC_TABLE	ist_proc_table[1];
441 };
442 
/* Shorthand for the fields of *g_state; g_state must be non-NULL when used. */
443 #define	cur_state	(g_state->ist_runlevel)
444 #define	num_proc	(g_state->ist_num_proc)
445 #define	proc_table	(g_state->ist_proc_table)
446 #define	utmpx_ok	(g_state->ist_utmpx_ok)
447 
448 /* Contract cookies. */
449 #define	ORDINARY_COOKIE		0
450 #define	STARTD_COOKIE		1
451 
452 
/*
 * bad_error(func, err)
 *   Abort the process because the routine named by the string "func" failed
 *   with the unexpected error number "err".  Unless NDEBUG is defined, a
 *   message identifying the source location, the routine and the error is
 *   written to stderr first.
 *
 *   The multi-statement form is wrapped in do { } while (0) so that the
 *   macro behaves as a single statement everywhere, including as the body
 *   of an if that is followed by an else.  Invocations must end with a
 *   semicolon (the NDEBUG form has always required this).
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (0)
#else
#define	bad_error(func, err)	abort()
#endif
462 
463 
464 /*
465  * Useful file and device names.
466  */
467 static char *CONSOLE	  = "/dev/console";	/* Real system console */
468 static char *INITPIPE_DIR = "/var/run";
469 static char *INITPIPE	  = "/var/run/initpipe";
470 
471 #define	INIT_STATE_DIR "/etc/svc/volatile"
472 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
473 static const char * const init_next_state_file =
474 	INIT_STATE_DIR "/init-next.state";
475 
476 static const int init_num_proc = 20;	/* Initial size of process table. */
477 
478 static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
479 static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
480 static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
481 static char *SYSTTY	 = "/dev/systty";	/* System Console */
482 static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
483 static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
484 static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
485 static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
486 static char *SH	= "/sbin/sh";		/* Standard shell */
487 
488 /*
489  * Default Path.  /sbin is included in path only during sysinit phase
490  */
491 #define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
492 #define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
493 
494 static int	prior_state;	/* State main() returns cur_state to once a */
				/* demand level (a, b, c) has been handled */
495 static int	prev_state;	/* State "init" was in last time it woke */
496 static int	new_state;	/* State user wants "init" to go to. */
497 static int	lvlq_received;	/* Explicit request to examine state */
498 static int	op_modes = BOOT_MODES; /* Current state of "init" */
499 static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
500 				/*   childeath() and cleared in cleanaux() */
501 static int	Pfd = -1;	/* fd to receive pids thru */
/*
 * spawncnt counts main-loop passes in which a respawn took place (rsflag was
 * set); pausecnt counts the times main() has slept in poll().
 */
502 static unsigned int	spawncnt, pausecnt;
503 static int	rsflag;		/* Set if a respawn has taken place */
504 static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
505 				/* routine each time an alarm interrupt */
506 				/* takes place. */
507 static int	sflg = 0;	/* Set if we were booted -s to single user */
508 static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
509 static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
510 static pid_t	init_pid;	/* PID of "one true" init for current zone */
511 
/* init's mutable state; cur_state, num_proc, etc. dereference this. */
512 static struct init_state *g_state = NULL;
/* NOTE(review): presumably the size in bytes of *g_state -- confirm. */
513 static size_t	g_state_sz;
514 static int	booting = 1;	/* Set while we're booting. */
515 
516 /*
517  * Array for default global environment.
518  */
519 #define	MAXENVENT	24	/* Max number of default env variables + 1 */
520 				/* init can use three itself, so this leaves */
521 				/* 20 for the administrator in ENVFILE. */
522 static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
523 static int	glob_envn;		/* Number of environment strings */
524 
525 
526 static struct pollfd	poll_fds[1];
527 static int		poll_nfds = 0;	/* poll_fds is uninitialized */
528 
529 /*
530  * Contracts constants
531  */
532 #define	SVC_INIT_PREFIX "init:/"
533 #define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
534 #define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
535 
536 static int	legacy_tmpl = -1;	/* fd for legacy contract template */
537 static int	startd_tmpl = -1;	/* fd for svc.startd's template */
538 static char	startd_svc_aux[SVC_AUX_SIZE];
539 
540 static char	startd_cline[256] = "";	/* svc.startd's command line */
541 static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
542 static char	*smf_options = NULL;	/* Options to give to startd. */
543 static int	smf_debug = 0;		/* Messages for debugging smf(5) */
544 static time_t	init_boot_time;		/* Substitute for kernel boot time. */
545 
546 #define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
547 #define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */
548 
549 static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
550 static uint_t	startd_failure_index;
551 
552 
553 static char	*prog_name(char *);
554 static int	state_to_mask(int);
555 static int	lvlname_to_mask(char, int *);
556 static void	lscf_set_runlevel(char);
557 static int	state_to_flags(int);
558 static char	state_to_name(int);
559 static int	lvlname_to_state(char);
560 static int	getcmd(struct CMD_LINE *, char *);
561 static int	realcon();
562 static int	spawn_processes();
563 static int	get_ioctl_syscon();
564 static int	account(short, struct PROC_TABLE *, char *);
565 static void	alarmclk();
566 static void	childeath(int);
567 static void	cleanaux();
568 static void	clearent(pid_t, short);
569 static void	console(boolean_t, char *, ...);
570 static void	init_signals(void);
571 static void	setup_pipe();
572 static void	killproc(pid_t);
573 static void	init_env();
574 static void	boot_init();
575 static void	powerfail();
576 static void	remv();
577 static void	write_ioctl_syscon();
578 static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
579 static void	setimer(int);
580 static void	siglvl(int, siginfo_t *, ucontext_t *);
581 static void	sigpoll(int);
582 static void	enter_maintenance(void);
583 static void	timer(int);
584 static void	userinit(int, char **);
585 static void	notify_pam_dead(struct utmpx *);
586 static long	waitproc(struct PROC_TABLE *);
587 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
588 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
589 static void	increase_proc_table_size();
590 static void	st_init();
591 static void	st_write();
592 static void	contracts_init();
593 static void	contract_event(struct pollfd *);
594 static int	startd_run(const char *, int, ctid_t);
595 static void	startd_record_failure();
596 static int	startd_failure_rate_critical();
597 static char	*audit_boot_msg();
598 static int	audit_put_record(int, int, char *);
599 static void	update_boot_archive(int new_state);
600 
601 int
602 main(int argc, char *argv[])
603 {
604 	int	chg_lvl_flag = FALSE, print_banner = FALSE;
605 	int	may_need_audit = 1;
606 	int	c;
607 	char	*msg;
608 
609 	/* Get a timestamp for use as boot time, if needed. */
610 	(void) time(&init_boot_time);
611 
612 	/* Get the default umask */
613 	cmask = umask(022);
614 	(void) umask(cmask);
615 
616 	/* Parse the arguments to init. Check for single user */
617 	opterr = 0;
618 	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
619 		switch (c) {
620 		case 'b':
621 			rflg = 0;
622 			bflg = 1;
623 			if (!sflg)
624 				sflg++;
625 			break;
626 		case 'r':
627 			bflg = 0;
628 			rflg++;
629 			break;
630 		case 's':
631 			if (!bflg)
632 				sflg++;
633 			break;
634 		case 'm':
635 			smf_options = optarg;
636 			smf_debug = (strstr(smf_options, "debug") != NULL);
637 			break;
638 		}
639 	}
640 
641 	/*
642 	 * Determine if we are the main init, or a user invoked init, whose job
643 	 * it is to inform init to change levels or perform some other action.
644 	 */
645 	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
646 	    sizeof (init_pid)) != sizeof (init_pid)) {
647 		(void) fprintf(stderr, "could not get pid for init\n");
648 		return (1);
649 	}
650 
651 	/*
652 	 * If this PID is not the same as the "true" init for the zone, then we
653 	 * must be in 'user' mode.
654 	 */
655 	if (getpid() != init_pid) {
656 		userinit(argc, argv);
657 	}
658 
659 	if (getzoneid() != GLOBAL_ZONEID) {
660 		print_banner = TRUE;
661 	}
662 
663 	/*
664 	 * Initialize state (and set "booting").
665 	 */
666 	st_init();
667 
668 	if (booting && print_banner) {
669 		struct utsname un;
670 		char buf[BUFSIZ], *isa;
671 		long ret;
672 		int bits = 32;
673 
674 		/*
675 		 * We want to print the boot banner as soon as
676 		 * possible.  In the global zone, the kernel does it,
677 		 * but we do not have that luxury in non-global zones,
678 		 * so we will print it here.
679 		 */
680 		(void) uname(&un);
681 		ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
682 		if (ret != -1L && ret <= sizeof (buf)) {
683 			for (isa = strtok(buf, " "); isa;
684 			    isa = strtok(NULL, " ")) {
685 				if (strcmp(isa, "sparcv9") == 0 ||
686 				    strcmp(isa, "amd64") == 0) {
687 					bits = 64;
688 					break;
689 				}
690 			}
691 		}
692 
693 		console(B_FALSE,
694 		    "\n\n%s Release %s Version %s %d-bit\r\n",
695 		    un.sysname, un.release, un.version, bits);
696 		console(B_FALSE,
697 		    "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
698 		    " All rights reserved.\r\n");
699 	}
700 
701 	/*
702 	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
703 	 * so that it can be brought up in the state it was in when the
704 	 * system went down; or set to defaults if ioctl.syscon isn't
705 	 * valid.
706 	 *
707 	 * This needs to be done even if we're restarting so reset_modes()
708 	 * will work in case we need to go down to single user mode.
709 	 */
710 	write_ioctl = get_ioctl_syscon();
711 
712 	/*
713 	 * Set up all signals to be caught or ignored as appropriate.
714 	 */
715 	init_signals();
716 
717 	/* Load glob_envp from ENVFILE. */
718 	init_env();
719 
720 	contracts_init();
721 
722 	if (!booting) {
723 		/* cur_state should have been read in. */
724 
725 		op_modes = NORMAL_MODES;
726 
727 		/* Rewrite the ioctl file if it was bad. */
728 		if (write_ioctl)
729 			write_ioctl_syscon();
730 	} else {
731 		/*
732 		 * It's fine to boot up with state as zero, because
733 		 * startd will later tell us the real state.
734 		 */
735 		cur_state = 0;
736 		op_modes = BOOT_MODES;
737 
738 		boot_init();
739 	}
740 
741 	prev_state = prior_state = cur_state;
742 
743 	setup_pipe();
744 
745 	/*
746 	 * Here is the beginning of the main process loop.
747 	 */
748 	for (;;) {
749 		if (lvlq_received) {
750 			setup_pipe();
751 			lvlq_received = B_FALSE;
752 		}
753 
754 		/*
755 		 * Clean up any accounting records for dead "godchildren".
756 		 */
757 		if (Gchild)
758 			cleanaux();
759 
760 		/*
761 		 * If in "normal" mode, check all living processes and initiate
762 		 * kill sequence on those that should not be there anymore.
763 		 */
764 		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
765 		    cur_state != LVLb && cur_state != LVLc)
766 			remv();
767 
768 		/*
769 		 * If a change in run levels is the reason we awoke, now do
770 		 * the accounting to report the change in the utmp file.
771 		 * Also report the change on the system console.
772 		 */
773 		if (chg_lvl_flag) {
774 			chg_lvl_flag = FALSE;
775 
776 			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
777 				char rl = state_to_name(cur_state);
778 
779 				if (rl != -1)
780 					lscf_set_runlevel(rl);
781 			}
782 
783 			may_need_audit = 1;
784 		}
785 
786 		/*
787 		 * Scan the inittab file and spawn and respawn processes that
788 		 * should be alive in the current state. If inittab does not
789 		 * exist default to  single user mode.
790 		 */
791 		if (spawn_processes() == FAILURE) {
792 			prior_state = prev_state;
793 			cur_state = SINGLE_USER;
794 		}
795 
796 		/* If any respawns occurred, take note. */
797 		if (rsflag) {
798 			rsflag = 0;
799 			spawncnt++;
800 		}
801 
802 		/*
803 		 * If a powerfail signal was received during the last
804 		 * sequence, set mode to powerfail.  When spawn_processes() is
805 		 * entered the first thing it does is to check "powerhit".  If
806 		 * it is in PF_MODES then it clears "powerhit" and does
807 		 * a powerfail sequence.  If it is not in PF_MODES, then it
808 		 * puts itself in PF_MODES and then clears "powerhit".  Should
809 		 * "powerhit" get set again while spawn_processes() is working
810 		 * on a powerfail sequence, the following code  will see that
811 		 * spawn_processes() tries to execute the powerfail sequence
812 		 * again.  This guarantees that the powerfail sequence will be
813 		 * successfully completed before further processing takes
814 		 * place.
815 		 */
816 		if (wakeup.w_flags.w_powerhit) {
817 			op_modes = PF_MODES;
818 			/*
819 			 * Make sure that cur_state != prev_state so that
820 			 * ONCE and WAIT types work.
821 			 */
822 			prev_state = 0;
823 		} else if (op_modes != NORMAL_MODES) {
824 			/*
825 			 * If spawn_processes() was not just called while in
826 			 * normal mode, we set the mode to normal and it will
827 			 * be called again to check normal modes.  If we have
828 			 * just finished a powerfail sequence with prev_state
829 			 * equal to zero, we set prev_state equal to cur_state
830 			 * before the next pass through.
831 			 */
832 			if (op_modes == PF_MODES)
833 				prev_state = cur_state;
834 			op_modes = NORMAL_MODES;
835 		} else if (cur_state == LVLa || cur_state == LVLb ||
836 		    cur_state == LVLc) {
837 			/*
838 			 * If it was a change of levels that awakened us and the
839 			 * new level is one of the demand levels then reset
840 			 * cur_state to the previous state and do another scan
841 			 * to take care of the usual respawn actions.
842 			 */
843 			cur_state = prior_state;
844 			prior_state = prev_state;
845 			prev_state = cur_state;
846 		} else {
847 			prev_state = cur_state;
848 
849 			if (wakeup.w_mask == 0) {
850 				int ret;
851 
852 				if (may_need_audit && (cur_state == LVL3)) {
853 					msg = audit_boot_msg();
854 
855 					may_need_audit = 0;
856 					(void) audit_put_record(ADT_SUCCESS,
857 					    ADT_SUCCESS, msg);
858 					free(msg);
859 				}
860 
861 				/*
862 				 * "init" is finished with all actions for
863 				 * the current wakeup.
864 				 */
865 				ret = poll(poll_fds, poll_nfds,
866 				    SLEEPTIME * MILLISEC);
867 				pausecnt++;
868 				if (ret > 0)
869 					contract_event(&poll_fds[0]);
870 				else if (ret < 0 && errno != EINTR)
871 					console(B_TRUE, "poll() error: %s\n",
872 					    strerror(errno));
873 			}
874 
875 			if (wakeup.w_flags.w_usersignal) {
876 				/*
877 				 * Install the new level.  This could be a real
878 				 * change in levels  or a telinit [Q|a|b|c] or
879 				 * just a telinit to the same level at which
880 				 * we are running.
881 				 */
882 				if (new_state != cur_state) {
883 					if (new_state == LVLa ||
884 					    new_state == LVLb ||
885 					    new_state == LVLc) {
886 						prev_state = prior_state;
887 						prior_state = cur_state;
888 						cur_state = new_state;
889 					} else {
890 						prev_state = cur_state;
891 						if (cur_state >= 0)
892 							prior_state = cur_state;
893 						cur_state = new_state;
894 						chg_lvl_flag = TRUE;
895 					}
896 				}
897 
898 				new_state = 0;
899 			}
900 
901 			if (wakeup.w_flags.w_powerhit)
902 				op_modes = PF_MODES;
903 
904 			/*
905 			 * Clear all wakeup reasons.
906 			 */
907 			wakeup.w_mask = 0;
908 		}
909 	}
910 
911 	/*NOTREACHED*/
912 }
913 
914 static void
915 update_boot_archive(int new_state)
916 {
917 	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
918 		return;
919 
920 	if (getzoneid() != GLOBAL_ZONEID)
921 		return;
922 
923 	(void) system("/sbin/bootadm -ea update_all");
924 }
925 
926 /*
927  * void enter_maintenance()
928  *   A simple invocation of sulogin(1M), with no baggage, in the case that we
929  *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
930  *   we wait for it to exit.
931  */
932 static void
933 enter_maintenance()
934 {
935 	struct PROC_TABLE	*su_process;
936 
937 	console(B_FALSE, "Requesting maintenance mode\n"
938 	    "(See /lib/svc/share/README for additional information.)\n");
939 	(void) sighold(SIGCLD);
940 	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
941 		(void) pause();
942 	(void) sigrelse(SIGCLD);
943 	if (su_process == NULLPROC) {
944 		int fd;
945 
946 		(void) fclose(stdin);
947 		(void) fclose(stdout);
948 		(void) fclose(stderr);
949 		closefrom(0);
950 
951 		fd = open(SYSCON, O_RDWR | O_NOCTTY);
952 		if (fd >= 0) {
953 			(void) dup2(fd, 1);
954 			(void) dup2(fd, 2);
955 		} else {
956 			/*
957 			 * Need to issue an error message somewhere.
958 			 */
959 			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
960 			    getpid(), SYSCON, strerror(errno));
961 		}
962 
963 		/*
964 		 * Execute the "su" program.
965 		 */
966 		(void) execle(SU, SU, "-", (char *)0, glob_envp);
967 		console(B_TRUE, "execle of %s failed: %s\n", SU,
968 		    strerror(errno));
969 		timer(5);
970 		exit(1);
971 	}
972 
973 	/*
974 	 * If we are the parent, wait around for the child to die
975 	 * or for "init" to be signaled to change levels.
976 	 */
977 	while (waitproc(su_process) == FAILURE) {
978 		/*
979 		 * All other reasons for waking are ignored when in
980 		 * single-user mode.  The only child we are interested
981 		 * in is being waited for explicitly by waitproc().
982 		 */
983 		wakeup.w_mask = 0;
984 	}
985 }
986 
987 /*
988  * remv() scans through "proc_table" and performs cleanup.  If
989  * there is a process in the table, which shouldn't be here at
990  * the current run level, then remv() kills the process.
991  */
992 static void
993 remv()
994 {
995 	struct PROC_TABLE	*process;
996 	struct CMD_LINE		cmd;
997 	char			cmd_string[MAXCMDL];
998 	int			change_level;
999 
1000 	change_level = (cur_state != prev_state ? TRUE : FALSE);
1001 
1002 	/*
1003 	 * Clear the TOUCHED flag on all entries so that when we have
1004 	 * finished scanning inittab, we will be able to tell if we
1005 	 * have any processes for which there is no entry in inittab.
1006 	 */
1007 	for (process = proc_table;
1008 	    (process < proc_table + num_proc); process++) {
1009 		process->p_flags &= ~TOUCHED;
1010 	}
1011 
1012 	/*
1013 	 * Scan all inittab entries.
1014 	 */
1015 	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1016 		/* Scan for process which goes with this entry in inittab. */
1017 		for (process = proc_table;
1018 		    (process < proc_table + num_proc); process++) {
1019 			if ((process->p_flags & OCCUPIED) == 0 ||
1020 			    !id_eq(process->p_id, cmd.c_id))
1021 				continue;
1022 
1023 			/*
1024 			 * This slot contains the process we are looking for.
1025 			 */
1026 
1027 			/*
1028 			 * Is the cur_state SINGLE_USER or is this process
1029 			 * marked as "off" or was this proc started by some
1030 			 * mechanism other than LVL{a|b|c} and the current level
1031 			 * does not support this process?
1032 			 */
1033 			if (cur_state == SINGLE_USER ||
1034 			    cmd.c_action == M_OFF ||
1035 			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1036 			    (process->p_flags & DEMANDREQUEST) == 0)) {
1037 				if (process->p_flags & LIVING) {
1038 					/*
1039 					 * Touch this entry so we know we have
1040 					 * treated it.  Note that procs which
1041 					 * are already dead at this point and
1042 					 * should not be restarted are left
1043 					 * untouched.  This causes their slot to
1044 					 * be freed later after dead accounting
1045 					 * is done.
1046 					 */
1047 					process->p_flags |= TOUCHED;
1048 
1049 					if ((process->p_flags & KILLED) == 0) {
1050 						if (change_level) {
1051 							process->p_flags
1052 							    |= WARNED;
1053 							(void) kill(
1054 							    process->p_pid,
1055 							    SIGTERM);
1056 						} else {
1057 							/*
1058 							 * Fork a killing proc
1059 							 * so "init" can
1060 							 * continue without
1061 							 * having to pause for
1062 							 * TWARN seconds.
1063 							 */
1064 							killproc(
1065 							    process->p_pid);
1066 						}
1067 						process->p_flags |= KILLED;
1068 					}
1069 				}
1070 			} else {
1071 				/*
1072 				 * Process can exist at current level.  If it is
1073 				 * still alive or a DEMANDREQUEST we touch it so
1074 				 * it will be left alone.  Otherwise we leave it
1075 				 * untouched so it will be accounted for and
1076 				 * cleaned up later in remv().  Dead
1077 				 * DEMANDREQUESTs will be accounted but not
1078 				 * freed.
1079 				 */
1080 				if (process->p_flags &
1081 				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1082 					process->p_flags |= TOUCHED;
1083 			}
1084 
1085 			break;
1086 		}
1087 	}
1088 
1089 	st_write();
1090 
1091 	/*
1092 	 * If this was a change of levels call, scan through the
1093 	 * process table for processes that were warned to die.  If any
1094 	 * are found that haven't left yet, sleep for TWARN seconds and
1095 	 * then send final terminations to any that haven't died yet.
1096 	 */
1097 	if (change_level) {
1098 
1099 		/*
1100 		 * Set the alarm for TWARN seconds on the assumption
1101 		 * that there will be some that need to be waited for.
1102 		 * This won't harm anything except we are guaranteed to
1103 		 * wakeup in TWARN seconds whether we need to or not.
1104 		 */
1105 		setimer(TWARN);
1106 
1107 		/*
1108 		 * Scan for processes which should be dying.  We hope they
1109 		 * will die without having to be sent a SIGKILL signal.
1110 		 */
1111 		for (process = proc_table;
1112 		    (process < proc_table + num_proc); process++) {
1113 			/*
1114 			 * If this process should die, hasn't yet, and the
1115 			 * TWARN time hasn't expired yet, wait for process
1116 			 * to die or for timer to expire.
1117 			 */
1118 			while (time_up == FALSE &&
1119 			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1120 			    (WARNED|LIVING|OCCUPIED))
1121 				(void) pause();
1122 
1123 			if (time_up == TRUE)
1124 				break;
1125 		}
1126 
1127 		/*
1128 		 * If we reached the end of the table without the timer
1129 		 * expiring, then there are no procs which will have to be
1130 		 * sent the SIGKILL signal.  If the timer has expired, then
1131 		 * it is necessary to scan the table again and send signals
1132 		 * to all processes which aren't going away nicely.
1133 		 */
1134 		if (time_up == TRUE) {
1135 			for (process = proc_table;
1136 			    (process < proc_table + num_proc); process++) {
1137 				if ((process->p_flags &
1138 				    (WARNED|LIVING|OCCUPIED)) ==
1139 				    (WARNED|LIVING|OCCUPIED))
1140 					(void) kill(process->p_pid, SIGKILL);
1141 			}
1142 		}
1143 		setimer(0);
1144 	}
1145 
1146 	/*
1147 	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1148 	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1149 	 * by the above scanning), and haven't been sent kill signals, and
1150 	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1151 	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1152 	 */
1153 	for (process = proc_table;
1154 	    (process < proc_table + num_proc); process++) {
1155 		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1156 		    == (LIVING|NAMED|OCCUPIED)) {
1157 			killproc(process->p_pid);
1158 			process->p_flags |= KILLED;
1159 		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1160 		    (NAMED|OCCUPIED)) {
1161 			(void) account(DEAD_PROCESS, process, NULL);
1162 			/*
1163 			 * If this named proc hasn't been TOUCHED, then free the
1164 			 * space. It has either died of it's own accord, but
1165 			 * isn't respawnable or it was killed because it
1166 			 * shouldn't exist at this level.
1167 			 */
1168 			if ((process->p_flags & TOUCHED) == 0)
1169 				process->p_flags = 0;
1170 		}
1171 	}
1172 
1173 	st_write();
1174 }
1175 
1176 /*
1177  * Extract the svc.startd command line and whether to restart it from its
1178  * inittab entry.
1179  */
1180 /*ARGSUSED*/
1181 static void
1182 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1183 {
1184 	size_t sz;
1185 
1186 	/* Save the command line. */
1187 	if (sflg || rflg) {
1188 		/* Also append -r or -s. */
1189 		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1190 		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1191 		if (sflg)
1192 			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1193 		if (rflg)
1194 			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1195 	} else {
1196 		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1197 	}
1198 
1199 	if (sz >= sizeof (startd_cline)) {
1200 		console(B_TRUE,
1201 		    "svc.startd command line too long.  Ignoring.\n");
1202 		startd_cline[0] = '\0';
1203 		return;
1204 	}
1205 }
1206 
1207 /*
1208  * spawn_processes() scans inittab for entries which should be run at this
1209  * mode.  Processes which should be running but are not, are started.
1210  */
1211 static int
1212 spawn_processes()
1213 {
1214 	struct PROC_TABLE		*pp;
1215 	struct CMD_LINE			cmd;
1216 	char				cmd_string[MAXCMDL];
1217 	short				lvl_mask;
1218 	int				status;
1219 
1220 	/*
1221 	 * First check the "powerhit" flag.  If it is set, make sure the modes
1222 	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1223 	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1224 	 * between the test of the powerhit flag and the clearing of it.
1225 	 */
1226 	if (wakeup.w_flags.w_powerhit) {
1227 		wakeup.w_flags.w_powerhit = 0;
1228 		op_modes = PF_MODES;
1229 	}
1230 	lvl_mask = state_to_mask(cur_state);
1231 
1232 	/*
1233 	 * Scan through all the entries in inittab.
1234 	 */
1235 	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1236 		if (id_eq(cmd.c_id, "smf")) {
1237 			process_startd_line(&cmd, cmd_string);
1238 			continue;
1239 		}
1240 
1241 retry_for_proc_slot:
1242 
1243 		/*
1244 		 * Find out if there is a process slot for this entry already.
1245 		 */
1246 		if ((pp = findpslot(&cmd)) == NULLPROC) {
1247 			/*
1248 			 * we've run out of proc table entries
1249 			 * increase proc_table.
1250 			 */
1251 			increase_proc_table_size();
1252 
1253 			/*
1254 			 * Retry now as we have an empty proc slot.
1255 			 * In case increase_proc_table_size() fails,
1256 			 * we will keep retrying.
1257 			 */
1258 			goto retry_for_proc_slot;
1259 		}
1260 
1261 		/*
1262 		 * If there is an entry, and it is marked as DEMANDREQUEST,
1263 		 * one of the levels a, b, or c is in its levels mask, and
1264 		 * the action field is ONDEMAND and ONDEMAND is a permissable
1265 		 * mode, and the process is dead, then respawn it.
1266 		 */
1267 		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1268 		    (cmd.c_levels & MASK_abc) &&
1269 		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1270 			spawn(pp, &cmd);
1271 			continue;
1272 		}
1273 
1274 		/*
1275 		 * If the action is not an action we are interested in,
1276 		 * skip the entry.
1277 		 */
1278 		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1279 		    (cmd.c_levels & lvl_mask) == 0)
1280 			continue;
1281 
1282 		/*
1283 		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1284 		 * ONDEMAND) and the action field is either OFF or the action
1285 		 * field is ONCE or WAIT and the current level is the same as
1286 		 * the last level, then skip this entry.  ONCE and WAIT only
1287 		 * get run when the level changes.
1288 		 */
1289 		if (op_modes == NORMAL_MODES &&
1290 		    (cmd.c_action == M_OFF ||
1291 		    (cmd.c_action & (M_ONCE|M_WAIT)) &&
1292 		    cur_state == prev_state))
1293 			continue;
1294 
1295 		/*
1296 		 * At this point we are interested in performing the action for
1297 		 * this entry.  Actions fall into two categories, spinning off
1298 		 * a process and not waiting, and spinning off a process and
1299 		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1300 		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1301 		 * to die, for all other actions we do wait.
1302 		 */
1303 		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1304 			spawn(pp, &cmd);
1305 
1306 		} else {
1307 			spawn(pp, &cmd);
1308 			while (waitproc(pp) == FAILURE)
1309 				;
1310 			(void) account(DEAD_PROCESS, pp, NULL);
1311 			pp->p_flags = 0;
1312 		}
1313 	}
1314 	return (status);
1315 }
1316 
1317 /*
1318  * spawn() spawns a shell, inserts the information about the process
1319  * process into the proc_table, and does the startup accounting.
1320  */
1321 static void
1322 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1323 {
1324 	int		i;
1325 	int		modes, maxfiles;
1326 	time_t		now;
1327 	struct PROC_TABLE tmproc, *oprocess;
1328 
1329 	/*
1330 	 * The modes to be sent to efork() are 0 unless we are
1331 	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1332 	 * waiting for the death of the child before continuing.
1333 	 */
1334 	modes = NAMED;
1335 	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1336 	    cur_state == LVLb || cur_state == LVLc)
1337 		modes |= DEMANDREQUEST;
1338 	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1339 		modes |= NOCLEANUP;
1340 
1341 	/*
1342 	 * If this is a respawnable process, check the threshold
1343 	 * information to avoid excessive respawns.
1344 	 */
1345 	if (cmd->c_action & M_RESPAWN) {
1346 		/*
1347 		 * Add NOCLEANUP to all respawnable commands so that the
1348 		 * information about the frequency of respawns isn't lost.
1349 		 */
1350 		modes |= NOCLEANUP;
1351 		(void) time(&now);
1352 
1353 		/*
1354 		 * If no time is assigned, then this is the first time
1355 		 * this command is being processed in this series.  Assign
1356 		 * the current time.
1357 		 */
1358 		if (process->p_time == 0L)
1359 			process->p_time = now;
1360 
1361 		if (process->p_count++ == SPAWN_LIMIT) {
1362 
1363 			if ((now - process->p_time) < SPAWN_INTERVAL) {
1364 				/*
1365 				 * Process is respawning too rapidly.  Print
1366 				 * message and refuse to respawn it for now.
1367 				 */
1368 				console(B_TRUE, "Command is respawning too "
1369 				    "rapidly. Check for possible errors.\n"
1370 				    "id:%4s \"%s\"\n",
1371 				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1372 				return;
1373 			}
1374 			process->p_time = now;
1375 			process->p_count = 0;
1376 
1377 		} else if (process->p_count > SPAWN_LIMIT) {
1378 			/*
1379 			 * If process has been respawning too rapidly and
1380 			 * the inhibit time limit hasn't expired yet, we
1381 			 * refuse to respawn.
1382 			 */
1383 			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1384 				return;
1385 			process->p_time = now;
1386 			process->p_count = 0;
1387 		}
1388 		rsflag = TRUE;
1389 	}
1390 
1391 	/*
1392 	 * Spawn a child process to execute this command.
1393 	 */
1394 	(void) sighold(SIGCLD);
1395 	oprocess = process;
1396 	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1397 		(void) pause();
1398 
1399 	if (process == NULLPROC) {
1400 
1401 		/*
1402 		 * We are the child.  We must make sure we get a different
1403 		 * file pointer for our references to utmpx.  Otherwise our
1404 		 * seeks and reads will compete with those of the parent.
1405 		 */
1406 		endutxent();
1407 
1408 		/*
1409 		 * Perform the accounting for the beginning of a process.
1410 		 * Note that all processes are initially "INIT_PROCESS"es.
1411 		 */
1412 		tmproc.p_id[0] = cmd->c_id[0];
1413 		tmproc.p_id[1] = cmd->c_id[1];
1414 		tmproc.p_id[2] = cmd->c_id[2];
1415 		tmproc.p_id[3] = cmd->c_id[3];
1416 		tmproc.p_pid = getpid();
1417 		tmproc.p_exit = 0;
1418 		(void) account(INIT_PROCESS, &tmproc,
1419 		    prog_name(&cmd->c_command[EXEC]));
1420 		maxfiles = ulimit(UL_GDESLIM, 0);
1421 		for (i = 0; i < maxfiles; i++)
1422 			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1423 
1424 		/*
1425 		 * Now exec a shell with the -c option and the command
1426 		 * from inittab.
1427 		 */
1428 		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1429 		    glob_envp);
1430 		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1431 		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1432 
1433 		/*
1434 		 * Don't come back so quickly that "init" doesn't have a
1435 		 * chance to finish putting this child in "proc_table".
1436 		 */
1437 		timer(20);
1438 		exit(1);
1439 
1440 	}
1441 
1442 	/*
1443 	 * We are the parent.  Insert the necessary
1444 	 * information in the proc_table.
1445 	 */
1446 	process->p_id[0] = cmd->c_id[0];
1447 	process->p_id[1] = cmd->c_id[1];
1448 	process->p_id[2] = cmd->c_id[2];
1449 	process->p_id[3] = cmd->c_id[3];
1450 
1451 	st_write();
1452 
1453 	(void) sigrelse(SIGCLD);
1454 }
1455 
1456 /*
1457  * findpslot() finds the old slot in the process table for the
1458  * command with the same id, or it finds an empty slot.
1459  */
1460 static struct PROC_TABLE *
1461 findpslot(struct CMD_LINE *cmd)
1462 {
1463 	struct PROC_TABLE	*process;
1464 	struct PROC_TABLE	*empty = NULLPROC;
1465 
1466 	for (process = proc_table;
1467 	    (process < proc_table + num_proc); process++) {
1468 		if (process->p_flags & OCCUPIED &&
1469 		    id_eq(process->p_id, cmd->c_id))
1470 			break;
1471 
1472 		/*
1473 		 * If the entry is totally empty and "empty" is still 0,
1474 		 * remember where this hole is and make sure the slot is
1475 		 * zeroed out.
1476 		 */
1477 		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1478 			empty = process;
1479 			process->p_id[0] = '\0';
1480 			process->p_id[1] = '\0';
1481 			process->p_id[2] = '\0';
1482 			process->p_id[3] = '\0';
1483 			process->p_pid = 0;
1484 			process->p_time = 0L;
1485 			process->p_count = 0;
1486 			process->p_flags = 0;
1487 			process->p_exit = 0;
1488 		}
1489 	}
1490 
1491 	/*
1492 	 * If there is no entry for this slot, then there should be an
1493 	 * empty slot.  If there is no empty slot, then we've run out
1494 	 * of proc_table space.  If the latter is true, empty will be
1495 	 * NULL and the caller will have to complain.
1496 	 */
1497 	if (process == (proc_table + num_proc))
1498 		process = empty;
1499 
1500 	return (process);
1501 }
1502 
1503 /*
1504  * getcmd() parses lines from inittab.  Each time it finds a command line
1505  * it will return TRUE as well as fill the passed CMD_LINE structure and
1506  * the shell command string.  When the end of inittab is reached, FALSE
1507  * is returned inittab is automatically opened if it is not currently open
1508  * and is closed when the end of the file is reached.
1509  */
1510 static FILE *fp_inittab = NULL;
1511 
1512 static int
1513 getcmd(struct CMD_LINE *cmd, char *shcmd)
1514 {
1515 	char	*ptr;
1516 	int	c, lastc, state;
1517 	char 	*ptr1;
1518 	int	answer, i, proceed;
1519 	struct	stat	sbuf;
1520 	static char *actions[] = {
1521 		"off", "respawn", "ondemand", "once", "wait", "boot",
1522 		"bootwait", "powerfail", "powerwait", "initdefault",
1523 		"sysinit",
1524 	};
1525 	static short act_masks[] = {
1526 		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1527 		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1528 	};
1529 	/*
1530 	 * Only these actions will be allowed for entries which
1531 	 * are specified for single-user mode.
1532 	 */
1533 	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1534 
1535 	if (fp_inittab == NULL) {
1536 		/*
1537 		 * Before attempting to open inittab we stat it to make
1538 		 * sure it currently exists and is not empty.  We try
1539 		 * several times because someone may have temporarily
1540 		 * unlinked or truncated the file.
1541 		 */
1542 		for (i = 0; i < 3; i++) {
1543 			if (stat(INITTAB, &sbuf) == -1) {
1544 				if (i == 2) {
1545 					console(B_TRUE,
1546 					    "Cannot stat %s, errno: %d\n",
1547 					    INITTAB, errno);
1548 					return (FAILURE);
1549 				} else {
1550 					timer(3);
1551 				}
1552 			} else if (sbuf.st_size < 10) {
1553 				if (i == 2) {
1554 					console(B_TRUE,
1555 					    "%s truncated or corrupted\n",
1556 					    INITTAB);
1557 					return (FAILURE);
1558 				} else {
1559 					timer(3);
1560 				}
1561 			} else {
1562 				break;
1563 			}
1564 		}
1565 
1566 		/*
1567 		 * If unable to open inittab, print error message and
1568 		 * return FAILURE to caller.
1569 		 */
1570 		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1571 			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1572 			    errno);
1573 			return (FAILURE);
1574 		}
1575 	}
1576 
1577 	/*
1578 	 * Keep getting commands from inittab until you find a
1579 	 * good one or run out of file.
1580 	 */
1581 	for (answer = FALSE; answer == FALSE; ) {
1582 		/*
1583 		 * Zero out the cmd itself before trying next line.
1584 		 */
1585 		bzero(cmd, sizeof (struct CMD_LINE));
1586 
1587 		/*
1588 		 * Read in lines of inittab, parsing at colons, until a line is
1589 		 * read in which doesn't end with a backslash.  Do not start if
1590 		 * the first character read is an EOF.  Note that this means
1591 		 * that lines which don't end in a newline are still processed,
1592 		 * since the "for" will terminate normally once started,
1593 		 * regardless of whether line terminates with a newline or EOF.
1594 		 */
1595 		state = FAILURE;
1596 		if ((c = fgetc(fp_inittab)) == EOF) {
1597 			answer = FALSE;
1598 			(void) fclose(fp_inittab);
1599 			fp_inittab = NULL;
1600 			break;
1601 		}
1602 
1603 		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1604 		    proceed && c != EOF;
1605 		    lastc = c, c = fgetc(fp_inittab)) {
1606 		    /* If we're not in the FAILURE state and haven't	*/
1607 		    /* yet reached the shell command field, process	*/
1608 		    /* the line, otherwise just look for a real end	*/
1609 		    /* of line.						*/
1610 		    if (state != FAILURE && state != COMMAND) {
1611 			/*
1612 			 * Squeeze out spaces and tabs.
1613 			 */
1614 			if (c == ' ' || c == '\t')
1615 				continue;
1616 
1617 			/*
1618 			 * Ignore characters in a comment, except for the \n.
1619 			 */
1620 			if (state == COMMENT) {
1621 				if (c == '\n') {
1622 					lastc = ' ';
1623 					break;
1624 				} else {
1625 					continue;
1626 				}
1627 			}
1628 
1629 			/*
1630 			 * Detect comments (lines whose first non-whitespace
1631 			 * character is '#') by checking that we're at the
1632 			 * beginning of a line, have seen a '#', and haven't
1633 			 * yet accumulated any characters.
1634 			 */
1635 			if (state == ID && c == '#' && ptr == shcmd) {
1636 				state = COMMENT;
1637 				continue;
1638 			}
1639 
1640 			/*
1641 			 * If the character is a ':', then check the
1642 			 * previous field for correctness and advance
1643 			 * to the next field.
1644 			 */
1645 			if (c == ':') {
1646 			    switch (state) {
1647 
1648 			    case ID :
1649 				/*
1650 				 * Check to see that there are only
1651 				 * 1 to 4 characters for the id.
1652 				 */
1653 				if ((i = ptr - shcmd) < 1 || i > 4) {
1654 					state = FAILURE;
1655 				} else {
1656 					bcopy(shcmd, &cmd->c_id[0], i);
1657 					ptr = shcmd;
1658 					state = LEVELS;
1659 				}
1660 				break;
1661 
1662 			    case LEVELS :
1663 				/*
1664 				 * Build a mask for all the levels for
1665 				 * which this command will be legal.
1666 				 */
1667 				for (cmd->c_levels = 0, ptr1 = shcmd;
1668 				    ptr1 < ptr; ptr1++) {
1669 					int mask;
1670 					if (lvlname_to_mask(*ptr1,
1671 					    &mask) == -1) {
1672 						state = FAILURE;
1673 						break;
1674 					}
1675 					cmd->c_levels |= mask;
1676 				}
1677 				if (state != FAILURE) {
1678 					state = ACTION;
1679 					ptr = shcmd;	/* Reset the buffer */
1680 				}
1681 				break;
1682 
1683 			    case ACTION :
1684 				/*
1685 				 * Null terminate the string in shcmd buffer and
1686 				 * then try to match against legal actions.  If
1687 				 * the field is of length 0, then the default of
1688 				 * "RESPAWN" is used if the id is numeric,
1689 				 * otherwise the default is "OFF".
1690 				 */
1691 				if (ptr == shcmd) {
1692 					if (isdigit(cmd->c_id[0]) &&
1693 					    (cmd->c_id[1] == '\0' ||
1694 						isdigit(cmd->c_id[1])) &&
1695 					    (cmd->c_id[2] == '\0' ||
1696 						isdigit(cmd->c_id[2])) &&
1697 					    (cmd->c_id[3] == '\0' ||
1698 						isdigit(cmd->c_id[3])))
1699 						    cmd->c_action = M_RESPAWN;
1700 					else
1701 						    cmd->c_action = M_OFF;
1702 				} else {
1703 				    for (cmd->c_action = 0, i = 0, *ptr = '\0';
1704 				    i < sizeof (actions)/sizeof (char *);
1705 				    i++) {
1706 					if (strcmp(shcmd, actions[i]) == 0) {
1707 					    if ((cmd->c_levels & MASKSU) &&
1708 						!(act_masks[i] & su_acts))
1709 						    cmd->c_action = 0;
1710 					    else
1711 						cmd->c_action = act_masks[i];
1712 					    break;
1713 					}
1714 				    }
1715 				}
1716 
1717 				/*
1718 				 * If the action didn't match any legal action,
1719 				 * set state to FAILURE.
1720 				 */
1721 				if (cmd->c_action == 0) {
1722 					state = FAILURE;
1723 				} else {
1724 					state = COMMAND;
1725 					(void) strcpy(shcmd, "exec ");
1726 				}
1727 				ptr = shcmd + EXEC;
1728 				break;
1729 			    }
1730 			    continue;
1731 			}
1732 		    }
1733 
1734 		    /* If the character is a '\n', then this is the end of a */
1735 		    /* line.  If the '\n' wasn't preceded by a backslash, */
1736 		    /* it is also the end of an inittab command.  If it was */
1737 		    /* preceded by a backslash then the next line is a */
1738 		    /* continuation.  Note that the continuation '\n' falls */
1739 		    /* through and is treated like other characters and is */
1740 		    /* stored in the shell command line. */
1741 		    if (c == '\n' && lastc != '\\') {
1742 				proceed = FALSE;
1743 				*ptr = '\0';
1744 				break;
1745 		    }
1746 
1747 		    /* For all other characters just stuff them into the */
1748 		    /* command as long as there aren't too many of them. */
1749 		    /* Make sure there is room for a terminating '\0' also. */
1750 		    if (ptr >= shcmd + MAXCMDL - 1)
1751 			state = FAILURE;
1752 		    else
1753 			*ptr++ = (char)c;
1754 
1755 		    /* If the character we just stored was a quoted	*/
1756 		    /* backslash, then change "c" to '\0', so that this	*/
1757 		    /* backslash will not cause a subsequent '\n' to appear */
1758 		    /* quoted.  In otherwords '\' '\' '\n' is the real end */
1759 		    /* of a command, while '\' '\n' is a continuation. */
1760 		    if (c == '\\' && lastc == '\\')
1761 			c = '\0';
1762 		}
1763 
1764 		/*
1765 		 * Make sure all the fields are properly specified
1766 		 * for a good command line.
1767 		 */
1768 		if (state == COMMAND) {
1769 			answer = TRUE;
1770 			cmd->c_command = shcmd;
1771 
1772 			/*
1773 			 * If no default level was supplied, insert
1774 			 * all numerical levels.
1775 			 */
1776 			if (cmd->c_levels == 0)
1777 				cmd->c_levels = MASK_NUMERIC;
1778 
1779 			/*
1780 			 * If no action has been supplied, declare this
1781 			 * entry to be OFF.
1782 			 */
1783 			if (cmd->c_action == 0)
1784 				cmd->c_action = M_OFF;
1785 
1786 			/*
1787 			 * If no shell command has been supplied, make sure
1788 			 * there is a null string in the command field.
1789 			 */
1790 			if (ptr == shcmd + EXEC)
1791 				*shcmd = '\0';
1792 		} else
1793 			answer = FALSE;
1794 
1795 		/*
1796 		 * If we have reached the end of inittab, then close it
1797 		 * and quit trying to find a good command line.
1798 		 */
1799 		if (c == EOF) {
1800 			(void) fclose(fp_inittab);
1801 			fp_inittab = NULL;
1802 			break;
1803 		}
1804 	}
1805 	return (answer);
1806 }
1807 
1808 /*
1809  * lvlname_to_state(): convert the character name of a state to its level
1810  * (its corresponding signal number).
1811  */
1812 static int
1813 lvlname_to_state(char name)
1814 {
1815 	int i;
1816 	for (i = 0; i < LVL_NELEMS; i++) {
1817 		if (lvls[i].lvl_name == name)
1818 			return (lvls[i].lvl_state);
1819 	}
1820 	return (-1);
1821 }
1822 
1823 /*
1824  * state_to_name(): convert the level to the character name.
1825  */
1826 static char
1827 state_to_name(int state)
1828 {
1829 	int i;
1830 	for (i = 0; i < LVL_NELEMS; i++) {
1831 		if (lvls[i].lvl_state == state)
1832 			return (lvls[i].lvl_name);
1833 	}
1834 	return (-1);
1835 }
1836 
1837 /*
1838  * state_to_mask(): return the mask corresponding to a signal number
1839  */
1840 static int
1841 state_to_mask(int state)
1842 {
1843 	int i;
1844 	for (i = 0; i < LVL_NELEMS; i++) {
1845 		if (lvls[i].lvl_state == state)
1846 			return (lvls[i].lvl_mask);
1847 	}
1848 	return (0);	/* return 0, since that represents an empty mask */
1849 }
1850 
1851 /*
1852  * lvlname_to_mask(): return the mask corresponding to a levels character name
1853  */
1854 static int
1855 lvlname_to_mask(char name, int *mask)
1856 {
1857 	int i;
1858 	for (i = 0; i < LVL_NELEMS; i++) {
1859 		if (lvls[i].lvl_name == name) {
1860 			*mask = lvls[i].lvl_mask;
1861 			return (0);
1862 		}
1863 	}
1864 	return (-1);
1865 }
1866 
1867 /*
1868  * state_to_flags(): return the flags corresponding to a runlevel.  These
1869  * indicate properties of that runlevel.
1870  */
1871 static int
1872 state_to_flags(int state)
1873 {
1874 	int i;
1875 	for (i = 0; i < LVL_NELEMS; i++) {
1876 		if (lvls[i].lvl_state == state)
1877 			return (lvls[i].lvl_flags);
1878 	}
1879 	return (0);
1880 }
1881 
1882 /*
1883  * killproc() creates a child which kills the process specified by pid.
1884  */
1885 void
1886 killproc(pid_t pid)
1887 {
1888 	struct PROC_TABLE	*process;
1889 
1890 	(void) sighold(SIGCLD);
1891 	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1892 		(void) pause();
1893 	(void) sigrelse(SIGCLD);
1894 
1895 	if (process == NULLPROC) {
1896 		/*
1897 		 * efork() sets all signal handlers to the default, so reset
1898 		 * the ALRM handler to make timer() work as expected.
1899 		 */
1900 		(void) sigset(SIGALRM, alarmclk);
1901 
1902 		/*
1903 		 * We are the child.  Try to terminate the process nicely
1904 		 * first using SIGTERM and if it refuses to die in TWARN
1905 		 * seconds kill it with SIGKILL.
1906 		 */
1907 		(void) kill(pid, SIGTERM);
1908 		(void) timer(TWARN);
1909 		(void) kill(pid, SIGKILL);
1910 		(void) exit(0);
1911 	}
1912 }
1913 
1914 /*
1915  * Set up the default environment for all procs to be forked from init.
1916  * Read the values from the /etc/default/init file, except for PATH.  If
1917  * there's not enough room in the environment array, the environment
1918  * lines that don't fit are silently discarded.
1919  */
1920 void
1921 init_env()
1922 {
1923 	char	line[MAXCMDL];
1924 	FILE	*fp;
1925 	int	inquotes, length, wslength;
1926 	char	*tokp, *cp1, *cp2;
1927 
1928 	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1929 	(void) strcpy(glob_envp[0], DEF_PATH);
1930 	glob_envn = 1;
1931 
1932 	if (rflg) {
1933 		glob_envp[1] =
1934 		    malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1935 		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1936 		++glob_envn;
1937 	} else if (bflg == 1) {
1938 		glob_envp[1] =
1939 		    malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1940 		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1941 		++glob_envn;
1942 	}
1943 
1944 	if ((fp = fopen(ENVFILE, "r")) == NULL) {
1945 		console(B_TRUE,
1946 		    "Cannot open %s. Environment not initialized.\n",
1947 		    ENVFILE);
1948 	} else {
1949 		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1950 		    glob_envn < MAXENVENT - 2) {
1951 			/*
1952 			 * Toss newline
1953 			 */
1954 			length = strlen(line);
1955 			if (line[length - 1] == '\n')
1956 				line[length - 1] = '\0';
1957 
1958 			/*
1959 			 * Ignore blank or comment lines.
1960 			 */
1961 			if (line[0] == '#' || line[0] == '\0' ||
1962 			    (wslength = strspn(line, " \t\n")) ==
1963 			    strlen(line) ||
1964 			    strchr(line, '#') == line + wslength)
1965 				continue;
1966 
1967 			/*
1968 			 * First make a pass through the line and change
1969 			 * any non-quoted semi-colons to blanks so they
1970 			 * will be treated as token separators below.
1971 			 */
1972 			inquotes = 0;
1973 			for (cp1 = line; *cp1 != '\0'; cp1++) {
1974 				if (*cp1 == '"') {
1975 					if (inquotes == 0)
1976 						inquotes = 1;
1977 					else
1978 						inquotes = 0;
1979 				} else if (*cp1 == ';') {
1980 					if (inquotes == 0)
1981 						*cp1 = ' ';
1982 				}
1983 			}
1984 
1985 			/*
1986 			 * Tokens within the line are separated by blanks
1987 			 *  and tabs.  For each token in the line which
1988 			 * contains a '=' we strip out any quotes and then
1989 			 * stick the token in the environment array.
1990 			 */
1991 			if ((tokp = strtok(line, " \t")) == NULL)
1992 				continue;
1993 			do {
1994 				if (strchr(tokp, '=') == NULL)
1995 					continue;
1996 				length = strlen(tokp);
1997 				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
1998 					for (cp2 = cp1;
1999 					    cp2 < &tokp[length]; cp2++)
2000 						*cp2 = *(cp2 + 1);
2001 					length--;
2002 				}
2003 
2004 				if (strncmp(tokp, "CMASK=",
2005 				    sizeof ("CMASK=") - 1) == 0) {
2006 					long t;
2007 
2008 					/* We know there's an = */
2009 					t = strtol(strchr(tokp, '=') + 1, NULL,
2010 					    8);
2011 
2012 					/* Sanity */
2013 					if (t <= 077 && t >= 0)
2014 						cmask = (int)t;
2015 					(void) umask(cmask);
2016 					continue;
2017 				}
2018 				glob_envp[glob_envn] =
2019 				    malloc((unsigned)(length + 1));
2020 				(void) strcpy(glob_envp[glob_envn], tokp);
2021 				if (++glob_envn >= MAXENVENT - 1)
2022 					break;
2023 			} while ((tokp = strtok(NULL, " \t")) != NULL);
2024 		}
2025 
2026 		/*
2027 		 * Append a null pointer to the environment array
2028 		 * to mark its end.
2029 		 */
2030 		glob_envp[glob_envn] = NULL;
2031 		(void) fclose(fp);
2032 	}
2033 }
2034 
2035 /*
2036  * boot_init(): Do initialization things that should be done at boot.
2037  */
2038 void
2039 boot_init()
2040 {
2041 	int i;
2042 	struct PROC_TABLE *process, *oprocess;
2043 	struct CMD_LINE	cmd;
2044 	char	line[MAXCMDL];
2045 	char	svc_aux[SVC_AUX_SIZE];
2046 	char	init_svc_fmri[SVC_FMRI_SIZE];
2047 	char *old_path;
2048 	int maxfiles;
2049 
2050 	/* Use INIT_PATH for sysinit cmds */
2051 	old_path = glob_envp[0];
2052 	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2053 	(void) strcpy(glob_envp[0], INIT_PATH);
2054 
2055 	/*
2056 	 * Scan inittab(4) and process the special svc.startd entry, initdefault
2057 	 * and sysinit entries.
2058 	 */
2059 	while (getcmd(&cmd, &line[0]) == TRUE) {
2060 		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2061 			process_startd_line(&cmd, line);
2062 			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2063 			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2064 		} else if (cmd.c_action == M_INITDEFAULT) {
2065 			/*
2066 			 * initdefault is no longer meaningful, as the SMF
2067 			 * milestone controls what (legacy) run level we
2068 			 * boot to.
2069 			 */
2070 			console(B_TRUE,
2071 			    "Ignoring legacy \"initdefault\" entry.\n");
2072 		} else if (cmd.c_action == M_SYSINIT) {
2073 			/*
2074 			 * Execute the "sysinit" entry and wait for it to
2075 			 * complete.  No bookkeeping is performed on these
2076 			 * entries because we avoid writing to the file system
2077 			 * until after there has been an chance to check it.
2078 			 */
2079 			if (process = findpslot(&cmd)) {
2080 				(void) sighold(SIGCLD);
2081 				(void) snprintf(svc_aux, SVC_AUX_SIZE,
2082 				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2083 				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2084 				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2085 				    cmd.c_id);
2086 				if (legacy_tmpl >= 0) {
2087 					(void) ct_pr_tmpl_set_svc_fmri(
2088 					    legacy_tmpl, init_svc_fmri);
2089 					(void) ct_pr_tmpl_set_svc_aux(
2090 					    legacy_tmpl, svc_aux);
2091 				}
2092 
2093 				for (oprocess = process;
2094 				    (process = efork(M_OFF, oprocess,
2095 				    (NAMED|NOCLEANUP))) == NO_ROOM;
2096 				    /* CSTYLED */)
2097 					;
2098 				(void) sigrelse(SIGCLD);
2099 
2100 				if (process == NULLPROC) {
2101 					maxfiles = ulimit(UL_GDESLIM, 0);
2102 
2103 					for (i = 0; i < maxfiles; i++)
2104 						(void) fcntl(i, F_SETFD,
2105 						    FD_CLOEXEC);
2106 					(void) execle(SH, "INITSH", "-c",
2107 					    cmd.c_command,
2108 					    (char *)0, glob_envp);
2109 					console(B_TRUE,
2110 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2111 					    cmd.c_command, errno);
2112 					exit(1);
2113 				} else while (waitproc(process) == FAILURE);
2114 				process->p_flags = 0;
2115 				st_write();
2116 			}
2117 		}
2118 	}
2119 
2120 	/* Restore the path. */
2121 	free(glob_envp[0]);
2122 	glob_envp[0] = old_path;
2123 
2124 	/*
2125 	 * This will enable st_write() to complain about init_state_file.
2126 	 */
2127 	booting = 0;
2128 
2129 	/*
2130 	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2131 	 * out a correct version.
2132 	 */
2133 	if (write_ioctl)
2134 		write_ioctl_syscon();
2135 
2136 	/*
2137 	 * Start svc.startd(1M), which does most of the work.
2138 	 */
2139 	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2140 		/* Start svc.startd. */
2141 		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2142 			cur_state = SINGLE_USER;
2143 	} else {
2144 		console(B_TRUE, "Absent svc.startd entry or bad "
2145 		    "contract template.  Not starting svc.startd.\n");
2146 		enter_maintenance();
2147 	}
2148 }
2149 
2150 /*
2151  * init_signals(): Initialize all signals to either be caught or ignored.
2152  */
2153 void
2154 init_signals(void)
2155 {
2156 	struct sigaction act;
2157 	int i;
2158 
2159 	/*
2160 	 * Start by ignoring all signals, then selectively re-enable some.
2161 	 * The SIG_IGN disposition will only affect asynchronous signals:
2162 	 * any signal that we trigger synchronously that doesn't end up
2163 	 * being handled by siglvl() will be forcibly delivered by the kernel.
2164 	 */
2165 	for (i = SIGHUP; i <= SIGRTMAX; i++)
2166 		(void) sigset(i, SIG_IGN);
2167 
2168 	/*
2169 	 * Handle all level-changing signals using siglvl() and set sa_mask so
2170 	 * that all level-changing signals are blocked while in siglvl().
2171 	 */
2172 	act.sa_handler = siglvl;
2173 	act.sa_flags = SA_SIGINFO;
2174 	(void) sigemptyset(&act.sa_mask);
2175 
2176 	(void) sigaddset(&act.sa_mask, LVLQ);
2177 	(void) sigaddset(&act.sa_mask, LVL0);
2178 	(void) sigaddset(&act.sa_mask, LVL1);
2179 	(void) sigaddset(&act.sa_mask, LVL2);
2180 	(void) sigaddset(&act.sa_mask, LVL3);
2181 	(void) sigaddset(&act.sa_mask, LVL4);
2182 	(void) sigaddset(&act.sa_mask, LVL5);
2183 	(void) sigaddset(&act.sa_mask, LVL6);
2184 	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2185 	(void) sigaddset(&act.sa_mask, LVLa);
2186 	(void) sigaddset(&act.sa_mask, LVLb);
2187 	(void) sigaddset(&act.sa_mask, LVLc);
2188 
2189 	(void) sigaction(LVLQ, &act, NULL);
2190 	(void) sigaction(LVL0, &act, NULL);
2191 	(void) sigaction(LVL1, &act, NULL);
2192 	(void) sigaction(LVL2, &act, NULL);
2193 	(void) sigaction(LVL3, &act, NULL);
2194 	(void) sigaction(LVL4, &act, NULL);
2195 	(void) sigaction(LVL5, &act, NULL);
2196 	(void) sigaction(LVL6, &act, NULL);
2197 	(void) sigaction(SINGLE_USER, &act, NULL);
2198 	(void) sigaction(LVLa, &act, NULL);
2199 	(void) sigaction(LVLb, &act, NULL);
2200 	(void) sigaction(LVLc, &act, NULL);
2201 
2202 	(void) sigset(SIGALRM, alarmclk);
2203 	alarmclk();
2204 
2205 	(void) sigset(SIGCLD, childeath);
2206 	(void) sigset(SIGPWR, powerfail);
2207 }
2208 
2209 /*
2210  * Set up pipe for "godchildren". If the file exists and is a pipe just open
2211  * it. Else, if the file system is r/w create it.  Otherwise, defer its
2212  * creation and open until after /var/run has been mounted.  This function is
2213  * only called on startup and when explicitly requested via LVLQ.
2214  */
2215 void
2216 setup_pipe()
2217 {
2218 	struct stat stat_buf;
2219 	struct statvfs statvfs_buf;
2220 	struct sigaction act;
2221 
2222 	/*
2223 	 * Always close the previous pipe descriptor as the mounted filesystems
2224 	 * may have changed.
2225 	 */
2226 	if (Pfd >= 0)
2227 		(void) close(Pfd);
2228 
2229 	if ((stat(INITPIPE, &stat_buf) == 0) &&
2230 	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2231 		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2232 	else
2233 		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2234 		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2235 			(void) unlink(INITPIPE);
2236 			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2237 			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2238 		}
2239 
2240 	if (Pfd >= 0) {
2241 		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2242 		/*
2243 		 * Read pipe in message discard mode.
2244 		 */
2245 		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2246 
2247 		act.sa_handler = sigpoll;
2248 		act.sa_flags = 0;
2249 		(void) sigemptyset(&act.sa_mask);
2250 		(void) sigaddset(&act.sa_mask, SIGCLD);
2251 		(void) sigaction(SIGPOLL, &act, NULL);
2252 	}
2253 }
2254 
2255 /*
2256  * siglvl - handle an asynchronous signal from init(1M) telling us that we
2257  * should change the current run level.  We set new_state accordingly.
2258  */
2259 void
2260 siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2261 {
2262 	struct PROC_TABLE *process;
2263 	struct sigaction act;
2264 
2265 	/*
2266 	 * If the signal was from the kernel (rather than init(1M)) then init
2267 	 * itself tripped the signal.  That is, we might have a bug and tripped
2268 	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2269 	 * such a case we reset the disposition to SIG_DFL, block all signals
2270 	 * in uc_mask but the current one, and return to the interrupted ucp
2271 	 * to effect an appropriate death.  The kernel will then restart us.
2272 	 *
2273 	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2274 	 * the kernel can send us when it wants to effect an orderly reboot.
2275 	 * For this case we must also verify si_code is zero, rather than a
2276 	 * code such as FPE_INTDIV which a bug might have triggered.
2277 	 */
2278 	if (sip != NULL && SI_FROMKERNEL(sip) &&
2279 	    (sig != SIGFPE || sip->si_code == 0)) {
2280 
2281 		(void) sigemptyset(&act.sa_mask);
2282 		act.sa_handler = SIG_DFL;
2283 		act.sa_flags = 0;
2284 		(void) sigaction(sig, &act, NULL);
2285 
2286 		(void) sigfillset(&ucp->uc_sigmask);
2287 		(void) sigdelset(&ucp->uc_sigmask, sig);
2288 		ucp->uc_flags |= UC_SIGMASK;
2289 
2290 		(void) setcontext(ucp);
2291 	}
2292 
2293 	/*
2294 	 * If the signal received is a LVLQ signal, do not really
2295 	 * change levels, just restate the current level.  If the
2296 	 * signal is not a LVLQ, set the new level to the signal
2297 	 * received.
2298 	 */
2299 	if (sig == LVLQ) {
2300 		new_state = cur_state;
2301 		lvlq_received = B_TRUE;
2302 	} else {
2303 		new_state = sig;
2304 	}
2305 
2306 	/*
2307 	 * Clear all times and repeat counts in the process table
2308 	 * since either the level is changing or the user has editted
2309 	 * the inittab file and wants us to look at it again.
2310 	 * If the user has fixed a typo, we don't want residual timing
2311 	 * data preventing the fixed command line from executing.
2312 	 */
2313 	for (process = proc_table;
2314 	    (process < proc_table + num_proc); process++) {
2315 		process->p_time = 0L;
2316 		process->p_count = 0;
2317 	}
2318 
2319 	/*
2320 	 * Set the flag to indicate that a "user signal" was received.
2321 	 */
2322 	wakeup.w_flags.w_usersignal = 1;
2323 }
2324 
2325 
/*
 * alarmclk(): record that the alarm has gone off by setting time_up to
 * TRUE.  init_signals() installs this as the SIGALRM handler; it is also
 * called directly (by init_signals() and setimer()).
 */
static void
alarmclk()
{
	time_up = TRUE;
}
2334 
2335 /*
2336  * childeath_single():
2337  *
2338  * This used to be the SIGCLD handler and it was set with signal()
2339  * (as opposed to sigset()).  When a child exited we'd come to the
2340  * handler, wait for the child, and reenable the handler with
2341  * signal() just before returning.  The implementation of signal()
2342  * checks with waitid() for waitable children and sends a SIGCLD
2343  * if there are some.  If children are exiting faster than the
2344  * handler can run we keep sending signals and the handler never
2345  * gets to return and eventually the stack runs out and init dies.
2346  * To prevent that we set the handler with sigset() so the handler
2347  * doesn't need to be reset, and in childeath() (see below) we
2348  * call childeath_single() as long as there are children to be
2349  * waited for.  If a child exits while init is in the handler a
2350  * SIGCLD will be pending and delivered on return from the handler.
2351  * If the child was already waited for the handler will have nothing
2352  * to do and return, otherwise the child will be waited for.
2353  */
2354 static void
2355 childeath_single(pid_t pid, int status)
2356 {
2357 	struct PROC_TABLE	*process;
2358 	struct pidlist		*pp;
2359 
2360 	/*
2361 	 * Scan the process table to see if we are interested in this process.
2362 	 */
2363 	for (process = proc_table;
2364 	    (process < proc_table + num_proc); process++) {
2365 		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2366 		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2367 
2368 			/*
2369 			 * Mark this process as having died and store the exit
2370 			 * status.  Also set the wakeup flag for a dead child
2371 			 * and break out of the loop.
2372 			 */
2373 			process->p_flags &= ~LIVING;
2374 			process->p_exit = (short)status;
2375 			wakeup.w_flags.w_childdeath = 1;
2376 
2377 			return;
2378 		}
2379 	}
2380 
2381 	/*
2382 	 * No process was found above, look through auxiliary list.
2383 	 */
2384 	(void) sighold(SIGPOLL);
2385 	pp = Plhead;
2386 	while (pp) {
2387 		if (pid > pp->pl_pid) {
2388 			/*
2389 			 * Keep on looking.
2390 			 */
2391 			pp = pp->pl_next;
2392 			continue;
2393 		} else if (pid < pp->pl_pid) {
2394 			/*
2395 			 * Not in the list.
2396 			 */
2397 			break;
2398 		} else {
2399 			/*
2400 			 * This is a dead "godchild".
2401 			 */
2402 			pp->pl_dflag = 1;
2403 			pp->pl_exit = (short)status;
2404 			wakeup.w_flags.w_childdeath = 1;
2405 			Gchild = 1;	/* Notice to call cleanaux(). */
2406 			break;
2407 		}
2408 	}
2409 
2410 	(void) sigrelse(SIGPOLL);
2411 }
2412 
/*
 * childeath(): SIGCLD handler.  Reaps every child that is currently
 * waitable, without blocking, and passes each pid/status pair to
 * childeath_single().  errno is saved on entry and restored on exit so that
 * the waitpid() calls made here cannot disturb an errno value the
 * interrupted code is about to inspect.
 */
/* ARGSUSED */
static void
childeath(int signo)
{
	pid_t pid;
	int status;
	int saved_errno = errno;

	while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
		childeath_single(pid, status);

	errno = saved_errno;
}
2423 
/*
 * powerfail(): handler installed for SIGPWR by init_signals().  Requests a
 * priority boost via nice(-19) and sets the w_powerhit wakeup flag.
 */
static void
powerfail()
{
	(void) nice(-19);
	wakeup.w_flags.w_powerhit = 1;
}
2430 
2431 /*
2432  * efork() forks a child and the parent inserts the process in its table
2433  * of processes that are directly a result of forks that it has performed.
2434  * The child just changes the "global" with the process id for this process
2435  * to it's new value.
2436  * If efork() is called with a pointer into the proc_table it uses that slot,
2437  * otherwise it searches for a free slot.  Regardless of how it was called,
2438  * it returns the pointer to the proc_table entry
2439  *
2440  * The SIGCLD signal is blocked (held) before calling efork()
2441  * and is unblocked (released) after efork() returns.
2442  *
2443  * Ideally, this should be rewritten to use modern signal semantics.
2444  */
2445 static struct PROC_TABLE *
2446 efork(int action, struct PROC_TABLE *process, int modes)
2447 {
2448 	pid_t	childpid;
2449 	struct PROC_TABLE *proc;
2450 	int		i;
2451 	/*
2452 	 * Freshen up the proc_table, removing any entries for dead processes
2453 	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2454 	 */
2455 	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2456 		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2457 		    (OCCUPIED)) {
2458 			/*
2459 			 * Is this a named process?
2460 			 * If so, do the necessary bookkeeping.
2461 			 */
2462 			if (proc->p_flags & NAMED)
2463 				(void) account(DEAD_PROCESS, proc, NULL);
2464 
2465 			/*
2466 			 * Free this entry for new usage.
2467 			 */
2468 			proc->p_flags = 0;
2469 		}
2470 	}
2471 
2472 	while ((childpid = fork()) == FAILURE) {
2473 		/*
2474 		 * Shorten the alarm timer in case someone else's child dies
2475 		 * and free up a slot in the process table.
2476 		 */
2477 		setimer(5);
2478 
2479 		/*
2480 		 * Wait for some children to die.  Since efork()
2481 		 * is always called with SIGCLD blocked, unblock
2482 		 * it here so that child death signals can come in.
2483 		 */
2484 		(void) sigrelse(SIGCLD);
2485 		(void) pause();
2486 		(void) sighold(SIGCLD);
2487 		setimer(0);
2488 	}
2489 
2490 	if (childpid != 0) {
2491 
2492 		if (process == NULLPROC) {
2493 			/*
2494 			 * No proc table pointer specified so search
2495 			 * for a free slot.
2496 			 */
2497 			for (process = proc_table;  process->p_flags != 0 &&
2498 			    (process < proc_table + num_proc); process++)
2499 					;
2500 
2501 			if (process == (proc_table + num_proc)) {
2502 				int old_proc_table_size = num_proc;
2503 
2504 				/* Increase the process table size */
2505 				increase_proc_table_size();
2506 				if (old_proc_table_size == num_proc) {
2507 					/* didn't grow: memory failure */
2508 					return (NO_ROOM);
2509 				} else {
2510 					process =
2511 					    proc_table + old_proc_table_size;
2512 				}
2513 			}
2514 
2515 			process->p_time = 0L;
2516 			process->p_count = 0;
2517 		}
2518 		process->p_id[0] = '\0';
2519 		process->p_id[1] = '\0';
2520 		process->p_id[2] = '\0';
2521 		process->p_id[3] = '\0';
2522 		process->p_pid = childpid;
2523 		process->p_flags = (LIVING | OCCUPIED | modes);
2524 		process->p_exit = 0;
2525 
2526 		st_write();
2527 	} else {
2528 		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2529 			(void) setpgrp();
2530 
2531 		process = NULLPROC;
2532 
2533 		/*
2534 		 * Reset all signals to the system defaults.
2535 		 */
2536 		for (i = SIGHUP; i <= SIGRTMAX; i++)
2537 			(void) sigset(i, SIG_DFL);
2538 
2539 		/*
2540 		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2541 		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2542 		 *
2543 		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2544 		 * for backward compatibility.
2545 		 */
2546 		(void) sigset(SIGTTIN, SIG_IGN);
2547 		(void) sigset(SIGTTOU, SIG_IGN);
2548 		(void) sigset(SIGTSTP, SIG_IGN);
2549 		(void) sigset(SIGXCPU, SIG_IGN);
2550 		(void) sigset(SIGXFSZ, SIG_IGN);
2551 	}
2552 	return (process);
2553 }
2554 
2555 
2556 /*
2557  * waitproc() waits for a specified process to die.  For this function to
2558  * work, the specified process must already in the proc_table.  waitproc()
2559  * returns the exit status of the specified process when it dies.
2560  */
2561 static long
2562 waitproc(struct PROC_TABLE *process)
2563 {
2564 	int		answer;
2565 	sigset_t	oldmask, newmask, zeromask;
2566 
2567 	(void) sigemptyset(&zeromask);
2568 	(void) sigemptyset(&newmask);
2569 
2570 	(void) sigaddset(&newmask, SIGCLD);
2571 
2572 	/* Block SIGCLD and save the current signal mask */
2573 	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2574 		perror("SIG_BLOCK error");
2575 
2576 	/*
2577 	 * Wait around until the process dies.
2578 	 */
2579 	if (process->p_flags & LIVING)
2580 		(void) sigsuspend(&zeromask);
2581 
2582 	/* Reset signal mask to unblock SIGCLD */
2583 	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2584 		perror("SIG_SETMASK error");
2585 
2586 	if (process->p_flags & LIVING)
2587 		return (FAILURE);
2588 
2589 	/*
2590 	 * Make sure to only return 16 bits so that answer will always
2591 	 * be positive whenever the process of interest really died.
2592 	 */
2593 	answer = (process->p_exit & 0xffff);
2594 
2595 	/*
2596 	 * Free the slot in the proc_table.
2597 	 */
2598 	process->p_flags = 0;
2599 	return (answer);
2600 }
2601 
2602 /*
2603  * notify_pam_dead(): calls into the PAM framework to close the given session.
2604  */
2605 static void
2606 notify_pam_dead(struct utmpx *up)
2607 {
2608 	pam_handle_t *pamh;
2609 	char user[sizeof (up->ut_user) + 1];
2610 	char ttyn[sizeof (up->ut_line) + 1];
2611 	char host[sizeof (up->ut_host) + 1];
2612 
2613 	/*
2614 	 * PAM does not take care of updating utmpx/wtmpx.
2615 	 */
2616 	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2617 	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2618 	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2619 
2620 	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2621 		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2622 		(void) pam_set_item(pamh, PAM_RHOST, host);
2623 		(void) pam_close_session(pamh, 0);
2624 		(void) pam_end(pamh, PAM_SUCCESS);
2625 	}
2626 }
2627 
2628 /*
2629  * Check you can access utmpx (As / may be read-only and
2630  * /var may not be mounted yet).
2631  */
2632 static int
2633 access_utmpx(void)
2634 {
2635 	do {
2636 		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2637 	} while (!utmpx_ok && errno == EINTR);
2638 
2639 	return (utmpx_ok);
2640 }
2641 
2642 /*
2643  * account() updates entries in utmpx and appends new entries to the end of
2644  * wtmpx (assuming they exist).  The program argument indicates the name of
2645  * program if INIT_PROCESS, otherwise should be NULL.
2646  *
2647  * account() only blocks for INIT_PROCESS requests.
2648  *
2649  * Returns non-zero if write failed.
2650  */
2651 static int
2652 account(short state, struct PROC_TABLE *process, char *program)
2653 {
2654 	struct utmpx utmpbuf, *u, *oldu;
2655 	int tmplen;
2656 	char fail_buf[UT_LINE_SZ];
2657 	sigset_t block, unblock;
2658 
2659 	if (!utmpx_ok && !access_utmpx()) {
2660 		return (-1);
2661 	}
2662 
2663 	/*
2664 	 * Set up the prototype for the utmp structure we want to write.
2665 	 */
2666 	u = &utmpbuf;
2667 	(void) memset(u, 0, sizeof (struct utmpx));
2668 
2669 	/*
2670 	 * Fill in the various fields of the utmp structure.
2671 	 */
2672 	u->ut_id[0] = process->p_id[0];
2673 	u->ut_id[1] = process->p_id[1];
2674 	u->ut_id[2] = process->p_id[2];
2675 	u->ut_id[3] = process->p_id[3];
2676 	u->ut_pid = process->p_pid;
2677 
2678 	/*
2679 	 * Fill the "ut_exit" structure.
2680 	 */
2681 	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2682 	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2683 	u->ut_type = state;
2684 
2685 	(void) time(&u->ut_tv.tv_sec);
2686 
2687 	/*
2688 	 * Block signals for utmp update.
2689 	 */
2690 	(void) sigfillset(&block);
2691 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2692 
2693 	/*
2694 	 * See if there already is such an entry in the "utmpx" file.
2695 	 */
2696 	setutxent();	/* Start at beginning of utmpx file. */
2697 
2698 	if ((oldu = getutxid(u)) != NULL) {
2699 		/*
2700 		 * Copy in the old "user", "line" and "host" fields
2701 		 * to our new structure.
2702 		 */
2703 		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2704 		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2705 		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2706 		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2707 		    min(tmplen + 1, sizeof (u->ut_host)) : 0;
2708 
2709 		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2710 			notify_pam_dead(oldu);
2711 		}
2712 	}
2713 
2714 	/*
2715 	 * Perform special accounting. Insert the special string into the
2716 	 * ut_line array. For INIT_PROCESSes put in the name of the
2717 	 * program in the "ut_user" field.
2718 	 */
2719 	switch (state) {
2720 	case INIT_PROCESS:
2721 		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2722 		(void) strcpy(fail_buf, "INIT_PROCESS");
2723 		break;
2724 
2725 	default:
2726 		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2727 		break;
2728 	}
2729 
2730 	/*
2731 	 * Write out the updated entry to utmpx file.
2732 	 */
2733 	if (pututxline(u) == NULL) {
2734 		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2735 		    fail_buf, strerror(errno));
2736 		endutxent();
2737 		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2738 		return (-1);
2739 	}
2740 
2741 	/*
2742 	 * If we're able to write to utmpx, then attempt to add to the
2743 	 * end of the wtmpx file.
2744 	 */
2745 	updwtmpx(WTMPX, u);
2746 
2747 	endutxent();
2748 
2749 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2750 
2751 	return (0);
2752 }
2753 
2754 static void
2755 clearent(pid_t pid, short status)
2756 {
2757 	struct utmpx *up;
2758 	sigset_t block, unblock;
2759 
2760 	/*
2761 	 * Block signals for utmp update.
2762 	 */
2763 	(void) sigfillset(&block);
2764 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2765 
2766 	/*
2767 	 * No error checking for now.
2768 	 */
2769 
2770 	setutxent();
2771 	while (up = getutxent()) {
2772 		if (up->ut_pid == pid) {
2773 			if (up->ut_type == DEAD_PROCESS) {
2774 				/*
2775 				 * Cleaned up elsewhere.
2776 				 */
2777 				continue;
2778 			}
2779 
2780 			notify_pam_dead(up);
2781 
2782 			up->ut_type = DEAD_PROCESS;
2783 			up->ut_exit.e_termination = WTERMSIG(status);
2784 			up->ut_exit.e_exit = WEXITSTATUS(status);
2785 			(void) time(&up->ut_tv.tv_sec);
2786 
2787 			(void) pututxline(up);
2788 			/*
2789 			 * Now attempt to add to the end of the
2790 			 * wtmp and wtmpx files.  Do not create
2791 			 * if they don't already exist.
2792 			 */
2793 			updwtmpx(WTMPX, up);
2794 
2795 			break;
2796 		}
2797 	}
2798 
2799 	endutxent();
2800 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2801 }
2802 
/*
 * prog_name() searches for the word or unix path name and
 * returns a pointer to the last element of the pathname.
 *
 * Leading blanks and tabs are skipped.  If the first remaining character is
 * not '.', '/', '_', a letter or a digit, an empty string is returned.
 * Otherwise the word runs up to the next blank, tab, newline or the end of
 * the string, and the part after its last '/' is copied, truncated to
 * _POSIX_LOGIN_NAME_MAX - 1 characters, into a static buffer that the next
 * call overwrites.
 */
static char *
prog_name(char *string)
{
	/* XXX - utmp - fix name length */
	static char word[_POSIX_LOGIN_NAME_MAX];
	const char *base, *stop, *scan;
	size_t len;
	char first;

	/* Step over leading spaces and tabs. */
	string += strspn(string, " \t");

	/* Reject anything that cannot begin a word. */
	first = *string;
	if (!(first == '.' || first == '/' || first == '_' ||
	    (first >= 'a' && first <= 'z') ||
	    (first >= 'A' && first <= 'Z') ||
	    (first >= '0' && first <= '9')))
		return ("");

	/*
	 * "stop" marks the end of the word; "base" ends up just past the
	 * last '/' inside it (or at its start when there is none).
	 */
	stop = string + strcspn(string, " \t\n");
	base = string;
	for (scan = string; scan < stop; scan++) {
		if (*scan == '/')
			base = scan + 1;
	}

	/* Copy as much as fits, always leaving room for the terminator. */
	len = (size_t)(stop - base);
	if (len > sizeof (word) - 1)
		len = sizeof (word) - 1;
	(void) memmove(word, base, len);
	word[len] = '\0';

	return (&word[0]);
}
2855 
2856 
2857 /*
2858  * realcon() returns a nonzero value if there is a character device
2859  * associated with SYSCON that has the same device number as CONSOLE.
2860  */
2861 static int
2862 realcon()
2863 {
2864 	struct stat sconbuf, conbuf;
2865 
2866 	if (stat(SYSCON, &sconbuf) != -1 &&
2867 	    stat(CONSOLE, &conbuf) != -1 &&
2868 	    S_ISCHR(sconbuf.st_mode) &&
2869 	    S_ISCHR(conbuf.st_mode) &&
2870 	    sconbuf.st_rdev == conbuf.st_rdev) {
2871 		return (1);
2872 	} else {
2873 		return (0);
2874 	}
2875 }
2876 
2877 
2878 /*
2879  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2880  * Returns true if the IOCTLSYSCON file needs to be written (with
2881  * write_ioctl_syscon() below)
2882  */
2883 static int
2884 get_ioctl_syscon()
2885 {
2886 	FILE	*fp;
2887 	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2888 	int		i, valid_format = 0;
2889 
2890 	/*
2891 	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2892 	 */
2893 	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2894 		stored_syscon_termios = dflt_termios;
2895 		console(B_TRUE,
2896 		    "warning:%s does not exist, default settings assumed\n",
2897 		    IOCTLSYSCON);
2898 	} else {
2899 
2900 	    i = fscanf(fp,
2901 	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2902 		&iflags, &oflags, &cflags, &lflags,
2903 		&cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2904 		&cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2905 		&cc[14], &cc[15], &cc[16], &cc[17]);
2906 
2907 	    if (i == 22) {
2908 		stored_syscon_termios.c_iflag = iflags;
2909 		stored_syscon_termios.c_oflag = oflags;
2910 		stored_syscon_termios.c_cflag = cflags;
2911 		stored_syscon_termios.c_lflag = lflags;
2912 		for (i = 0; i < 18; i++)
2913 			stored_syscon_termios.c_cc[i] = (char)cc[i];
2914 		valid_format = 1;
2915 	    } else if (i == 13) {
2916 		rewind(fp);
2917 		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2918 		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2919 		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2920 
2921 		/*
2922 		 * If the file is formatted properly, use the values to
2923 		 * initialize the console terminal condition.
2924 		 */
2925 		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2926 		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2927 		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2928 		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2929 		for (i = 0; i < 8; i++)
2930 			stored_syscon_termios.c_cc[i] = (char)cc[i];
2931 		valid_format = 1;
2932 	    }
2933 	    (void) fclose(fp);
2934 
2935 	    /* If the file is badly formatted, use the default settings. */
2936 	    if (!valid_format)
2937 		stored_syscon_termios = dflt_termios;
2938 	}
2939 
2940 	/* If the file had a bad format, rewrite it later. */
2941 	return (!valid_format);
2942 }
2943 
2944 
2945 static void
2946 write_ioctl_syscon()
2947 {
2948 	FILE *fp;
2949 	int i;
2950 
2951 	(void) unlink(SYSCON);
2952 	(void) link(SYSTTY, SYSCON);
2953 	(void) umask(022);
2954 	fp = fopen(IOCTLSYSCON, "w");
2955 
2956 	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2957 	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2958 	    stored_syscon_termios.c_lflag);
2959 	for (i = 0; i < 8; ++i)
2960 		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2961 	(void) putc('\n', fp);
2962 
2963 	(void) fflush(fp);
2964 	(void) fsync(fileno(fp));
2965 	(void) fclose(fp);
2966 	(void) umask(cmask);
2967 }
2968 
2969 
2970 /*
2971  * void console(boolean_t, char *, ...)
2972  *   Outputs the requested message to the system console.  Note that the number
2973  *   of arguments passed to console() should be determined by the print format.
2974  *
2975  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2976  *   message.
2977  *
2978  *   To make sure we write to the console in a sane fashion, we use the modes
2979  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2980  *   Afterwards we restore whatever modes were already there.
2981  */
2982 /* PRINTFLIKE2 */
2983 static void
2984 console(boolean_t prefix, char *format, ...)
2985 {
2986 	char	outbuf[BUFSIZ];
2987 	va_list	args;
2988 	int fd, getret;
2989 	struct termios old_syscon_termios;
2990 	FILE *f;
2991 
2992 	/*
2993 	 * We open SYSCON anew each time in case it has changed (see
2994 	 * userinit()).
2995 	 */
2996 	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
2997 	    (f = fdopen(fd, "r+")) == NULL) {
2998 		if (prefix)
2999 			syslog(LOG_WARNING, "INIT: ");
3000 		va_start(args, format);
3001 		vsyslog(LOG_WARNING, format, args);
3002 		va_end(args);
3003 		if (fd >= 0)
3004 			(void) close(fd);
3005 		return;
3006 	}
3007 	setbuf(f, &outbuf[0]);
3008 
3009 	getret = tcgetattr(fd, &old_syscon_termios);
3010 	old_syscon_termios.c_cflag &= ~HUPCL;
3011 	if (realcon())
3012 		/* Don't overwrite cflag of real console. */
3013 		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3014 
3015 	stored_syscon_termios.c_cflag &= ~HUPCL;
3016 
3017 	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3018 
3019 	if (prefix)
3020 		(void) fprintf(f, "\nINIT: ");
3021 	va_start(args, format);
3022 	(void) vfprintf(f, format, args);
3023 	va_end(args);
3024 
3025 	if (getret == 0)
3026 		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3027 
3028 	(void) fclose(f);
3029 }
3030 
3031 /*
3032  * timer() is a substitute for sleep() which uses alarm() and pause().
3033  */
3034 static void
3035 timer(int waitime)
3036 {
3037 	setimer(waitime);
3038 	while (time_up == FALSE)
3039 		(void) pause();
3040 }
3041 
3042 static void
3043 setimer(int timelimit)
3044 {
3045 	alarmclk();
3046 	(void) alarm(timelimit);
3047 	time_up = (timelimit ? FALSE : TRUE);
3048 }
3049 
3050 /*
3051  * Fails with
3052  *   ENOMEM - out of memory
3053  *   ECONNABORTED - repository connection broken
3054  *   EPERM - permission denied
3055  *   EACCES - backend access denied
3056  *   EROFS - backend readonly
3057  */
3058 static int
3059 get_or_add_startd(scf_instance_t *inst)
3060 {
3061 	scf_handle_t *h;
3062 	scf_scope_t *scope = NULL;
3063 	scf_service_t *svc = NULL;
3064 	int ret = 0;
3065 
3066 	h = scf_instance_handle(inst);
3067 
3068 	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
3069 	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
3070 		return (0);
3071 
3072 	switch (scf_error()) {
3073 	case SCF_ERROR_CONNECTION_BROKEN:
3074 		return (ECONNABORTED);
3075 
3076 	case SCF_ERROR_NOT_FOUND:
3077 		break;
3078 
3079 	case SCF_ERROR_HANDLE_MISMATCH:
3080 	case SCF_ERROR_INVALID_ARGUMENT:
3081 	case SCF_ERROR_CONSTRAINT_VIOLATED:
3082 	default:
3083 		bad_error("scf_handle_decode_fmri", scf_error());
3084 	}
3085 
3086 	/* Make sure we're right, since we're adding piece-by-piece. */
3087 	assert(strcmp(SCF_SERVICE_STARTD,
3088 	    "svc:/system/svc/restarter:default") == 0);
3089 
3090 	if ((scope = scf_scope_create(h)) == NULL ||
3091 	    (svc = scf_service_create(h)) == NULL) {
3092 		ret = ENOMEM;
3093 		goto out;
3094 	}
3095 
3096 get_scope:
3097 	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
3098 		switch (scf_error()) {
3099 		case SCF_ERROR_CONNECTION_BROKEN:
3100 			ret = ECONNABORTED;
3101 			goto out;
3102 
3103 		case SCF_ERROR_NOT_FOUND:
3104 			(void) fputs(gettext(
3105 			    "smf(5) repository missing local scope.\n"),
3106 			    stderr);
3107 			exit(1);
3108 			/* NOTREACHED */
3109 
3110 		case SCF_ERROR_HANDLE_MISMATCH:
3111 		case SCF_ERROR_INVALID_ARGUMENT:
3112 		default:
3113 			bad_error("scf_handle_get_scope", scf_error());
3114 		}
3115 	}
3116 
3117 get_svc:
3118 	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
3119 		switch (scf_error()) {
3120 		case SCF_ERROR_CONNECTION_BROKEN:
3121 			ret = ECONNABORTED;
3122 			goto out;
3123 
3124 		case SCF_ERROR_DELETED:
3125 			goto get_scope;
3126 
3127 		case SCF_ERROR_NOT_FOUND:
3128 			break;
3129 
3130 		case SCF_ERROR_HANDLE_MISMATCH:
3131 		case SCF_ERROR_INVALID_ARGUMENT:
3132 		case SCF_ERROR_NOT_SET:
3133 		default:
3134 			bad_error("scf_scope_get_service", scf_error());
3135 		}
3136 
3137 add_svc:
3138 		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
3139 		    0) {
3140 			switch (scf_error()) {
3141 			case SCF_ERROR_CONNECTION_BROKEN:
3142 				ret = ECONNABORTED;
3143 				goto out;
3144 
3145 			case SCF_ERROR_EXISTS:
3146 				goto get_svc;
3147 
3148 			case SCF_ERROR_PERMISSION_DENIED:
3149 				ret = EPERM;
3150 				goto out;
3151 
3152 			case SCF_ERROR_BACKEND_ACCESS:
3153 				ret = EACCES;
3154 				goto out;
3155 
3156 			case SCF_ERROR_BACKEND_READONLY:
3157 				ret = EROFS;
3158 				goto out;
3159 
3160 			case SCF_ERROR_HANDLE_MISMATCH:
3161 			case SCF_ERROR_INVALID_ARGUMENT:
3162 			case SCF_ERROR_NOT_SET:
3163 			default:
3164 				bad_error("scf_scope_add_service", scf_error());
3165 			}
3166 		}
3167 	}
3168 
3169 get_inst:
3170 	if (scf_service_get_instance(svc, "default", inst) != 0) {
3171 		switch (scf_error()) {
3172 		case SCF_ERROR_CONNECTION_BROKEN:
3173 			ret = ECONNABORTED;
3174 			goto out;
3175 
3176 		case SCF_ERROR_DELETED:
3177 			goto add_svc;
3178 
3179 		case SCF_ERROR_NOT_FOUND:
3180 			break;
3181 
3182 		case SCF_ERROR_HANDLE_MISMATCH:
3183 		case SCF_ERROR_INVALID_ARGUMENT:
3184 		case SCF_ERROR_NOT_SET:
3185 		default:
3186 			bad_error("scf_service_get_instance", scf_error());
3187 		}
3188 
3189 		if (scf_service_add_instance(svc, "default", inst) !=
3190 		    0) {
3191 			switch (scf_error()) {
3192 			case SCF_ERROR_CONNECTION_BROKEN:
3193 				ret = ECONNABORTED;
3194 				goto out;
3195 
3196 			case SCF_ERROR_DELETED:
3197 				goto add_svc;
3198 
3199 			case SCF_ERROR_EXISTS:
3200 				goto get_inst;
3201 
3202 			case SCF_ERROR_PERMISSION_DENIED:
3203 				ret = EPERM;
3204 				goto out;
3205 
3206 			case SCF_ERROR_BACKEND_ACCESS:
3207 				ret = EACCES;
3208 				goto out;
3209 
3210 			case SCF_ERROR_BACKEND_READONLY:
3211 				ret = EROFS;
3212 				goto out;
3213 
3214 			case SCF_ERROR_HANDLE_MISMATCH:
3215 			case SCF_ERROR_INVALID_ARGUMENT:
3216 			case SCF_ERROR_NOT_SET:
3217 			default:
3218 				bad_error("scf_service_add_instance",
3219 				    scf_error());
3220 			}
3221 		}
3222 	}
3223 
3224 	ret = 0;
3225 
3226 out:
3227 	scf_service_destroy(svc);
3228 	scf_scope_destroy(scope);
3229 	return (ret);
3230 }
3231 
3232 /*
3233  * Fails with
3234  *   ECONNABORTED - repository connection broken
3235  *   ECANCELED - the transaction's property group was deleted
3236  */
3237 static int
3238 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3239     const char *pname, scf_type_t type)
3240 {
3241 change_type:
3242 	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3243 		return (0);
3244 
3245 	switch (scf_error()) {
3246 	case SCF_ERROR_CONNECTION_BROKEN:
3247 		return (ECONNABORTED);
3248 
3249 	case SCF_ERROR_DELETED:
3250 		return (ECANCELED);
3251 
3252 	case SCF_ERROR_NOT_FOUND:
3253 		goto new;
3254 
3255 	case SCF_ERROR_HANDLE_MISMATCH:
3256 	case SCF_ERROR_INVALID_ARGUMENT:
3257 	case SCF_ERROR_NOT_BOUND:
3258 	case SCF_ERROR_NOT_SET:
3259 	default:
3260 		bad_error("scf_transaction_property_change_type", scf_error());
3261 	}
3262 
3263 new:
3264 	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3265 		return (0);
3266 
3267 	switch (scf_error()) {
3268 	case SCF_ERROR_CONNECTION_BROKEN:
3269 		return (ECONNABORTED);
3270 
3271 	case SCF_ERROR_DELETED:
3272 		return (ECANCELED);
3273 
3274 	case SCF_ERROR_EXISTS:
3275 		goto change_type;
3276 
3277 	case SCF_ERROR_HANDLE_MISMATCH:
3278 	case SCF_ERROR_INVALID_ARGUMENT:
3279 	case SCF_ERROR_NOT_BOUND:
3280 	case SCF_ERROR_NOT_SET:
3281 	default:
3282 		bad_error("scf_transaction_property_new", scf_error());
3283 		/* NOTREACHED */
3284 	}
3285 }
3286 
3287 static void
3288 scferr(void)
3289 {
3290 	switch (scf_error()) {
3291 	case SCF_ERROR_NO_MEMORY:
3292 		console(B_TRUE, gettext("Out of memory.\n"));
3293 		break;
3294 
3295 	case SCF_ERROR_CONNECTION_BROKEN:
3296 		console(B_TRUE, gettext(
3297 		    "Connection to smf(5) repository server broken.\n"));
3298 		break;
3299 
3300 	case SCF_ERROR_NO_RESOURCES:
3301 		console(B_TRUE, gettext(
3302 		    "smf(5) repository server is out of memory.\n"));
3303 		break;
3304 
3305 	case SCF_ERROR_PERMISSION_DENIED:
3306 		console(B_TRUE, gettext("Insufficient privileges.\n"));
3307 		break;
3308 
3309 	default:
3310 		console(B_TRUE, gettext("libscf error: %s\n"),
3311 		    scf_strerror(scf_error()));
3312 	}
3313 }
3314 
3315 static void
3316 lscf_set_runlevel(char rl)
3317 {
3318 	scf_handle_t *h;
3319 	scf_instance_t *inst = NULL;
3320 	scf_propertygroup_t *pg = NULL;
3321 	scf_transaction_t *tx = NULL;
3322 	scf_transaction_entry_t *ent = NULL;
3323 	scf_value_t *val = NULL;
3324 	char buf[2];
3325 	int r;
3326 
3327 	h = scf_handle_create(SCF_VERSION);
3328 	if (h == NULL) {
3329 		scferr();
3330 		return;
3331 	}
3332 
3333 	if (scf_handle_bind(h) != 0) {
3334 		switch (scf_error()) {
3335 		case SCF_ERROR_NO_SERVER:
3336 			console(B_TRUE,
3337 			    gettext("smf(5) repository server not running.\n"));
3338 			goto bail;
3339 
3340 		default:
3341 			scferr();
3342 			goto bail;
3343 		}
3344 	}
3345 
3346 	if ((inst = scf_instance_create(h)) == NULL ||
3347 	    (pg = scf_pg_create(h)) == NULL ||
3348 	    (val = scf_value_create(h)) == NULL ||
3349 	    (tx = scf_transaction_create(h)) == NULL ||
3350 	    (ent = scf_entry_create(h)) == NULL) {
3351 		scferr();
3352 		goto bail;
3353 	}
3354 
3355 get_inst:
3356 	r = get_or_add_startd(inst);
3357 	switch (r) {
3358 	case 0:
3359 		break;
3360 
3361 	case ENOMEM:
3362 	case ECONNABORTED:
3363 	case EPERM:
3364 	case EACCES:
3365 	case EROFS:
3366 		scferr();
3367 		goto bail;
3368 	default:
3369 		bad_error("get_or_add_startd", r);
3370 	}
3371 
3372 get_pg:
3373 	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
3374 		switch (scf_error()) {
3375 		case SCF_ERROR_CONNECTION_BROKEN:
3376 			scferr();
3377 			goto bail;
3378 
3379 		case SCF_ERROR_DELETED:
3380 			goto get_inst;
3381 
3382 		case SCF_ERROR_NOT_FOUND:
3383 			break;
3384 
3385 		case SCF_ERROR_HANDLE_MISMATCH:
3386 		case SCF_ERROR_INVALID_ARGUMENT:
3387 		case SCF_ERROR_NOT_SET:
3388 		default:
3389 			bad_error("scf_instance_get_pg", scf_error());
3390 		}
3391 
3392 add_pg:
3393 		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
3394 		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
3395 		    0) {
3396 			switch (scf_error()) {
3397 			case SCF_ERROR_CONNECTION_BROKEN:
3398 			case SCF_ERROR_PERMISSION_DENIED:
3399 			case SCF_ERROR_BACKEND_ACCESS:
3400 				scferr();
3401 				goto bail;
3402 
3403 			case SCF_ERROR_DELETED:
3404 				goto get_inst;
3405 
3406 			case SCF_ERROR_EXISTS:
3407 				goto get_pg;
3408 
3409 			case SCF_ERROR_HANDLE_MISMATCH:
3410 			case SCF_ERROR_INVALID_ARGUMENT:
3411 			case SCF_ERROR_NOT_SET:
3412 			default:
3413 				bad_error("scf_instance_add_pg", scf_error());
3414 			}
3415 		}
3416 	}
3417 
3418 	buf[0] = rl;
3419 	buf[1] = '\0';
3420 	r = scf_value_set_astring(val, buf);
3421 	assert(r == 0);
3422 
3423 	for (;;) {
3424 		if (scf_transaction_start(tx, pg) != 0) {
3425 			switch (scf_error()) {
3426 			case SCF_ERROR_CONNECTION_BROKEN:
3427 			case SCF_ERROR_PERMISSION_DENIED:
3428 			case SCF_ERROR_BACKEND_ACCESS:
3429 				scferr();
3430 				goto bail;
3431 
3432 			case SCF_ERROR_DELETED:
3433 				goto add_pg;
3434 
3435 			case SCF_ERROR_HANDLE_MISMATCH:
3436 			case SCF_ERROR_NOT_BOUND:
3437 			case SCF_ERROR_IN_USE:
3438 			case SCF_ERROR_NOT_SET:
3439 			default:
3440 				bad_error("scf_transaction_start", scf_error());
3441 			}
3442 		}
3443 
3444 		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
3445 		switch (r) {
3446 		case 0:
3447 			break;
3448 
3449 		case ECONNABORTED:
3450 			scferr();
3451 			goto bail;
3452 
3453 		case ECANCELED:
3454 			scf_transaction_reset(tx);
3455 			goto add_pg;
3456 
3457 		default:
3458 			bad_error("transaction_add_set", r);
3459 		}
3460 
3461 		r = scf_entry_add_value(ent, val);
3462 		assert(r == 0);
3463 
3464 		r = scf_transaction_commit(tx);
3465 		if (r == 1)
3466 			break;
3467 
3468 		if (r != 0) {
3469 			switch (scf_error()) {
3470 			case SCF_ERROR_CONNECTION_BROKEN:
3471 			case SCF_ERROR_PERMISSION_DENIED:
3472 			case SCF_ERROR_BACKEND_ACCESS:
3473 			case SCF_ERROR_BACKEND_READONLY:
3474 				scferr();
3475 				goto bail;
3476 
3477 			case SCF_ERROR_DELETED:
3478 				scf_transaction_reset(tx);
3479 				goto add_pg;
3480 
3481 			case SCF_ERROR_INVALID_ARGUMENT:
3482 			case SCF_ERROR_NOT_BOUND:
3483 			case SCF_ERROR_NOT_SET:
3484 			default:
3485 				bad_error("scf_transaction_commit",
3486 				    scf_error());
3487 			}
3488 		}
3489 
3490 		scf_transaction_reset(tx);
3491 		(void) scf_pg_update(pg);
3492 	}
3493 
3494 bail:
3495 	scf_transaction_destroy(tx);
3496 	scf_entry_destroy(ent);
3497 	scf_value_destroy(val);
3498 	scf_pg_destroy(pg);
3499 	scf_instance_destroy(inst);
3500 
3501 	(void) scf_handle_unbind(h);
3502 	scf_handle_destroy(h);
3503 }
3504 
3505 /*
3506  * Function to handle requests from users to main init running as process 1.
3507  */
3508 static void
3509 userinit(int argc, char **argv)
3510 {
3511 	FILE	*fp;
3512 	char	*ln;
3513 	int	init_signal;
3514 	struct stat	sconbuf, conbuf;
3515 	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3516 
3517 	/*
3518 	 * We are a user invoked init.  Is there an argument and is it
3519 	 * a single character?  If not, print usage message and quit.
3520 	 */
3521 	if (argc != 2 || argv[1][1] != '\0') {
3522 		(void) fprintf(stderr, usage_msg);
3523 		exit(0);
3524 	}
3525 
3526 	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3527 		(void) fprintf(stderr, usage_msg);
3528 		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3529 		    argv[1]);
3530 		exit(1);
3531 	}
3532 
3533 	if (init_signal == SINGLE_USER) {
3534 		/*
3535 		 * Make sure this process is talking to a legal tty line
3536 		 * and that /dev/syscon is linked to this line.
3537 		 */
3538 		ln = ttyname(0);	/* Get the name of tty */
3539 		if (ln == NULL) {
3540 			(void) fprintf(stderr,
3541 			    "Standard input not a tty line\n");
3542 			(void) audit_put_record(ADT_FAILURE,
3543 			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3544 			exit(1);
3545 		}
3546 
3547 		if ((stat(ln, &sconbuf) != -1) &&
3548 		    (stat(SYSCON, &conbuf) == -1 ||
3549 		    sconbuf.st_rdev != conbuf.st_rdev)) {
3550 			/*
3551 			 * /dev/syscon needs to change.
3552 			 * Unlink /dev/syscon and relink it to the current line.
3553 			 */
3554 			if (lstat(SYSCON, &conbuf) != -1 &&
3555 			    unlink(SYSCON) == FAILURE) {
3556 				perror("Can't unlink /dev/syscon");
3557 				(void) fprintf(stderr,
3558 				    "Run command on the system console.\n");
3559 				(void) audit_put_record(ADT_FAILURE,
3560 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3561 				exit(1);
3562 			}
3563 			if (symlink(ln, SYSCON) == FAILURE) {
3564 				(void) fprintf(stderr,
3565 				    "Can't symlink /dev/syscon to %s: %s", ln,
3566 				    strerror(errno));
3567 
3568 				/* Try to leave a syscon */
3569 				(void) link(SYSTTY, SYSCON);
3570 				(void) audit_put_record(ADT_FAILURE,
3571 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3572 				exit(1);
3573 			}
3574 
3575 			/*
3576 			 * Try to leave a message on system console saying where
3577 			 * /dev/syscon is currently connected.
3578 			 */
3579 			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3580 				(void) fprintf(fp,
3581 				    "\n****	SYSCON CHANGED TO %s	****\n",
3582 				    ln);
3583 				(void) fclose(fp);
3584 			}
3585 		}
3586 	}
3587 
3588 	update_boot_archive(init_signal);
3589 
3590 	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3591 
3592 	/*
3593 	 * Signal init; init will take care of telling svc.startd.
3594 	 */
3595 	if (kill(init_pid, init_signal) == FAILURE) {
3596 		(void) fprintf(stderr, "Must be super-user\n");
3597 		(void) audit_put_record(ADT_FAILURE,
3598 		    ADT_FAIL_VALUE_AUTH, argv[1]);
3599 		exit(1);
3600 	}
3601 
3602 	exit(0);
3603 }
3604 
3605 
3606 #define	DELTA	25	/* Number of pidlist elements to allocate at a time */
3607 
3608 /* ARGSUSED */
3609 void
3610 sigpoll(int n)
3611 {
3612 	struct pidrec prec;
3613 	struct pidrec *p = &prec;
3614 	struct pidlist *plp;
3615 	struct pidlist *tp, *savetp;
3616 	int i;
3617 
3618 	if (Pfd < 0) {
3619 		return;
3620 	}
3621 
3622 	for (;;) {
3623 		/*
3624 		 * Important Note: Either read will really fail (in which case
3625 		 * return is all we can do) or will get EAGAIN (Pfd was opened
3626 		 * O_NDELAY), in which case we also want to return.
3627 		 * Always return from here!
3628 		 */
3629 		if (read(Pfd, p, sizeof (struct pidrec)) !=
3630 						sizeof (struct pidrec)) {
3631 			return;
3632 		}
3633 		switch (p->pd_type) {
3634 
3635 		case ADDPID:
3636 			/*
3637 			 * New "godchild", add to list.
3638 			 */
3639 			if (Plfree == NULL) {
3640 				plp = (struct pidlist *)calloc(DELTA,
3641 				    sizeof (struct pidlist));
3642 				if (plp == NULL) {
3643 					/* Can't save pid */
3644 					break;
3645 				}
3646 				/*
3647 				 * Point at 2nd record allocated, we'll use plp.
3648 				 */
3649 				tp = plp + 1;
3650 				/*
3651 				 * Link them into a chain.
3652 				 */
3653 				Plfree = tp;
3654 				for (i = 0; i < DELTA - 2; i++) {
3655 					tp->pl_next = tp + 1;
3656 					tp++;
3657 				}
3658 			} else {
3659 				plp = Plfree;
3660 				Plfree = plp->pl_next;
3661 			}
3662 			plp->pl_pid = p->pd_pid;
3663 			plp->pl_dflag = 0;
3664 			plp->pl_next = NULL;
3665 			/*
3666 			 * Note - pid list is kept in increasing order of pids.
3667 			 */
3668 			if (Plhead == NULL) {
3669 				Plhead = plp;
3670 				/* Back up to read next record */
3671 				break;
3672 			} else {
3673 				savetp = tp = Plhead;
3674 				while (tp) {
3675 					if (plp->pl_pid > tp->pl_pid) {
3676 						savetp = tp;
3677 						tp = tp->pl_next;
3678 						continue;
3679 					} else if (plp->pl_pid < tp->pl_pid) {
3680 						if (tp == Plhead) {
3681 							plp->pl_next = Plhead;
3682 							Plhead = plp;
3683 						} else {
3684 							plp->pl_next =
3685 							    savetp->pl_next;
3686 							savetp->pl_next = plp;
3687 						}
3688 						break;
3689 					} else {
3690 						/* Already in list! */
3691 						plp->pl_next = Plfree;
3692 						Plfree = plp;
3693 						break;
3694 					}
3695 				}
3696 				if (tp == NULL) {
3697 					/* Add to end of list */
3698 					savetp->pl_next = plp;
3699 				}
3700 			}
3701 			/* Back up to read next record. */
3702 			break;
3703 
3704 		case REMPID:
3705 			/*
3706 			 * This one was handled by someone else,
3707 			 * purge it from the list.
3708 			 */
3709 			if (Plhead == NULL) {
3710 				/* Back up to read next record. */
3711 				break;
3712 			}
3713 			savetp = tp = Plhead;
3714 			while (tp) {
3715 				if (p->pd_pid > tp->pl_pid) {
3716 					/* Keep on looking. */
3717 					savetp = tp;
3718 					tp = tp->pl_next;
3719 					continue;
3720 				} else if (p->pd_pid < tp->pl_pid) {
3721 					/* Not in list. */
3722 					break;
3723 				} else {
3724 					/* Found it. */
3725 					if (tp == Plhead)
3726 						Plhead = tp->pl_next;
3727 					else
3728 						savetp->pl_next = tp->pl_next;
3729 					tp->pl_next = Plfree;
3730 					Plfree = tp;
3731 					break;
3732 				}
3733 			}
3734 			/* Back up to read next record. */
3735 			break;
3736 		default:
3737 			console(B_TRUE, "Bad message on initpipe\n");
3738 			break;
3739 		}
3740 	}
3741 }
3742 
3743 
3744 static void
3745 cleanaux()
3746 {
3747 	struct pidlist *savep, *p;
3748 	pid_t	pid;
3749 	short	status;
3750 
3751 	(void) sighold(SIGCLD);
3752 	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3753 	(void) sighold(SIGPOLL);
3754 	savep = p = Plhead;
3755 	while (p) {
3756 		if (p->pl_dflag) {
3757 			/*
3758 			 * Found an entry to delete,
3759 			 * remove it from list first.
3760 			 */
3761 			pid = p->pl_pid;
3762 			status = p->pl_exit;
3763 			if (p == Plhead) {
3764 				Plhead = p->pl_next;
3765 				p->pl_next = Plfree;
3766 				Plfree = p;
3767 				savep = p = Plhead;
3768 			} else {
3769 				savep->pl_next = p->pl_next;
3770 				p->pl_next = Plfree;
3771 				Plfree = p;
3772 				p = savep->pl_next;
3773 			}
3774 			clearent(pid, status);
3775 			continue;
3776 		}
3777 		savep = p;
3778 		p = p->pl_next;
3779 	}
3780 	(void) sigrelse(SIGPOLL);
3781 	(void) sigrelse(SIGCLD);
3782 }
3783 
3784 
3785 /*
3786  * /etc/inittab has more entries and we have run out of room in the proc_table
3787  * array. Double the size of proc_table to accomodate the extra entries.
3788  */
3789 static void
3790 increase_proc_table_size()
3791 {
3792 	sigset_t block, unblock;
3793 	void *ptr;
3794 	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3795 
3796 
3797 	/*
3798 	 * Block signals for realloc.
3799 	 */
3800 	(void) sigfillset(&block);
3801 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3802 
3803 
3804 	/*
3805 	 * On failure we just return because callers of this function check
3806 	 * for failure.
3807 	 */
3808 	do
3809 		ptr = realloc(g_state, g_state_sz + delta);
3810 	while (ptr == NULL && errno == EAGAIN);
3811 
3812 	if (ptr != NULL) {
3813 		/* ensure that the new part is initialized to zero */
3814 		bzero((caddr_t)ptr + g_state_sz, delta);
3815 
3816 		g_state = ptr;
3817 		g_state_sz += delta;
3818 		num_proc <<= 1;
3819 	}
3820 
3821 
3822 	/* unblock our signals before returning */
3823 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3824 }
3825 
3826 
3827 
3828 /*
3829  * Sanity check g_state.
3830  */
3831 static int
3832 st_sane()
3833 {
3834 	int i;
3835 	struct PROC_TABLE *ptp;
3836 
3837 
3838 	/* Note: cur_state is encoded as a signal number */
3839 	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3840 		return (0);
3841 
3842 	/* Check num_proc */
3843 	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3844 	    sizeof (struct PROC_TABLE))
3845 		return (0);
3846 
3847 	/* Check proc_table */
3848 	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3849 		/* skip unoccupied entries */
3850 		if (!(ptp->p_flags & OCCUPIED))
3851 			continue;
3852 
3853 		/* p_flags has no bits outside of PF_MASK */
3854 		if (ptp->p_flags & ~(PF_MASK))
3855 			return (0);
3856 
3857 		/* 5 <= pid <= MAXPID */
3858 		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3859 			return (0);
3860 
3861 		/* p_count >= 0 */
3862 		if (ptp->p_count < 0)
3863 			return (0);
3864 
3865 		/* p_time >= 0 */
3866 		if (ptp->p_time < 0)
3867 			return (0);
3868 	}
3869 
3870 	return (1);
3871 }
3872 
3873 /*
3874  * Initialize our state.
3875  *
3876  * If the system just booted, then init_state_file, which is located on an
3877  * everpresent tmpfs filesystem, should not exist.
3878  *
3879  * If we were restarted, then init_state_file should exist, in
3880  * which case we'll read it in, sanity check it, and use it.
3881  *
3882  * Note: You can't call console() until proc_table is ready.
3883  */
3884 void
3885 st_init()
3886 {
3887 	struct stat stb;
3888 	int ret, st_fd, insane = 0;
3889 	size_t to_be_read;
3890 	char *ptr;
3891 
3892 
3893 	booting = 1;
3894 
3895 	do {
3896 		/*
3897 		 * If we can exclusively create the file, then we're the
3898 		 * initial invocation of init(1M).
3899 		 */
3900 		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3901 		    S_IRUSR | S_IWUSR);
3902 	} while (st_fd == -1 && errno == EINTR);
3903 	if (st_fd != -1)
3904 		goto new_state;
3905 
3906 	booting = 0;
3907 
3908 	do {
3909 		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3910 	} while (st_fd == -1 && errno == EINTR);
3911 	if (st_fd == -1)
3912 		goto new_state;
3913 
3914 	/* Get the size of the file. */
3915 	do
3916 		ret = fstat(st_fd, &stb);
3917 	while (ret == -1 && errno == EINTR);
3918 	if (ret == -1)
3919 		goto new_state;
3920 
3921 	do
3922 		g_state = malloc(stb.st_size);
3923 	while (g_state == NULL && errno == EAGAIN);
3924 	if (g_state == NULL)
3925 		goto new_state;
3926 
3927 	to_be_read = stb.st_size;
3928 	ptr = (char *)g_state;
3929 	while (to_be_read > 0) {
3930 		ssize_t read_ret;
3931 
3932 		read_ret = read(st_fd, ptr, to_be_read);
3933 		if (read_ret < 0) {
3934 			if (errno == EINTR)
3935 				continue;
3936 
3937 			goto new_state;
3938 		}
3939 
3940 		to_be_read -= read_ret;
3941 		ptr += read_ret;
3942 	}
3943 
3944 	(void) close(st_fd);
3945 
3946 	g_state_sz = stb.st_size;
3947 
3948 	if (st_sane()) {
3949 		console(B_TRUE, "Restarting.\n");
3950 		return;
3951 	}
3952 
3953 	insane = 1;
3954 
3955 new_state:
3956 	if (st_fd >= 0)
3957 		(void) close(st_fd);
3958 	else
3959 		(void) unlink(init_state_file);
3960 
3961 	if (g_state != NULL)
3962 		free(g_state);
3963 
3964 	/* Something went wrong, so allocate new state. */
3965 	g_state_sz = sizeof (struct init_state) +
3966 	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3967 	do
3968 		g_state = calloc(1, g_state_sz);
3969 	while (g_state == NULL && errno == EAGAIN);
3970 	if (g_state == NULL) {
3971 		/* Fatal error! */
3972 		exit(errno);
3973 	}
3974 
3975 	g_state->ist_runlevel = -1;
3976 	num_proc = init_num_proc;
3977 
3978 	if (!booting) {
3979 		console(B_TRUE, "Restarting.\n");
3980 
3981 		/* Overwrite the bad state file. */
3982 		st_write();
3983 
3984 		if (!insane) {
3985 			console(B_TRUE,
3986 			    "Error accessing persistent state file `%s'.  "
3987 			    "Ignored.\n", init_state_file);
3988 		} else {
3989 			console(B_TRUE,
3990 			    "Persistent state file `%s' is invalid and was "
3991 			    "ignored.\n", init_state_file);
3992 		}
3993 	}
3994 }
3995 
3996 /*
3997  * Write g_state out to the state file.
3998  */
3999 void
4000 st_write()
4001 {
4002 	static int complained = 0;
4003 
4004 	int st_fd;
4005 	char *cp;
4006 	size_t sz;
4007 	ssize_t ret;
4008 
4009 
4010 	do {
4011 		st_fd = open(init_next_state_file,
4012 		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4013 	} while (st_fd < 0 && errno == EINTR);
4014 	if (st_fd < 0)
4015 		goto err;
4016 
4017 	cp = (char *)g_state;
4018 	sz = g_state_sz;
4019 	while (sz > 0) {
4020 		ret = write(st_fd, cp, sz);
4021 		if (ret < 0) {
4022 			if (errno == EINTR)
4023 				continue;
4024 
4025 			goto err;
4026 		}
4027 
4028 		sz -= ret;
4029 		cp += ret;
4030 	}
4031 
4032 	(void) close(st_fd);
4033 	st_fd = -1;
4034 	if (rename(init_next_state_file, init_state_file)) {
4035 		(void) unlink(init_next_state_file);
4036 		goto err;
4037 	}
4038 	complained = 0;
4039 
4040 	return;
4041 
4042 err:
4043 	if (st_fd >= 0)
4044 		(void) close(st_fd);
4045 
4046 	if (!booting && !complained) {
4047 		/*
4048 		 * Only complain after the filesystem should have come up.
4049 		 * And only do it once so we don't loop between console()
4050 		 * & efork().
4051 		 */
4052 		complained = 1;
4053 		if (st_fd)
4054 			console(B_TRUE, "Couldn't write persistent state "
4055 			    "file `%s'.\n", init_state_file);
4056 		else
4057 			console(B_TRUE, "Couldn't move persistent state "
4058 			    "file `%s' to `%s'.\n", init_next_state_file,
4059 			    init_state_file);
4060 	}
4061 }
4062 
4063 /*
4064  * Create a contract with these parameters.
4065  */
4066 static int
4067 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4068     uint64_t cookie)
4069 {
4070 	int fd, err;
4071 
4072 	char *ioctl_tset_emsg =
4073 	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4074 
4075 	do
4076 		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4077 	while (fd < 0 && errno == EINTR);
4078 	if (fd < 0) {
4079 		console(B_TRUE, "Couldn't create process template: %s.\n",
4080 		    strerror(errno));
4081 		return (-1);
4082 	}
4083 
4084 	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4085 		console(B_TRUE, "Contract set template inherit, regent "
4086 		    "failed: %s.\n", strerror(err));
4087 
4088 	/*
4089 	 * These errors result in a misconfigured template, which is better
4090 	 * than no template at all, so warn but don't abort.
4091 	 */
4092 	if (err = ct_tmpl_set_informative(fd, info))
4093 		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4094 
4095 	if (err = ct_tmpl_set_critical(fd, critical))
4096 		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4097 
4098 	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4099 		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4100 
4101 	if (err = ct_tmpl_set_cookie(fd, cookie))
4102 		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4103 
4104 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4105 
4106 	return (fd);
4107 }
4108 
4109 /*
4110  * Create the templates and open an event file descriptor.  We use dup2(2) to
4111  * get these descriptors away from the stdin/stdout/stderr group.
4112  */
4113 static void
4114 contracts_init()
4115 {
4116 	int err, fd;
4117 
4118 	/*
4119 	 * Create & configure a legacy template.  We only want empty events so
4120 	 * we know when to abandon them.
4121 	 */
4122 	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4123 	    ORDINARY_COOKIE);
4124 	if (legacy_tmpl >= 0) {
4125 		err = ct_tmpl_activate(legacy_tmpl);
4126 		if (err != 0) {
4127 			(void) close(legacy_tmpl);
4128 			legacy_tmpl = -1;
4129 			console(B_TRUE,
4130 			    "Couldn't activate legacy template (%s); "
4131 			    "legacy services will be in init's contract.\n",
4132 			    strerror(err));
4133 		}
4134 	} else
4135 		console(B_TRUE,
4136 		    "Legacy services will be in init's contract.\n");
4137 
4138 	if (dup2(legacy_tmpl, 255) == -1) {
4139 		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4140 		    strerror(errno));
4141 	} else {
4142 		(void) close(legacy_tmpl);
4143 		legacy_tmpl = 255;
4144 	}
4145 
4146 	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4147 
4148 	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4149 	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4150 
4151 	if (dup2(startd_tmpl, 254) == -1) {
4152 		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4153 		    strerror(errno));
4154 	} else {
4155 		(void) close(startd_tmpl);
4156 		startd_tmpl = 254;
4157 	}
4158 
4159 	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4160 
4161 	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4162 		/* The creation errors have already been reported. */
4163 		console(B_TRUE,
4164 		    "Ignoring contract events.  Core smf(5) services will not "
4165 		    "be restarted.\n");
4166 		return;
4167 	}
4168 
4169 	/*
4170 	 * Open an event endpoint.
4171 	 */
4172 	do
4173 		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4174 	while (fd < 0 && errno == EINTR);
4175 	if (fd < 0) {
4176 		console(B_TRUE,
4177 		    "Couldn't open process pbundle: %s.  Core smf(5) services "
4178 		    "will not be restarted.\n", strerror(errno));
4179 		return;
4180 	}
4181 
4182 	if (dup2(fd, 253) == -1) {
4183 		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4184 		    strerror(errno));
4185 	} else {
4186 		(void) close(fd);
4187 		fd = 253;
4188 	}
4189 
4190 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4191 
4192 	/* Reset in case we've been restarted. */
4193 	(void) ct_event_reset(fd);
4194 
4195 	poll_fds[0].fd = fd;
4196 	poll_fds[0].events = POLLIN;
4197 	poll_nfds = 1;
4198 }
4199 
4200 static int
4201 contract_getfile(ctid_t id, const char *name, int oflag)
4202 {
4203 	int fd;
4204 
4205 	do
4206 		fd = contract_open(id, "process", name, oflag);
4207 	while (fd < 0 && errno == EINTR);
4208 
4209 	if (fd < 0)
4210 		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4211 		    name, id, strerror(errno));
4212 
4213 	return (fd);
4214 }
4215 
4216 static int
4217 contract_cookie(ctid_t id, uint64_t *cp)
4218 {
4219 	int fd, err;
4220 	ct_stathdl_t sh;
4221 
4222 	fd = contract_getfile(id, "status", O_RDONLY);
4223 	if (fd < 0)
4224 		return (-1);
4225 
4226 	err = ct_status_read(fd, CTD_COMMON, &sh);
4227 	if (err != 0) {
4228 		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4229 		    id, strerror(err));
4230 		(void) close(fd);
4231 		return (-1);
4232 	}
4233 
4234 	(void) close(fd);
4235 
4236 	*cp = ct_status_get_cookie(sh);
4237 
4238 	ct_status_free(sh);
4239 	return (0);
4240 }
4241 
4242 static void
4243 contract_ack(ct_evthdl_t e)
4244 {
4245 	int fd;
4246 
4247 	if (ct_event_get_flags(e) & CTE_INFO)
4248 		return;
4249 
4250 	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4251 	if (fd < 0)
4252 		return;
4253 
4254 	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4255 	(void) close(fd);
4256 }
4257 
4258 /*
4259  * Process a contract event.
4260  */
4261 static void
4262 contract_event(struct pollfd *poll)
4263 {
4264 	ct_evthdl_t e;
4265 	int err;
4266 	ctid_t ctid;
4267 
4268 	if (!(poll->revents & POLLIN)) {
4269 		if (poll->revents & POLLERR)
4270 			console(B_TRUE,
4271 			    "Unknown poll error on my process contract "
4272 			    "pbundle.\n");
4273 		return;
4274 	}
4275 
4276 	err = ct_event_read(poll->fd, &e);
4277 	if (err != 0) {
4278 		console(B_TRUE, "Error retrieving contract event: %s.\n",
4279 		    strerror(err));
4280 		return;
4281 	}
4282 
4283 	ctid = ct_event_get_ctid(e);
4284 
4285 	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4286 		uint64_t cookie;
4287 		int ret, abandon = 1;
4288 
4289 		/* If it's svc.startd, restart it.  Else, abandon. */
4290 		ret = contract_cookie(ctid, &cookie);
4291 
4292 		if (ret == 0) {
4293 			if (cookie == STARTD_COOKIE &&
4294 			    do_restart_startd) {
4295 				if (smf_debug)
4296 					console(B_TRUE, "Restarting "
4297 					    "svc.startd.\n");
4298 
4299 				/*
4300 				 * Account for the failure.  If the failure rate
4301 				 * exceeds a threshold, then drop to maintenance
4302 				 * mode.
4303 				 */
4304 				startd_record_failure();
4305 				if (startd_failure_rate_critical())
4306 					enter_maintenance();
4307 
4308 				if (startd_tmpl < 0)
4309 					console(B_TRUE,
4310 					    "Restarting svc.startd in "
4311 					    "improper contract (bad "
4312 					    "template).\n");
4313 
4314 				(void) startd_run(startd_cline, startd_tmpl,
4315 				    ctid);
4316 
4317 				abandon = 0;
4318 			}
4319 		}
4320 
4321 		if (abandon && (err = contract_abandon_id(ctid))) {
4322 			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4323 			    ctid, strerror(err));
4324 		}
4325 
4326 		/*
4327 		 * No need to acknowledge the event since either way the
4328 		 * originating contract should be abandoned.
4329 		 */
4330 	} else {
4331 		console(B_TRUE,
4332 		    "Received contract event of unexpected type %d from "
4333 		    "contract %ld.\n", ct_event_get_type(e), ctid);
4334 
4335 		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4336 			/* Allow unexpected critical events to be released. */
4337 			contract_ack(e);
4338 	}
4339 
4340 	ct_event_free(e);
4341 }
4342 
4343 /*
4344  * svc.startd(1M) Management
4345  */
4346 
4347 /*
4348  * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4349  * contract, or 0 if we're starting it for the first time.  If wait is true
4350  * we'll wait for and return the exit value of the child.
4351  */
4352 static int
4353 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4354 {
4355 	int err, i, ret, did_activate;
4356 	pid_t pid;
4357 	struct stat sb;
4358 
4359 	if (cline[0] == '\0')
4360 		return (-1);
4361 
4362 	/*
4363 	 * Don't restart startd if the system is rebooting or shutting down.
4364 	 */
4365 	do {
4366 		ret = stat("/etc/svc/volatile/resetting", &sb);
4367 	} while (ret == -1 && errno == EINTR);
4368 
4369 	if (ret == 0) {
4370 		if (smf_debug)
4371 			console(B_TRUE, "Quiescing for reboot.\n");
4372 		(void) pause();
4373 		return (-1);
4374 	}
4375 
4376 	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4377 	if (err == EINVAL) {
4378 		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4379 		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4380 		    CT_PR_EV_HWERR, STARTD_COOKIE);
4381 
4382 		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4383 	}
4384 	if (err != 0) {
4385 		console(B_TRUE,
4386 		    "Couldn't set transfer parameter of contract template: "
4387 		    "%s.\n", strerror(err));
4388 	}
4389 
4390 	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4391 	    SCF_SERVICE_STARTD)) != 0)
4392 		console(B_TRUE,
4393 		    "Can not set svc_fmri in contract template: %s\n",
4394 		    strerror(err));
4395 	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4396 	    startd_svc_aux)) != 0)
4397 		console(B_TRUE,
4398 		    "Can not set svc_aux in contract template: %s\n",
4399 		    strerror(err));
4400 	did_activate = !(ct_tmpl_activate(tmpl));
4401 	if (!did_activate)
4402 		console(B_TRUE,
4403 		    "Template activation failed; not starting \"%s\" in "
4404 		    "proper contract.\n", cline);
4405 
4406 	/* Hold SIGCLD so we can wait if necessary. */
4407 	(void) sighold(SIGCLD);
4408 
4409 	while ((pid = fork()) < 0) {
4410 		if (errno == EPERM) {
4411 			console(B_TRUE, "Insufficient permission to fork.\n");
4412 
4413 			/* Now that's a doozy. */
4414 			exit(1);
4415 		}
4416 
4417 		console(B_TRUE,
4418 		    "fork() for svc.startd failed: %s.  Will retry in 1 "
4419 		    "second...\n", strerror(errno));
4420 
4421 		(void) sleep(1);
4422 
4423 		/* Eventually give up? */
4424 	}
4425 
4426 	if (pid == 0) {
4427 		/* child */
4428 
4429 		/* See the comment in efork() */
4430 		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4431 			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4432 				(void) sigset(i, SIG_IGN);
4433 			else
4434 				(void) sigset(i, SIG_DFL);
4435 		}
4436 
4437 		if (smf_options != NULL) {
4438 			/* Put smf_options in the environment. */
4439 			glob_envp[glob_envn] =
4440 			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
4441 			    strlen(smf_options) + 1);
4442 
4443 			if (glob_envp[glob_envn] != NULL) {
4444 				/* LINTED */
4445 				(void) sprintf(glob_envp[glob_envn],
4446 				    "SMF_OPTIONS=%s", smf_options);
4447 				glob_envp[glob_envn+1] = NULL;
4448 			} else {
4449 				console(B_TRUE,
4450 				    "Could not set SMF_OPTIONS (%s).\n",
4451 				    strerror(errno));
4452 			}
4453 		}
4454 
4455 		if (smf_debug)
4456 			console(B_TRUE, "Executing svc.startd\n");
4457 
4458 		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4459 
4460 		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4461 		    strerror(errno));
4462 
4463 		exit(1);
4464 	}
4465 
4466 	/* parent */
4467 
4468 	if (did_activate) {
4469 		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4470 			(void) ct_tmpl_clear(tmpl);
4471 	}
4472 
4473 	/* Clear the old_ctid reference so the kernel can reclaim it. */
4474 	if (old_ctid != 0)
4475 		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
4476 
4477 	(void) sigrelse(SIGCLD);
4478 
4479 	return (0);
4480 }
4481 
4482 /*
4483  * void startd_record_failure(void)
4484  *   Place the current time in our circular array of svc.startd failures.
4485  */
4486 void
4487 startd_record_failure()
4488 {
4489 	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4490 
4491 	startd_failure_time[index] = gethrtime();
4492 }
4493 
4494 /*
4495  * int startd_failure_rate_critical(void)
4496  *   Return true if the average failure interval is less than the permitted
4497  *   interval.  Implicit success if insufficient measurements for an average
4498  *   exist.
4499  */
4500 int
4501 startd_failure_rate_critical()
4502 {
4503 	int n = startd_failure_index;
4504 	hrtime_t avg_ns = 0;
4505 
4506 	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4507 		return (0);
4508 
4509 	avg_ns =
4510 	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4511 	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4512 	    NSTARTD_FAILURE_TIMES;
4513 
4514 	return (avg_ns < STARTD_FAILURE_RATE_NS);
4515 }
4516 
4517 /*
4518  * returns string that must be free'd
4519  */
4520 
4521 static char
4522 *audit_boot_msg()
4523 {
4524 	char		*b, *p;
4525 	char		desc[] = "booted";
4526 	zoneid_t	zid = getzoneid();
4527 
4528 	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4529 	if (b == NULL)
4530 		return (b);
4531 
4532 	p = b;
4533 	p += strlcpy(p, desc, sizeof (desc));
4534 	if (zid != GLOBAL_ZONEID) {
4535 		p += strlcpy(p, ": ", 3);
4536 		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4537 	}
4538 	return (b);
4539 }
4540 
4541 /*
4542  * Generate AUE_init_solaris audit record.  Return 1 if
4543  * auditing is enabled in case the caller cares.
4544  *
4545  * In the case of userint() or a local zone invocation of
4546  * one_true_init, the process initially contains the audit
4547  * characteristics of the process that invoked init.  The first pass
4548  * through here uses those characteristics then for the case of
4549  * one_true_init in a local zone, clears them so subsequent system
4550  * state changes won't be attributed to the person who booted the
4551  * zone.
4552  */
4553 static int
4554 audit_put_record(int pass_fail, int status, char *msg)
4555 {
4556 	adt_session_data_t	*ah;
4557 	adt_event_data_t	*event;
4558 
4559 	if (!adt_audit_enabled())
4560 		return (0);
4561 
4562 	/*
4563 	 * the PROC_DATA picks up the context to tell whether this is
4564 	 * an attributed record (auid = -2 is unattributed)
4565 	 */
4566 	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4567 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4568 		return (1);
4569 	}
4570 	event = adt_alloc_event(ah, ADT_init_solaris);
4571 	if (event == NULL) {
4572 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4573 		(void) adt_end_session(ah);
4574 		return (1);
4575 	}
4576 	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4577 
4578 	if (adt_put_event(event, pass_fail, status)) {
4579 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4580 		(void) adt_end_session(ah);
4581 		return (1);
4582 	}
4583 	adt_free_event(event);
4584 
4585 	(void) adt_end_session(ah);
4586 
4587 	return (1);
4588 }
4589