1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2019 Joyent, Inc.
25 */
26
27/*
28 * restarter.c - service manipulation
29 *
30 * This component manages services whose restarter is svc.startd, the standard
31 * restarter.  It translates restarter protocol events from the graph engine
32 * into actions on processes, as a delegated restarter would do.
33 *
34 * The master restarter manages a number of always-running threads:
35 *   - restarter event thread: events from the graph engine
36 *   - timeout thread: thread to fire queued timeouts
37 *   - contract thread: thread to handle contract events
38 *   - wait thread: thread to handle wait-based services
39 *
40 * The other threads are created as-needed:
41 *   - per-instance method threads
42 *   - per-instance event processing threads
43 *
44 * The interaction of all threads must result in the following conditions
45 * being satisfied (on a per-instance basis):
46 *   - restarter events must be processed in order
47 *   - method execution must be serialized
48 *   - instance delete must be held until outstanding methods are complete
49 *   - contract events shouldn't be processed while a method is running
50 *   - timeouts should fire even when a method is running
51 *
52 * Service instances are represented by restarter_inst_t's and are kept in the
53 * instance_list list.
54 *
55 * Service States
56 *   The current state of a service instance is kept in
57 *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
58 *   some time, then before we effect the transition we set
59 *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60 *   rotate i_next_state to i_state and set i_next_state to
61 *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
62 *   held.  The exception is when we launch methods, which are done with
63 *   a separate thread.  To keep any other threads from grabbing ri_lock before
64 *   method_thread() does, we set ri_method_thread to the thread id of the
65 *   method thread, and when it is nonzero any thread with a different thread id
66 *   waits on ri_method_cv.
67 *
68 * Method execution is serialized by blocking on ri_method_cv in
69 * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
70 * also prevents the instance structure from being deleted until all
71 * outstanding operations such as method_thread() have finished.
72 *
73 * Lock ordering:
74 *
75 * dgraph_lock [can be held when taking:]
76 *   utmpx_lock
77 *   dictionary->dict_lock
78 *   st->st_load_lock
79 *   wait_info_lock
80 *   ru->restarter_update_lock
81 *     restarter_queue->rpeq_lock
82 *   instance_list.ril_lock
83 *     inst->ri_lock
84 *   st->st_configd_live_lock
85 *
86 * instance_list.ril_lock
87 *   graph_queue->gpeq_lock
88 *   gu->gu_lock
89 *   st->st_configd_live_lock
90 *   dictionary->dict_lock
91 *   inst->ri_lock
92 *     graph_queue->gpeq_lock
93 *     gu->gu_lock
94 *     tu->tu_lock
95 *     tq->tq_lock
96 *     inst->ri_queue_lock
97 *       wait_info_lock
98 *       bp->cb_lock
99 *     utmpx_lock
100 *
101 * single_user_thread_lock
102 *   wait_info_lock
103 *   utmpx_lock
104 *
105 * gu_freeze_lock
106 *
107 * logbuf_mutex nests inside pretty much everything.
108 */
109
110#include <sys/contract/process.h>
111#include <sys/ctfs.h>
112#include <sys/stat.h>
113#include <sys/time.h>
114#include <sys/types.h>
115#include <sys/uio.h>
116#include <sys/wait.h>
117#include <assert.h>
118#include <errno.h>
119#include <fcntl.h>
120#include <libcontract.h>
121#include <libcontract_priv.h>
122#include <libintl.h>
123#include <librestart.h>
124#include <librestart_priv.h>
125#include <libuutil.h>
126#include <limits.h>
127#include <poll.h>
128#include <port.h>
129#include <pthread.h>
130#include <stdarg.h>
131#include <stdio.h>
132#include <strings.h>
133#include <unistd.h>
134
135#include "startd.h"
136#include "protocol.h"
137
138static uu_list_pool_t *restarter_instance_pool;
139static restarter_instance_list_t instance_list;
140
141static uu_list_pool_t *restarter_queue_pool;
142
143#define	WT_SVC_ERR_THROTTLE	1	/* 1 sec delay for erroring wait svc */
144
145/*
146 * Function used to reset the restart times for an instance, when
147 * an administrative task comes along and essentially makes the times
148 * in this array ineffective.
149 */
150static void
151reset_start_times(restarter_inst_t *inst)
152{
153	inst->ri_start_index = 0;
154	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
155}
156
157/*ARGSUSED*/
158static int
159restarter_instance_compare(const void *lc_arg, const void *rc_arg,
160    void *private)
161{
162	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
163	int rc_id = *(int *)rc_arg;
164
165	if (lc_id > rc_id)
166		return (1);
167	if (lc_id < rc_id)
168		return (-1);
169	return (0);
170}
171
172static restarter_inst_t *
173inst_lookup_by_name(const char *name)
174{
175	int id;
176
177	id = dict_lookup_byname(name);
178	if (id == -1)
179		return (NULL);
180
181	return (inst_lookup_by_id(id));
182}
183
184restarter_inst_t *
185inst_lookup_by_id(int id)
186{
187	restarter_inst_t *inst;
188
189	MUTEX_LOCK(&instance_list.ril_lock);
190	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
191	if (inst != NULL)
192		MUTEX_LOCK(&inst->ri_lock);
193	MUTEX_UNLOCK(&instance_list.ril_lock);
194
195	if (inst != NULL) {
196		while (inst->ri_method_thread != 0 &&
197		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
198			++inst->ri_method_waiters;
199			(void) pthread_cond_wait(&inst->ri_method_cv,
200			    &inst->ri_lock);
201			assert(inst->ri_method_waiters > 0);
202			--inst->ri_method_waiters;
203		}
204	}
205
206	return (inst);
207}
208
209static restarter_inst_t *
210inst_lookup_queue(const char *name)
211{
212	int id;
213	restarter_inst_t *inst;
214
215	id = dict_lookup_byname(name);
216	if (id == -1)
217		return (NULL);
218
219	MUTEX_LOCK(&instance_list.ril_lock);
220	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
221	if (inst != NULL)
222		MUTEX_LOCK(&inst->ri_queue_lock);
223	MUTEX_UNLOCK(&instance_list.ril_lock);
224
225	return (inst);
226}
227
228const char *
229service_style(int flags)
230{
231	switch (flags & RINST_STYLE_MASK) {
232	case RINST_CONTRACT:	return ("contract");
233	case RINST_TRANSIENT:	return ("transient");
234	case RINST_WAIT:	return ("wait");
235
236	default:
237#ifndef NDEBUG
238		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
239#endif
240		abort();
241		/* NOTREACHED */
242	}
243}
244
245/*
246 * Fails with ECONNABORTED or ECANCELED.
247 */
248static int
249check_contract(restarter_inst_t *inst, boolean_t primary,
250    scf_instance_t *scf_inst)
251{
252	ctid_t *ctidp;
253	int fd, r;
254
255	ctidp = primary ? &inst->ri_i.i_primary_ctid :
256	    &inst->ri_i.i_transient_ctid;
257
258	assert(*ctidp >= 1);
259
260	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
261	if (fd >= 0) {
262		r = close(fd);
263		assert(r == 0);
264		return (0);
265	}
266
267	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
268	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
269	switch (r) {
270	case 0:
271	case ECONNABORTED:
272	case ECANCELED:
273		*ctidp = 0;
274		return (r);
275
276	case ENOMEM:
277		uu_die("Out of memory\n");
278		/* NOTREACHED */
279
280	case EPERM:
281		uu_die("Insufficient privilege.\n");
282		/* NOTREACHED */
283
284	case EACCES:
285		uu_die("Repository backend access denied.\n");
286		/* NOTREACHED */
287
288	case EROFS:
289		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
290		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
291		return (0);
292
293	case EINVAL:
294	case EBADF:
295	default:
296		assert(0);
297		abort();
298		/* NOTREACHED */
299	}
300}
301
302static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
303
304/*
305 * int restarter_insert_inst(scf_handle_t *, char *)
306 *   If the inst is already in the restarter list, return its id.  If the inst
307 *   is not in the restarter list, initialize a restarter_inst_t, initialize its
308 *   states, insert it into the list, and return 0.
309 *
310 *   Fails with
311 *     ENOENT - name is not in the repository
312 */
313static int
314restarter_insert_inst(scf_handle_t *h, const char *name)
315{
316	int id, r;
317	restarter_inst_t *inst;
318	uu_list_index_t idx;
319	scf_service_t *scf_svc;
320	scf_instance_t *scf_inst;
321	scf_snapshot_t *snap = NULL;
322	scf_propertygroup_t *pg;
323	char *svc_name, *inst_name;
324	char logfilebuf[PATH_MAX];
325	char *c;
326	boolean_t do_commit_states;
327	restarter_instance_state_t state, next_state;
328	protocol_states_t *ps;
329	pid_t start_pid;
330	restarter_str_t reason = restarter_str_insert_in_graph;
331
332	MUTEX_LOCK(&instance_list.ril_lock);
333
334	/*
335	 * We don't use inst_lookup_by_name() here because we want the lookup
336	 * & insert to be atomic.
337	 */
338	id = dict_lookup_byname(name);
339	if (id != -1) {
340		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
341		    &idx);
342		if (inst != NULL) {
343			MUTEX_UNLOCK(&instance_list.ril_lock);
344			return (0);
345		}
346	}
347
348	/* Allocate an instance */
349	inst = startd_zalloc(sizeof (restarter_inst_t));
350	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
351	inst->ri_utmpx_prefix[0] = '\0';
352
353	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
354	(void) strcpy((char *)inst->ri_i.i_fmri, name);
355
356	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
357
358	/*
359	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
360	 * just in case.
361	 */
362	inst->ri_id = (id != -1 ? id : dict_insert(name));
363
364	special_online_hooks_get(name, &inst->ri_pre_online_hook,
365	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
366
367	scf_svc = safe_scf_service_create(h);
368	scf_inst = safe_scf_instance_create(h);
369	pg = safe_scf_pg_create(h);
370	svc_name = startd_alloc(max_scf_name_size);
371	inst_name = startd_alloc(max_scf_name_size);
372
373rep_retry:
374	if (snap != NULL)
375		scf_snapshot_destroy(snap);
376	if (inst->ri_logstem != NULL)
377		startd_free(inst->ri_logstem, PATH_MAX);
378	if (inst->ri_common_name != NULL)
379		free(inst->ri_common_name);
380	if (inst->ri_C_common_name != NULL)
381		free(inst->ri_C_common_name);
382	snap = NULL;
383	inst->ri_logstem = NULL;
384	inst->ri_common_name = NULL;
385	inst->ri_C_common_name = NULL;
386
387	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
388	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
389		switch (scf_error()) {
390		case SCF_ERROR_CONNECTION_BROKEN:
391			libscf_handle_rebind(h);
392			goto rep_retry;
393
394		case SCF_ERROR_NOT_FOUND:
395			goto deleted;
396		}
397
398		uu_die("Can't decode FMRI %s: %s\n", name,
399		    scf_strerror(scf_error()));
400	}
401
402	/*
403	 * If there's no running snapshot, then we execute using the editing
404	 * snapshot.  Pending snapshots will be taken later.
405	 */
406	snap = libscf_get_running_snapshot(scf_inst);
407
408	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
409	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
410	    0)) {
411		switch (scf_error()) {
412		case SCF_ERROR_NOT_SET:
413			break;
414
415		case SCF_ERROR_CONNECTION_BROKEN:
416			libscf_handle_rebind(h);
417			goto rep_retry;
418
419		default:
420			assert(0);
421			abort();
422		}
423
424		goto deleted;
425	}
426
427	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
428	for (c = logfilebuf; *c != '\0'; c++)
429		if (*c == '/')
430			*c = '-';
431
432	inst->ri_logstem = startd_alloc(PATH_MAX);
433	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
434	    LOG_SUFFIX);
435
436	/*
437	 * If the restarter group is missing, use uninit/none.  Otherwise,
438	 * we're probably being restarted & don't want to mess up the states
439	 * that are there.
440	 */
441	state = RESTARTER_STATE_UNINIT;
442	next_state = RESTARTER_STATE_NONE;
443
444	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
445	if (r != 0) {
446		switch (scf_error()) {
447		case SCF_ERROR_CONNECTION_BROKEN:
448			libscf_handle_rebind(h);
449			goto rep_retry;
450
451		case SCF_ERROR_NOT_SET:
452			goto deleted;
453
454		case SCF_ERROR_NOT_FOUND:
455			/*
456			 * This shouldn't happen since the graph engine should
457			 * have initialized the state to uninitialized/none if
458			 * there was no restarter pg.  In case somebody
459			 * deleted it, though....
460			 */
461			do_commit_states = B_TRUE;
462			break;
463
464		default:
465			assert(0);
466			abort();
467		}
468	} else {
469		r = libscf_read_states(pg, &state, &next_state);
470		if (r != 0) {
471			do_commit_states = B_TRUE;
472		} else {
473			if (next_state != RESTARTER_STATE_NONE) {
474				/*
475				 * Force next_state to _NONE since we
476				 * don't look for method processes.
477				 */
478				next_state = RESTARTER_STATE_NONE;
479				do_commit_states = B_TRUE;
480			} else {
481				/*
482				 * The reason for transition will depend on
483				 * state.
484				 */
485				if (st->st_initial == 0)
486					reason = restarter_str_startd_restart;
487				else if (state == RESTARTER_STATE_MAINT)
488					reason = restarter_str_bad_repo_state;
489				/*
490				 * Inform the restarter of our state without
491				 * changing the STIME in the repository.
492				 */
493				ps = startd_alloc(sizeof (*ps));
494				inst->ri_i.i_state = ps->ps_state = state;
495				inst->ri_i.i_next_state = ps->ps_state_next =
496				    next_state;
497				ps->ps_reason = reason;
498
499				graph_protocol_send_event(inst->ri_i.i_fmri,
500				    GRAPH_UPDATE_STATE_CHANGE, ps);
501
502				do_commit_states = B_FALSE;
503			}
504		}
505	}
506
507	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
508	    &inst->ri_utmpx_prefix)) {
509	case 0:
510		break;
511
512	case ECONNABORTED:
513		libscf_handle_rebind(h);
514		goto rep_retry;
515
516	case ECANCELED:
517		goto deleted;
518
519	case ENOENT:
520		/*
521		 * This is odd, because the graph engine should have required
522		 * the general property group.  So we'll just use default
523		 * flags in anticipation of the graph engine sending us
524		 * REMOVE_INSTANCE when it finds out that the general property
525		 * group has been deleted.
526		 */
527		inst->ri_flags = RINST_CONTRACT;
528		break;
529
530	default:
531		assert(0);
532		abort();
533	}
534
535	r = libscf_get_template_values(scf_inst, snap,
536	    &inst->ri_common_name, &inst->ri_C_common_name);
537
538	/*
539	 * Copy our names to smaller buffers to reduce our memory footprint.
540	 */
541	if (inst->ri_common_name != NULL) {
542		char *tmp = safe_strdup(inst->ri_common_name);
543		startd_free(inst->ri_common_name, max_scf_value_size);
544		inst->ri_common_name = tmp;
545	}
546
547	if (inst->ri_C_common_name != NULL) {
548		char *tmp = safe_strdup(inst->ri_C_common_name);
549		startd_free(inst->ri_C_common_name, max_scf_value_size);
550		inst->ri_C_common_name = tmp;
551	}
552
553	switch (r) {
554	case 0:
555		break;
556
557	case ECONNABORTED:
558		libscf_handle_rebind(h);
559		goto rep_retry;
560
561	case ECANCELED:
562		goto deleted;
563
564	case ECHILD:
565	case ENOENT:
566		break;
567
568	default:
569		assert(0);
570		abort();
571	}
572
573	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
574	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
575	    &start_pid)) {
576	case 0:
577		break;
578
579	case ECONNABORTED:
580		libscf_handle_rebind(h);
581		goto rep_retry;
582
583	case ECANCELED:
584		goto deleted;
585
586	default:
587		assert(0);
588		abort();
589	}
590
591	if (inst->ri_i.i_primary_ctid >= 1) {
592		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
593
594		switch (check_contract(inst, B_TRUE, scf_inst)) {
595		case 0:
596			break;
597
598		case ECONNABORTED:
599			libscf_handle_rebind(h);
600			goto rep_retry;
601
602		case ECANCELED:
603			goto deleted;
604
605		default:
606			assert(0);
607			abort();
608		}
609	}
610
611	if (inst->ri_i.i_transient_ctid >= 1) {
612		switch (check_contract(inst, B_FALSE, scf_inst)) {
613		case 0:
614			break;
615
616		case ECONNABORTED:
617			libscf_handle_rebind(h);
618			goto rep_retry;
619
620		case ECANCELED:
621			goto deleted;
622
623		default:
624			assert(0);
625			abort();
626		}
627	}
628
629	/* No more failures we live through, so add it to the list. */
630	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
631	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
632	MUTEX_LOCK(&inst->ri_lock);
633	MUTEX_LOCK(&inst->ri_queue_lock);
634
635	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
636
637	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
638	uu_list_insert(instance_list.ril_instance_list, inst, idx);
639	MUTEX_UNLOCK(&instance_list.ril_lock);
640
641	if (start_pid != -1 &&
642	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
643		int ret;
644		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
645		if (ret == -1) {
646			/*
647			 * Implication:  if we can't reregister the
648			 * instance, we will start another one.  Two
649			 * instances may or may not result in a resource
650			 * conflict.
651			 */
652			log_error(LOG_WARNING,
653			    "%s: couldn't reregister %ld for wait\n",
654			    inst->ri_i.i_fmri, start_pid);
655		} else if (ret == 1) {
656			/*
657			 * Leading PID has exited.
658			 */
659			(void) stop_instance(h, inst, RSTOP_EXIT);
660		}
661	}
662
663
664	scf_pg_destroy(pg);
665
666	if (do_commit_states)
667		(void) restarter_instance_update_states(h, inst, state,
668		    next_state, RERR_NONE, reason);
669
670	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
671	    service_style(inst->ri_flags));
672
673	MUTEX_UNLOCK(&inst->ri_queue_lock);
674	MUTEX_UNLOCK(&inst->ri_lock);
675
676	startd_free(svc_name, max_scf_name_size);
677	startd_free(inst_name, max_scf_name_size);
678	scf_snapshot_destroy(snap);
679	scf_instance_destroy(scf_inst);
680	scf_service_destroy(scf_svc);
681
682	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
683	    name);
684
685	return (0);
686
687deleted:
688	MUTEX_UNLOCK(&instance_list.ril_lock);
689	startd_free(inst_name, max_scf_name_size);
690	startd_free(svc_name, max_scf_name_size);
691	if (snap != NULL)
692		scf_snapshot_destroy(snap);
693	scf_pg_destroy(pg);
694	scf_instance_destroy(scf_inst);
695	scf_service_destroy(scf_svc);
696	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
697	uu_list_destroy(inst->ri_queue);
698	if (inst->ri_logstem != NULL)
699		startd_free(inst->ri_logstem, PATH_MAX);
700	if (inst->ri_common_name != NULL)
701		free(inst->ri_common_name);
702	if (inst->ri_C_common_name != NULL)
703		free(inst->ri_C_common_name);
704	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
705	startd_free(inst, sizeof (restarter_inst_t));
706	return (ENOENT);
707}
708
709static void
710restarter_delete_inst(restarter_inst_t *ri)
711{
712	int id;
713	restarter_inst_t *rip;
714	void *cookie = NULL;
715	restarter_instance_qentry_t *e;
716
717	assert(MUTEX_HELD(&ri->ri_lock));
718
719	/*
720	 * Must drop the instance lock so we can pick up the instance_list
721	 * lock & remove the instance.
722	 */
723	id = ri->ri_id;
724	MUTEX_UNLOCK(&ri->ri_lock);
725
726	MUTEX_LOCK(&instance_list.ril_lock);
727
728	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
729	if (rip == NULL) {
730		MUTEX_UNLOCK(&instance_list.ril_lock);
731		return;
732	}
733
734	assert(ri == rip);
735
736	uu_list_remove(instance_list.ril_instance_list, ri);
737
738	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
739	    ri->ri_i.i_fmri);
740
741	MUTEX_UNLOCK(&instance_list.ril_lock);
742
743	/*
744	 * We can lock the instance without holding the instance_list lock
745	 * since we removed the instance from the list.
746	 */
747	MUTEX_LOCK(&ri->ri_lock);
748	MUTEX_LOCK(&ri->ri_queue_lock);
749
750	if (ri->ri_i.i_primary_ctid >= 1)
751		contract_hash_remove(ri->ri_i.i_primary_ctid);
752
753	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
754		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
755
756	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
757		startd_free(e, sizeof (*e));
758	uu_list_destroy(ri->ri_queue);
759
760	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
761	startd_free(ri->ri_logstem, PATH_MAX);
762	if (ri->ri_common_name != NULL)
763		free(ri->ri_common_name);
764	if (ri->ri_C_common_name != NULL)
765		free(ri->ri_C_common_name);
766	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
767	(void) pthread_mutex_destroy(&ri->ri_lock);
768	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
769	startd_free(ri, sizeof (restarter_inst_t));
770}
771
772/*
773 * instance_is_wait_style()
774 *
775 *   Returns 1 if the given instance is a "wait-style" service instance.
776 */
777int
778instance_is_wait_style(restarter_inst_t *inst)
779{
780	assert(MUTEX_HELD(&inst->ri_lock));
781	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
782}
783
784/*
785 * instance_is_transient_style()
786 *
787 *   Returns 1 if the given instance is a transient service instance.
788 */
789int
790instance_is_transient_style(restarter_inst_t *inst)
791{
792	assert(MUTEX_HELD(&inst->ri_lock));
793	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
794}
795
796/*
797 * instance_in_transition()
798 * Returns 1 if instance is in transition, 0 if not
799 */
800int
801instance_in_transition(restarter_inst_t *inst)
802{
803	assert(MUTEX_HELD(&inst->ri_lock));
804	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
805		return (0);
806	return (1);
807}
808
809/*
810 * returns 1 if instance is already started, 0 if not
811 */
812static int
813instance_started(restarter_inst_t *inst)
814{
815	int ret;
816
817	assert(MUTEX_HELD(&inst->ri_lock));
818
819	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
820	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
821		ret = 1;
822	else
823		ret = 0;
824
825	return (ret);
826}
827
828/*
829 * Returns
830 *   0 - success
831 *   ECONNRESET - success, but h was rebound
832 */
833int
834restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
835    restarter_instance_state_t new_state,
836    restarter_instance_state_t new_state_next, restarter_error_t err,
837    restarter_str_t reason)
838{
839	protocol_states_t *states;
840	int e;
841	uint_t retry_count = 0, msecs = ALLOC_DELAY;
842	boolean_t rebound = B_FALSE;
843	int prev_state_online;
844	int state_online;
845
846	assert(MUTEX_HELD(&ri->ri_lock));
847
848	prev_state_online = instance_started(ri);
849
850retry:
851	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
852	    restarter_get_str_short(reason));
853	switch (e) {
854	case 0:
855		break;
856
857	case ENOMEM:
858		++retry_count;
859		if (retry_count < ALLOC_RETRY) {
860			(void) poll(NULL, 0, msecs);
861			msecs *= ALLOC_DELAY_MULT;
862			goto retry;
863		}
864
865		/* Like startd_alloc(). */
866		uu_die("Insufficient memory.\n");
867		/* NOTREACHED */
868
869	case ECONNABORTED:
870		libscf_handle_rebind(h);
871		rebound = B_TRUE;
872		goto retry;
873
874	case EPERM:
875	case EACCES:
876	case EROFS:
877		log_error(LOG_NOTICE, "Could not commit state change for %s "
878		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
879		/* FALLTHROUGH */
880
881	case ENOENT:
882		ri->ri_i.i_state = new_state;
883		ri->ri_i.i_next_state = new_state_next;
884		break;
885
886	case EINVAL:
887	default:
888		bad_error("_restarter_commit_states", e);
889	}
890
891	states = startd_alloc(sizeof (protocol_states_t));
892	states->ps_state = new_state;
893	states->ps_state_next = new_state_next;
894	states->ps_err = err;
895	states->ps_reason = reason;
896	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
897	    (void *)states);
898
899	state_online = instance_started(ri);
900
901	if (prev_state_online && !state_online)
902		ri->ri_post_offline_hook();
903	else if (!prev_state_online && state_online)
904		ri->ri_post_online_hook();
905
906	return (rebound ? ECONNRESET : 0);
907}
908
909void
910restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
911{
912	restarter_inst_t *inst;
913
914	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
915
916	inst = inst_lookup_by_name(fmri);
917	if (inst == NULL)
918		return;
919
920	inst->ri_flags |= flag;
921
922	MUTEX_UNLOCK(&inst->ri_lock);
923}
924
925static void
926restarter_take_pending_snapshots(scf_handle_t *h)
927{
928	restarter_inst_t *inst;
929	int r;
930
931	MUTEX_LOCK(&instance_list.ril_lock);
932
933	for (inst = uu_list_first(instance_list.ril_instance_list);
934	    inst != NULL;
935	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
936		const char *fmri;
937		scf_instance_t *sinst = NULL;
938
939		MUTEX_LOCK(&inst->ri_lock);
940
941		/*
942		 * This is where we'd check inst->ri_method_thread and if it
943		 * were nonzero we'd wait in anticipation of another thread
944		 * executing a method for inst.  Doing so with the instance_list
945		 * locked, though, leads to deadlock.  Since taking a snapshot
946		 * during that window won't hurt anything, we'll just continue.
947		 */
948
949		fmri = inst->ri_i.i_fmri;
950
951		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
952			scf_snapshot_t *rsnap;
953
954			(void) libscf_fmri_get_instance(h, fmri, &sinst);
955
956			rsnap = libscf_get_or_make_running_snapshot(sinst,
957			    fmri, B_FALSE);
958
959			scf_instance_destroy(sinst);
960
961			if (rsnap != NULL)
962				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
963
964			scf_snapshot_destroy(rsnap);
965		}
966
967		if (inst->ri_flags & RINST_RETAKE_START) {
968			switch (r = libscf_snapshots_poststart(h, fmri,
969			    B_FALSE)) {
970			case 0:
971			case ENOENT:
972				inst->ri_flags &= ~RINST_RETAKE_START;
973				break;
974
975			case ECONNABORTED:
976				break;
977
978			case EACCES:
979			default:
980				bad_error("libscf_snapshots_poststart", r);
981			}
982		}
983
984		MUTEX_UNLOCK(&inst->ri_lock);
985	}
986
987	MUTEX_UNLOCK(&instance_list.ril_lock);
988}
989
990/* ARGSUSED */
991void *
992restarter_post_fsminimal_thread(void *unused)
993{
994	scf_handle_t *h;
995	int r;
996
997	(void) pthread_setname_np(pthread_self(), "restarter_post_fsmin");
998
999	h = libscf_handle_create_bound_loop();
1000
1001	for (;;) {
1002		r = libscf_create_self(h);
1003		if (r == 0)
1004			break;
1005
1006		assert(r == ECONNABORTED);
1007		libscf_handle_rebind(h);
1008	}
1009
1010	restarter_take_pending_snapshots(h);
1011
1012	(void) scf_handle_unbind(h);
1013	scf_handle_destroy(h);
1014
1015	return (NULL);
1016}
1017
1018/*
1019 * int stop_instance()
1020 *
1021 *   Stop the instance identified by the instance given as the second argument,
1022 *   for the cause stated.
1023 *
1024 *   Returns
1025 *     0 - success
1026 *     -1 - inst is in transition
1027 */
1028static int
1029stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1030    stop_cause_t cause)
1031{
1032	fork_info_t *info;
1033	const char *cp;
1034	int err;
1035	restarter_error_t re;
1036	restarter_str_t	reason;
1037	restarter_instance_state_t new_state;
1038
1039	assert(MUTEX_HELD(&inst->ri_lock));
1040	assert(inst->ri_method_thread == 0);
1041
1042	switch (cause) {
1043	case RSTOP_EXIT:
1044		re = RERR_RESTART;
1045		reason = restarter_str_ct_ev_exit;
1046		cp = "all processes in service exited";
1047		break;
1048	case RSTOP_ERR_CFG:
1049		re = RERR_FAULT;
1050		reason = restarter_str_method_failed;
1051		cp = "service exited with a configuration error";
1052		break;
1053	case RSTOP_ERR_EXIT:
1054		re = RERR_RESTART;
1055		reason = restarter_str_ct_ev_exit;
1056		cp = "service exited with an error";
1057		break;
1058	case RSTOP_CORE:
1059		re = RERR_FAULT;
1060		reason = restarter_str_ct_ev_core;
1061		cp = "process dumped core";
1062		break;
1063	case RSTOP_SIGNAL:
1064		re = RERR_FAULT;
1065		reason = restarter_str_ct_ev_signal;
1066		cp = "process received fatal signal from outside the service";
1067		break;
1068	case RSTOP_HWERR:
1069		re = RERR_FAULT;
1070		reason = restarter_str_ct_ev_hwerr;
1071		cp = "process killed due to uncorrectable hardware error";
1072		break;
1073	case RSTOP_DEPENDENCY:
1074		re = RERR_RESTART;
1075		reason = restarter_str_dependency_activity;
1076		cp = "dependency activity requires stop";
1077		break;
1078	case RSTOP_DISABLE:
1079		re = RERR_RESTART;
1080		reason = restarter_str_disable_request;
1081		cp = "service disabled";
1082		break;
1083	case RSTOP_RESTART:
1084		re = RERR_RESTART;
1085		reason = restarter_str_restart_request;
1086		cp = "service restarting";
1087		break;
1088	default:
1089#ifndef NDEBUG
1090		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1091		    cause, __FILE__, __LINE__);
1092#endif
1093		abort();
1094	}
1095
1096	/* Services in the disabled and maintenance state are ignored */
1097	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1098	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1099		log_framework(LOG_DEBUG,
1100		    "%s: stop_instance -> is maint/disabled\n",
1101		    inst->ri_i.i_fmri);
1102		return (0);
1103	}
1104
1105	/* Already stopped instances are left alone */
1106	if (instance_started(inst) == 0) {
1107		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1108		    inst->ri_i.i_fmri);
1109		return (0);
1110	}
1111
1112	if (instance_in_transition(inst)) {
1113		/* requeue event by returning -1 */
1114		log_framework(LOG_DEBUG,
1115		    "Restarter: Not stopping %s, in transition.\n",
1116		    inst->ri_i.i_fmri);
1117		return (-1);
1118	}
1119
1120	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1121
1122	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1123	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1124
1125	if (instance_is_wait_style(inst) &&
1126	    (cause == RSTOP_EXIT ||
1127	    cause == RSTOP_ERR_CFG ||
1128	    cause == RSTOP_ERR_EXIT)) {
1129		/*
1130		 * No need to stop instance, as child has exited; remove
1131		 * contract and move the instance to the offline state.
1132		 */
1133		switch (err = restarter_instance_update_states(local_handle,
1134		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1135		    reason)) {
1136		case 0:
1137		case ECONNRESET:
1138			break;
1139
1140		default:
1141			bad_error("restarter_instance_update_states", err);
1142		}
1143
1144		if (cause == RSTOP_ERR_EXIT) {
1145			/*
1146			 * The RSTOP_ERR_EXIT cause is set via the
1147			 * wait_thread -> wait_remove code path when we have
1148			 * a "wait" style svc that exited with an error. If
1149			 * the svc is failing too quickly, we throttle it so
1150			 * that we don't restart it more than once/second.
1151			 * Since we know we're running in the wait thread its
1152			 * ok to throttle it right here.
1153			 */
1154			(void) update_fault_count(inst, FAULT_COUNT_INCR);
1155			if (method_rate_critical(inst)) {
1156				log_instance(inst, B_TRUE, "Failing too "
1157				    "quickly, throttling.");
1158				(void) sleep(WT_SVC_ERR_THROTTLE);
1159			}
1160		} else {
1161			(void) update_fault_count(inst, FAULT_COUNT_RESET);
1162			reset_start_times(inst);
1163		}
1164
1165		if (inst->ri_i.i_primary_ctid != 0) {
1166			inst->ri_m_inst =
1167			    safe_scf_instance_create(local_handle);
1168			inst->ri_mi_deleted = B_FALSE;
1169
1170			libscf_reget_instance(inst);
1171			method_remove_contract(inst, B_TRUE, B_TRUE);
1172
1173			scf_instance_destroy(inst->ri_m_inst);
1174			inst->ri_m_inst = NULL;
1175		}
1176
1177		switch (err = restarter_instance_update_states(local_handle,
1178		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1179		    reason)) {
1180		case 0:
1181		case ECONNRESET:
1182			break;
1183
1184		default:
1185			bad_error("restarter_instance_update_states", err);
1186		}
1187
1188		if (cause != RSTOP_ERR_CFG)
1189			return (0);
1190	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1191		/*
1192		 * Stopping a wait service through means other than the pid
1193		 * exiting should keep wait_thread() from restarting the
1194		 * service, by removing it from the wait list.
1195		 * We cannot remove it right now otherwise the process will
1196		 * end up <defunct> so mark it to be ignored.
1197		 */
1198		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1199	}
1200
1201	/*
1202	 * There are some configuration errors which we cannot detect until we
1203	 * try to run the method.  For example, see exec_method() where the
1204	 * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1205	 * in several cases. If this happens for a "wait-style" svc,
1206	 * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1207	 * the configuration error and go into maintenance, even though it is
1208	 * a "wait-style" svc.
1209	 */
1210	if (cause == RSTOP_ERR_CFG)
1211		new_state = RESTARTER_STATE_MAINT;
1212	else
1213		new_state = inst->ri_i.i_enabled ?
1214		    RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1215
1216	switch (err = restarter_instance_update_states(local_handle, inst,
1217	    inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1218	case 0:
1219	case ECONNRESET:
1220		break;
1221
1222	default:
1223		bad_error("restarter_instance_update_states", err);
1224	}
1225
1226	info = startd_zalloc(sizeof (fork_info_t));
1227
1228	info->sf_id = inst->ri_id;
1229	info->sf_method_type = METHOD_STOP;
1230	info->sf_event_type = re;
1231	info->sf_reason = reason;
1232	inst->ri_method_thread = startd_thread_create(method_thread, info);
1233
1234	return (0);
1235}
1236
1237/*
1238 * Returns
1239 *   ENOENT - fmri is not in instance_list
1240 *   0 - success
1241 *   ECONNRESET - success, though handle was rebound
1242 *   -1 - instance is in transition
1243 */
1244int
1245stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1246{
1247	restarter_inst_t *rip;
1248	int r;
1249
1250	rip = inst_lookup_by_name(fmri);
1251	if (rip == NULL)
1252		return (ENOENT);
1253
1254	r = stop_instance(h, rip, flags);
1255
1256	MUTEX_UNLOCK(&rip->ri_lock);
1257
1258	return (r);
1259}
1260
1261static void
1262unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1263    unmaint_cause_t cause)
1264{
1265	ctid_t ctid;
1266	scf_instance_t *inst;
1267	int r;
1268	uint_t tries = 0, msecs = ALLOC_DELAY;
1269	const char *cp;
1270	restarter_str_t	reason;
1271
1272	assert(MUTEX_HELD(&rip->ri_lock));
1273
1274	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1275		log_error(LOG_DEBUG, "Restarter: "
1276		    "Ignoring maintenance off command because %s is not in the "
1277		    "maintenance state.\n", rip->ri_i.i_fmri);
1278		return;
1279	}
1280
1281	switch (cause) {
1282	case RUNMAINT_CLEAR:
1283		cp = "clear requested";
1284		reason = restarter_str_clear_request;
1285		break;
1286	case RUNMAINT_DISABLE:
1287		cp = "disable requested";
1288		reason = restarter_str_disable_request;
1289		break;
1290	default:
1291#ifndef NDEBUG
1292		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1293		    cause, __FILE__, __LINE__);
1294#endif
1295		abort();
1296	}
1297
1298	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1299	    cp);
1300	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1301	    "%s.\n", rip->ri_i.i_fmri, cp);
1302
1303	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1304	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
1305
1306	/*
1307	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1308	 * a primary contract.
1309	 */
1310	if (rip->ri_i.i_primary_ctid == 0)
1311		return;
1312
1313	ctid = rip->ri_i.i_primary_ctid;
1314	contract_abandon(ctid);
1315	rip->ri_i.i_primary_ctid = 0;
1316
1317rep_retry:
1318	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1319	case 0:
1320		break;
1321
1322	case ECONNABORTED:
1323		libscf_handle_rebind(h);
1324		goto rep_retry;
1325
1326	case ENOENT:
1327		/* Must have been deleted. */
1328		return;
1329
1330	case EINVAL:
1331	case ENOTSUP:
1332	default:
1333		bad_error("libscf_handle_rebind", r);
1334	}
1335
1336again:
1337	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1338	switch (r) {
1339	case 0:
1340		break;
1341
1342	case ENOMEM:
1343		++tries;
1344		if (tries < ALLOC_RETRY) {
1345			(void) poll(NULL, 0, msecs);
1346			msecs *= ALLOC_DELAY_MULT;
1347			goto again;
1348		}
1349
1350		uu_die("Insufficient memory.\n");
1351		/* NOTREACHED */
1352
1353	case ECONNABORTED:
1354		scf_instance_destroy(inst);
1355		libscf_handle_rebind(h);
1356		goto rep_retry;
1357
1358	case ECANCELED:
1359		break;
1360
1361	case EPERM:
1362	case EACCES:
1363	case EROFS:
1364		log_error(LOG_INFO,
1365		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1366		    rip->ri_i.i_fmri, strerror(r));
1367		break;
1368
1369	case EINVAL:
1370	case EBADF:
1371	default:
1372		bad_error("restarter_remove_contract", r);
1373	}
1374
1375	scf_instance_destroy(inst);
1376}
1377
1378/*
1379 * enable_inst()
1380 *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1381 *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1382 *   disabled, move it to offline.  If the event is _DISABLE or
1383 *   _ADMIN_DISABLE, make sure inst will move to disabled.
1384 *
1385 *   Returns
1386 *     0 - success
1387 *     ECONNRESET - h was rebound
1388 */
1389static int
1390enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1391    restarter_instance_qentry_t *riq)
1392{
1393	restarter_instance_state_t state;
1394	restarter_event_type_t e = riq->riq_type;
1395	restarter_str_t reason = restarter_str_per_configuration;
1396	int r;
1397
1398	assert(MUTEX_HELD(&inst->ri_lock));
1399	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1400	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1401	    e == RESTARTER_EVENT_TYPE_ENABLE);
1402	assert(instance_in_transition(inst) == 0);
1403
1404	state = inst->ri_i.i_state;
1405
1406	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1407		inst->ri_i.i_enabled = 1;
1408
1409		if (state == RESTARTER_STATE_UNINIT ||
1410		    state == RESTARTER_STATE_DISABLED) {
1411			/*
1412			 * B_FALSE: Don't log an error if the log_instance()
1413			 * fails because it will fail on the miniroot before
1414			 * install-discovery runs.
1415			 */
1416			log_instance(inst, B_FALSE, "Enabled.");
1417			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1418			    inst->ri_i.i_fmri);
1419
1420			/*
1421			 * If we are coming from DISABLED, it was obviously an
1422			 * enable request. If we are coming from UNINIT, it may
1423			 * have been a sevice in MAINT that was cleared.
1424			 */
1425			if (riq->riq_reason == restarter_str_clear_request)
1426				reason = restarter_str_clear_request;
1427			else if (state == RESTARTER_STATE_DISABLED)
1428				reason = restarter_str_enable_request;
1429			(void) restarter_instance_update_states(h, inst,
1430			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1431			    RERR_NONE, reason);
1432		} else {
1433			log_framework(LOG_DEBUG, "Restarter: "
1434			    "Not changing state of %s for enable command.\n",
1435			    inst->ri_i.i_fmri);
1436		}
1437	} else {
1438		inst->ri_i.i_enabled = 0;
1439
1440		switch (state) {
1441		case RESTARTER_STATE_ONLINE:
1442		case RESTARTER_STATE_DEGRADED:
1443			r = stop_instance(h, inst, RSTOP_DISABLE);
1444			return (r == ECONNRESET ? 0 : r);
1445
1446		case RESTARTER_STATE_OFFLINE:
1447		case RESTARTER_STATE_UNINIT:
1448			if (inst->ri_i.i_primary_ctid != 0) {
1449				inst->ri_m_inst = safe_scf_instance_create(h);
1450				inst->ri_mi_deleted = B_FALSE;
1451
1452				libscf_reget_instance(inst);
1453				method_remove_contract(inst, B_TRUE, B_TRUE);
1454
1455				scf_instance_destroy(inst->ri_m_inst);
1456			}
1457			/* B_FALSE: See log_instance(..., "Enabled."); above */
1458			log_instance(inst, B_FALSE, "Disabled.");
1459			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1460			    inst->ri_i.i_fmri);
1461
1462			/*
1463			 * If we are coming from OFFLINE, it was obviously a
1464			 * disable request. But if we are coming from
1465			 * UNINIT, it may have been a disable request for a
1466			 * service in MAINT.
1467			 */
1468			if (riq->riq_reason == restarter_str_disable_request ||
1469			    state == RESTARTER_STATE_OFFLINE)
1470				reason = restarter_str_disable_request;
1471			(void) restarter_instance_update_states(h, inst,
1472			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1473			    RERR_RESTART, reason);
1474			return (0);
1475
1476		case RESTARTER_STATE_DISABLED:
1477			break;
1478
1479		case RESTARTER_STATE_MAINT:
1480			/*
1481			 * We only want to pull the instance out of maintenance
1482			 * if the disable is on adminstrative request.  The
1483			 * graph engine sends _DISABLE events whenever a
1484			 * service isn't in the disabled state, and we don't
1485			 * want to pull the service out of maintenance if,
1486			 * for example, it is there due to a dependency cycle.
1487			 */
1488			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1489				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1490			break;
1491
1492		default:
1493#ifndef NDEBUG
1494			(void) fprintf(stderr, "Restarter instance %s has "
1495			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1496#endif
1497			abort();
1498		}
1499	}
1500
1501	return (0);
1502}
1503
1504static void
1505start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1506    int32_t reason)
1507{
1508	fork_info_t *info;
1509	restarter_str_t	new_reason;
1510
1511	assert(MUTEX_HELD(&inst->ri_lock));
1512	assert(instance_in_transition(inst) == 0);
1513	assert(inst->ri_method_thread == 0);
1514
1515	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1516	    inst->ri_i.i_fmri);
1517
1518	/*
1519	 * We want to keep the original reason for restarts and clear actions
1520	 */
1521	switch (reason) {
1522	case restarter_str_restart_request:
1523	case restarter_str_clear_request:
1524		new_reason = reason;
1525		break;
1526	default:
1527		new_reason = restarter_str_dependencies_satisfied;
1528	}
1529
1530	/* Services in the disabled and maintenance state are ignored */
1531	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1532	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1533	    inst->ri_i.i_enabled == 0) {
1534		log_framework(LOG_DEBUG,
1535		    "%s: start_instance -> is maint/disabled\n",
1536		    inst->ri_i.i_fmri);
1537		return;
1538	}
1539
1540	/* Already started instances are left alone */
1541	if (instance_started(inst) == 1) {
1542		log_framework(LOG_DEBUG,
1543		    "%s: start_instance -> is already started\n",
1544		    inst->ri_i.i_fmri);
1545		return;
1546	}
1547
1548	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1549
1550	(void) restarter_instance_update_states(local_handle, inst,
1551	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1552
1553	info = startd_zalloc(sizeof (fork_info_t));
1554
1555	info->sf_id = inst->ri_id;
1556	info->sf_method_type = METHOD_START;
1557	info->sf_event_type = RERR_NONE;
1558	info->sf_reason = new_reason;
1559	inst->ri_method_thread = startd_thread_create(method_thread, info);
1560}
1561
1562static int
1563event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1564{
1565	scf_instance_t *inst;
1566	int ret = 0;
1567
1568	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1569		return (-1);
1570
1571	ret = restarter_inst_ractions_from_tty(inst);
1572
1573	scf_instance_destroy(inst);
1574	return (ret);
1575}
1576
1577static boolean_t
1578restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1579{
1580	scf_instance_t *inst;
1581	boolean_t ret = B_FALSE;
1582
1583	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1584		return (-1);
1585
1586	if (restarter_inst_dump(inst) == 1)
1587		ret = B_TRUE;
1588
1589	scf_instance_destroy(inst);
1590	return (ret);
1591}
1592
1593static void
1594maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1595    restarter_str_t reason)
1596{
1597	fork_info_t *info;
1598	scf_instance_t *scf_inst = NULL;
1599
1600	assert(MUTEX_HELD(&rip->ri_lock));
1601	assert(reason != restarter_str_none);
1602	assert(rip->ri_method_thread == 0);
1603
1604	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1605	    restarter_get_str_short(reason));
1606	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1607	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
1608
1609	/* Services in the maintenance state are ignored */
1610	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1611		log_framework(LOG_DEBUG,
1612		    "%s: maintain_instance -> is already in maintenance\n",
1613		    rip->ri_i.i_fmri);
1614		return;
1615	}
1616
1617	/*
1618	 * If reason state is restarter_str_service_request and
1619	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1620	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1621	 */
1622	if (reason == restarter_str_service_request &&
1623	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1624		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1625			if (restarter_inst_set_aux_fmri(scf_inst))
1626				log_framework(LOG_DEBUG, "%s: "
1627				    "restarter_inst_set_aux_fmri failed: ",
1628				    rip->ri_i.i_fmri);
1629		} else {
1630			log_framework(LOG_DEBUG, "%s: "
1631			    "restarter_inst_validate_ractions_aux_fmri "
1632			    "failed: ", rip->ri_i.i_fmri);
1633
1634			if (restarter_inst_reset_aux_fmri(scf_inst))
1635				log_framework(LOG_DEBUG, "%s: "
1636				    "restarter_inst_reset_aux_fmri failed: ",
1637				    rip->ri_i.i_fmri);
1638		}
1639		scf_instance_destroy(scf_inst);
1640	}
1641
1642	if (immediate || !instance_started(rip)) {
1643		if (rip->ri_i.i_primary_ctid != 0) {
1644			rip->ri_m_inst = safe_scf_instance_create(h);
1645			rip->ri_mi_deleted = B_FALSE;
1646
1647			libscf_reget_instance(rip);
1648			method_remove_contract(rip, B_TRUE, B_TRUE);
1649
1650			scf_instance_destroy(rip->ri_m_inst);
1651		}
1652
1653		(void) restarter_instance_update_states(h, rip,
1654		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1655		    reason);
1656		return;
1657	}
1658
1659	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1660	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
1661
1662	log_transition(rip, MAINT_REQUESTED);
1663
1664	info = startd_zalloc(sizeof (*info));
1665	info->sf_id = rip->ri_id;
1666	info->sf_method_type = METHOD_STOP;
1667	info->sf_event_type = RERR_RESTART;
1668	info->sf_reason = reason;
1669	rip->ri_method_thread = startd_thread_create(method_thread, info);
1670}
1671
1672static void
1673refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1674{
1675	scf_instance_t *inst;
1676	scf_snapshot_t *snap;
1677	fork_info_t *info;
1678	int r;
1679
1680	assert(MUTEX_HELD(&rip->ri_lock));
1681
1682	log_instance(rip, B_TRUE, "Rereading configuration.");
1683	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1684	    rip->ri_i.i_fmri);
1685
1686rep_retry:
1687	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1688	switch (r) {
1689	case 0:
1690		break;
1691
1692	case ECONNABORTED:
1693		libscf_handle_rebind(h);
1694		goto rep_retry;
1695
1696	case ENOENT:
1697		/* Must have been deleted. */
1698		return;
1699
1700	case EINVAL:
1701	case ENOTSUP:
1702	default:
1703		bad_error("libscf_fmri_get_instance", r);
1704	}
1705
1706	snap = libscf_get_running_snapshot(inst);
1707
1708	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1709	    &rip->ri_utmpx_prefix);
1710	switch (r) {
1711	case 0:
1712		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1713		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1714		break;
1715
1716	case ECONNABORTED:
1717		scf_instance_destroy(inst);
1718		scf_snapshot_destroy(snap);
1719		libscf_handle_rebind(h);
1720		goto rep_retry;
1721
1722	case ECANCELED:
1723	case ENOENT:
1724		/* Succeed in anticipation of REMOVE_INSTANCE. */
1725		break;
1726
1727	default:
1728		bad_error("libscf_get_startd_properties", r);
1729	}
1730
1731	if (instance_started(rip)) {
1732		/* Refresh does not change the state. */
1733		(void) restarter_instance_update_states(h, rip,
1734		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1735		    restarter_str_refresh);
1736
1737		info = startd_zalloc(sizeof (*info));
1738		info->sf_id = rip->ri_id;
1739		info->sf_method_type = METHOD_REFRESH;
1740		info->sf_event_type = RERR_REFRESH;
1741		info->sf_reason = 0;
1742
1743		assert(rip->ri_method_thread == 0);
1744		rip->ri_method_thread =
1745		    startd_thread_create(method_thread, info);
1746	}
1747
1748	scf_snapshot_destroy(snap);
1749	scf_instance_destroy(inst);
1750}
1751
1752const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
1753	"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
1754	"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
1755	"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
1756	"INVALID_DEPENDENCY", "ADMIN_DISABLE", "STOP_RESET"
1757};
1758
1759/*
1760 * void *restarter_process_events()
1761 *
1762 *   Called in a separate thread to process the events on an instance's
1763 *   queue.  Empties the queue completely, and tries to keep the thread
1764 *   around for a little while after the queue is empty to save on
1765 *   startup costs.
1766 */
1767static void *
1768restarter_process_events(void *arg)
1769{
1770	scf_handle_t *h;
1771	restarter_instance_qentry_t *event;
1772	restarter_inst_t *rip;
1773	char *fmri = (char *)arg;
1774	struct timespec to;
1775
1776	(void) pthread_setname_np(pthread_self(), "restarter_process_events");
1777
1778	assert(fmri != NULL);
1779
1780	h = libscf_handle_create_bound_loop();
1781
1782	/* grab the queue lock */
1783	rip = inst_lookup_queue(fmri);
1784	if (rip == NULL)
1785		goto out;
1786
1787again:
1788
1789	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1790		restarter_inst_t *inst;
1791
1792		/* drop the queue lock */
1793		MUTEX_UNLOCK(&rip->ri_queue_lock);
1794
1795		/*
1796		 * Grab the inst lock -- this waits until any outstanding
1797		 * method finishes running.
1798		 */
1799		inst = inst_lookup_by_name(fmri);
1800		if (inst == NULL) {
1801			/* Getting deleted in the middle isn't an error. */
1802			goto cont;
1803		}
1804
1805		assert(instance_in_transition(inst) == 0);
1806
1807		/* process the event */
1808		switch (event->riq_type) {
1809		case RESTARTER_EVENT_TYPE_ENABLE:
1810		case RESTARTER_EVENT_TYPE_DISABLE:
1811			(void) enable_inst(h, inst, event);
1812			break;
1813
1814		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1815			if (enable_inst(h, inst, event) == 0)
1816				reset_start_times(inst);
1817			break;
1818
1819		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1820			restarter_delete_inst(inst);
1821			inst = NULL;
1822			goto cont;
1823
1824		case RESTARTER_EVENT_TYPE_STOP_RESET:
1825			reset_start_times(inst);
1826			/* FALLTHROUGH */
1827		case RESTARTER_EVENT_TYPE_STOP:
1828			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1829			break;
1830
1831		case RESTARTER_EVENT_TYPE_START:
1832			start_instance(h, inst, event->riq_reason);
1833			break;
1834
1835		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1836			maintain_instance(h, inst, 0,
1837			    restarter_str_dependency_cycle);
1838			break;
1839
1840		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1841			maintain_instance(h, inst, 0,
1842			    restarter_str_invalid_dependency);
1843			break;
1844
1845		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1846			if (event_from_tty(h, inst) == 0)
1847				maintain_instance(h, inst, 0,
1848				    restarter_str_service_request);
1849			else
1850				maintain_instance(h, inst, 0,
1851				    restarter_str_administrative_request);
1852			break;
1853
1854		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1855			if (event_from_tty(h, inst) == 0)
1856				maintain_instance(h, inst, 1,
1857				    restarter_str_service_request);
1858			else
1859				maintain_instance(h, inst, 1,
1860				    restarter_str_administrative_request);
1861			break;
1862
1863		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1864			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1865			reset_start_times(inst);
1866			break;
1867
1868		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1869			refresh_instance(h, inst);
1870			break;
1871
1872		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1873			log_framework(LOG_WARNING, "Restarter: "
1874			    "%s command (for %s) unimplemented.\n",
1875			    event_names[event->riq_type], inst->ri_i.i_fmri);
1876			break;
1877
1878		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1879			if (!instance_started(inst)) {
1880				log_framework(LOG_DEBUG, "Restarter: "
1881				    "Not restarting %s; not running.\n",
1882				    inst->ri_i.i_fmri);
1883			} else {
1884				/*
1885				 * Stop the instance.  If it can be restarted,
1886				 * the graph engine will send a new event.
1887				 */
1888				if (restart_dump(h, inst)) {
1889					(void) contract_kill(
1890					    inst->ri_i.i_primary_ctid, SIGABRT,
1891					    inst->ri_i.i_fmri);
1892				} else if (stop_instance(h, inst,
1893				    RSTOP_RESTART) == 0) {
1894					reset_start_times(inst);
1895				}
1896			}
1897			break;
1898
1899		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1900		default:
1901#ifndef NDEBUG
1902			uu_warn("%s:%d: Bad restarter event %d.  "
1903			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1904#endif
1905			abort();
1906		}
1907
1908		assert(inst != NULL);
1909		MUTEX_UNLOCK(&inst->ri_lock);
1910
1911cont:
1912		/* grab the queue lock */
1913		rip = inst_lookup_queue(fmri);
1914		if (rip == NULL)
1915			goto out;
1916
1917		/* delete the event */
1918		uu_list_remove(rip->ri_queue, event);
1919		startd_free(event, sizeof (restarter_instance_qentry_t));
1920	}
1921
1922	assert(rip != NULL);
1923
1924	/*
1925	 * Try to preserve the thread for a little while for future use.
1926	 */
1927	to.tv_sec = 3;
1928	to.tv_nsec = 0;
1929	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1930	    &rip->ri_queue_lock, &to);
1931
1932	if (uu_list_first(rip->ri_queue) != NULL)
1933		goto again;
1934
1935	rip->ri_queue_thread = 0;
1936	MUTEX_UNLOCK(&rip->ri_queue_lock);
1937
1938out:
1939	(void) scf_handle_unbind(h);
1940	scf_handle_destroy(h);
1941	free(fmri);
1942	return (NULL);
1943}
1944
1945static int
1946is_admin_event(restarter_event_type_t t)
1947{
1948	switch (t) {
1949	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1950	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1951	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1952	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1953	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1954	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1955		return (1);
1956	default:
1957		return (0);
1958	}
1959}
1960
1961static void
1962restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1963{
1964	restarter_instance_qentry_t *qe;
1965	int r;
1966
1967	assert(MUTEX_HELD(&ri->ri_queue_lock));
1968	assert(!MUTEX_HELD(&ri->ri_lock));
1969
1970	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1971	qe->riq_type = e->rpe_type;
1972	qe->riq_reason = e->rpe_reason;
1973
1974	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1975	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1976	assert(r == 0);
1977}
1978
1979/*
1980 * void *restarter_event_thread()
1981 *
1982 *  Handle incoming graph events by placing them on a per-instance
1983 *  queue.  We can't lock the main part of the instance structure, so
1984 *  just modify the seprarately locked event queue portion.
1985 */
1986/*ARGSUSED*/
1987static void *
1988restarter_event_thread(void *unused)
1989{
1990	scf_handle_t *h;
1991
1992	(void) pthread_setname_np(pthread_self(), "restarter_event");
1993
1994	/*
1995	 * This is a new thread, and thus, gets its own handle
1996	 * to the repository.
1997	 */
1998	h = libscf_handle_create_bound_loop();
1999
2000	MUTEX_LOCK(&ru->restarter_update_lock);
2001
2002	/*CONSTCOND*/
2003	while (1) {
2004		restarter_protocol_event_t *e;
2005
2006		while (ru->restarter_update_wakeup == 0)
2007			(void) pthread_cond_wait(&ru->restarter_update_cv,
2008			    &ru->restarter_update_lock);
2009
2010		ru->restarter_update_wakeup = 0;
2011
2012		while ((e = restarter_event_dequeue()) != NULL) {
2013			restarter_inst_t *rip;
2014			char *fmri;
2015
2016			MUTEX_UNLOCK(&ru->restarter_update_lock);
2017
2018			/*
2019			 * ADD_INSTANCE is special: there's likely no
2020			 * instance structure yet, so we need to handle the
2021			 * addition synchronously.
2022			 */
2023			switch (e->rpe_type) {
2024			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
2025				if (restarter_insert_inst(h, e->rpe_inst) != 0)
2026					log_error(LOG_INFO, "Restarter: "
2027					    "Could not add %s.\n", e->rpe_inst);
2028
2029				MUTEX_LOCK(&st->st_load_lock);
2030				if (--st->st_load_instances == 0)
2031					(void) pthread_cond_broadcast(
2032					    &st->st_load_cv);
2033				MUTEX_UNLOCK(&st->st_load_lock);
2034
2035				goto nolookup;
2036			}
2037
2038			/*
2039			 * Lookup the instance, locking only the event queue.
2040			 * Can't grab ri_lock here because it might be held
2041			 * by a long-running method.
2042			 */
2043			rip = inst_lookup_queue(e->rpe_inst);
2044			if (rip == NULL) {
2045				log_error(LOG_INFO, "Restarter: "
2046				    "Ignoring %s command for unknown service "
2047				    "%s.\n", event_names[e->rpe_type],
2048				    e->rpe_inst);
2049				goto nolookup;
2050			}
2051
2052			/* Keep ADMIN events from filling up the queue. */
2053			if (is_admin_event(e->rpe_type) &&
2054			    uu_list_numnodes(rip->ri_queue) >
2055			    RINST_QUEUE_THRESHOLD) {
2056				MUTEX_UNLOCK(&rip->ri_queue_lock);
2057				log_instance(rip, B_TRUE, "Instance event "
2058				    "queue overflow.  Dropping administrative "
2059				    "request.");
2060				log_framework(LOG_DEBUG, "%s: Instance event "
2061				    "queue overflow.  Dropping administrative "
2062				    "request.\n", rip->ri_i.i_fmri);
2063				goto nolookup;
2064			}
2065
2066			/* Now add the event to the instance queue. */
2067			restarter_queue_event(rip, e);
2068
2069			if (rip->ri_queue_thread == 0) {
2070				/*
2071				 * Start a thread if one isn't already
2072				 * running.
2073				 */
2074				fmri = safe_strdup(e->rpe_inst);
2075				rip->ri_queue_thread =  startd_thread_create(
2076				    restarter_process_events, (void *)fmri);
2077			} else {
2078				/*
2079				 * Signal the existing thread that there's
2080				 * a new event.
2081				 */
2082				(void) pthread_cond_broadcast(
2083				    &rip->ri_queue_cv);
2084			}
2085
2086			MUTEX_UNLOCK(&rip->ri_queue_lock);
2087nolookup:
2088			restarter_event_release(e);
2089
2090			MUTEX_LOCK(&ru->restarter_update_lock);
2091		}
2092	}
2093}
2094
2095static restarter_inst_t *
2096contract_to_inst(ctid_t ctid)
2097{
2098	restarter_inst_t *inst;
2099	int id;
2100
2101	id = lookup_inst_by_contract(ctid);
2102	if (id == -1)
2103		return (NULL);
2104
2105	inst = inst_lookup_by_id(id);
2106	if (inst != NULL) {
2107		/*
2108		 * Since ri_lock isn't held by the contract id lookup, this
2109		 * instance may have been restarted and now be in a new
2110		 * contract, making the old contract no longer valid for this
2111		 * instance.
2112		 */
2113		if (ctid != inst->ri_i.i_primary_ctid) {
2114			MUTEX_UNLOCK(&inst->ri_lock);
2115			inst = NULL;
2116		}
2117	}
2118	return (inst);
2119}
2120
2121/*
2122 * void contract_action()
2123 *   Take action on contract events.
2124 */
2125static void
2126contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2127    uint32_t type)
2128{
2129	const char *fmri = inst->ri_i.i_fmri;
2130
2131	assert(MUTEX_HELD(&inst->ri_lock));
2132
2133	/*
2134	 * If startd has stopped this contract, there is no need to
2135	 * stop it again.
2136	 */
2137	if (inst->ri_i.i_primary_ctid > 0 &&
2138	    inst->ri_i.i_primary_ctid_stopped)
2139		return;
2140
2141	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2142	    | CT_PR_EV_HWERR)) == 0) {
2143		/*
2144		 * There shouldn't be other events, since that's not how we set
2145		 * the terms. Thus, just log an error and drive on.
2146		 */
2147		log_framework(LOG_NOTICE,
2148		    "%s: contract %ld received unexpected critical event "
2149		    "(%d)\n", fmri, id, type);
2150		return;
2151	}
2152
2153	assert(instance_in_transition(inst) == 0);
2154
2155	if (instance_is_wait_style(inst)) {
2156		/*
2157		 * We ignore all events; if they impact the
2158		 * process we're monitoring, then the
2159		 * wait_thread will stop the instance.
2160		 */
2161		log_framework(LOG_DEBUG,
2162		    "%s: ignoring contract event on wait-style service\n",
2163		    fmri);
2164	} else {
2165		/*
2166		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2167		 */
2168		switch (type) {
2169		case CT_PR_EV_EMPTY:
2170			(void) stop_instance(h, inst, RSTOP_EXIT);
2171			break;
2172		case CT_PR_EV_CORE:
2173			(void) stop_instance(h, inst, RSTOP_CORE);
2174			break;
2175		case CT_PR_EV_SIGNAL:
2176			(void) stop_instance(h, inst, RSTOP_SIGNAL);
2177			break;
2178		case CT_PR_EV_HWERR:
2179			(void) stop_instance(h, inst, RSTOP_HWERR);
2180			break;
2181		}
2182	}
2183}
2184
2185/*
2186 * void *restarter_contract_event_thread(void *)
2187 *   Listens to the process contract bundle for critical events, taking action
2188 *   on events from contracts we know we are responsible for.
2189 */
2190/*ARGSUSED*/
2191static void *
2192restarter_contracts_event_thread(void *unused)
2193{
2194	int fd, err;
2195	scf_handle_t *local_handle;
2196
2197	(void) pthread_setname_np(pthread_self(), "restarter_contracts_event");
2198
2199	/*
2200	 * Await graph load completion.  That is, stop here, until we've scanned
2201	 * the repository for contract - instance associations.
2202	 */
2203	MUTEX_LOCK(&st->st_load_lock);
2204	while (!(st->st_load_complete && st->st_load_instances == 0))
2205		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2206	MUTEX_UNLOCK(&st->st_load_lock);
2207
2208	/*
2209	 * This is a new thread, and thus, gets its own handle
2210	 * to the repository.
2211	 */
2212	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2213		uu_die("Unable to bind a new repository handle: %s\n",
2214		    scf_strerror(scf_error()));
2215
2216	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2217	if (fd == -1)
2218		uu_die("process bundle open failed");
2219
2220	/*
2221	 * Make sure we get all events (including those generated by configd
2222	 * before this thread was started).
2223	 */
2224	err = ct_event_reset(fd);
2225	assert(err == 0);
2226
2227	for (;;) {
2228		int efd, sfd;
2229		ct_evthdl_t ev;
2230		uint32_t type;
2231		ctevid_t evid;
2232		ct_stathdl_t status;
2233		ctid_t ctid;
2234		restarter_inst_t *inst;
2235		uint64_t cookie;
2236
2237		if (err = ct_event_read_critical(fd, &ev)) {
2238			log_error(LOG_WARNING,
2239			    "Error reading next contract event: %s",
2240			    strerror(err));
2241			continue;
2242		}
2243
2244		evid = ct_event_get_evid(ev);
2245		ctid = ct_event_get_ctid(ev);
2246		type = ct_event_get_type(ev);
2247
2248		/* Fetch cookie. */
2249		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2250		    < 0) {
2251			ct_event_free(ev);
2252			continue;
2253		}
2254
2255		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2256			log_framework(LOG_WARNING, "Could not get status for "
2257			    "contract %ld: %s\n", ctid, strerror(err));
2258
2259			startd_close(sfd);
2260			ct_event_free(ev);
2261			continue;
2262		}
2263
2264		cookie = ct_status_get_cookie(status);
2265
2266		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2267		    "cookie %lld\n", type, ctid, cookie);
2268
2269		ct_status_free(status);
2270
2271		startd_close(sfd);
2272
2273		/*
2274		 * svc.configd(1M) restart handling performed by the
2275		 * fork_configd_thread.  We don't acknowledge, as that thread
2276		 * will do so.
2277		 */
2278		if (cookie == CONFIGD_COOKIE) {
2279			ct_event_free(ev);
2280			continue;
2281		}
2282
2283		inst = NULL;
2284		if (storing_contract != 0 &&
2285		    (inst = contract_to_inst(ctid)) == NULL) {
2286			/*
2287			 * This can happen for two reasons:
2288			 * - method_run() has not yet stored the
2289			 *    the contract into the internal hash table.
2290			 * - we receive an EMPTY event for an abandoned
2291			 *    contract.
2292			 * If there is any contract in the process of
2293			 * being stored into the hash table then re-read
2294			 * the event later.
2295			 */
2296			log_framework(LOG_DEBUG,
2297			    "Reset event %d for unknown "
2298			    "contract id %ld\n", type, ctid);
2299
2300			/* don't go too fast */
2301			(void) poll(NULL, 0, 100);
2302
2303			(void) ct_event_reset(fd);
2304			ct_event_free(ev);
2305			continue;
2306		}
2307
2308		/*
2309		 * Do not call contract_to_inst() again if first
2310		 * call succeeded.
2311		 */
2312		if (inst == NULL)
2313			inst = contract_to_inst(ctid);
2314		if (inst == NULL) {
2315			/*
2316			 * This can happen if we receive an EMPTY
2317			 * event for an abandoned contract.
2318			 */
2319			log_framework(LOG_DEBUG,
2320			    "Received event %d for unknown contract id "
2321			    "%ld\n", type, ctid);
2322		} else {
2323			log_framework(LOG_DEBUG,
2324			    "Received event %d for contract id "
2325			    "%ld (%s)\n", type, ctid,
2326			    inst->ri_i.i_fmri);
2327
2328			contract_action(local_handle, inst, ctid, type);
2329
2330			MUTEX_UNLOCK(&inst->ri_lock);
2331		}
2332
2333		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2334		    O_WRONLY);
2335		if (efd != -1) {
2336			(void) ct_ctl_ack(efd, evid);
2337			startd_close(efd);
2338		}
2339
2340		ct_event_free(ev);
2341
2342	}
2343
2344	/*NOTREACHED*/
2345	return (NULL);
2346}
2347
2348/*
2349 * Timeout queue, processed by restarter_timeouts_event_thread().
2350 */
2351timeout_queue_t *timeouts;
2352static uu_list_pool_t *timeout_pool;
2353
2354typedef struct timeout_update {
2355	pthread_mutex_t		tu_lock;
2356	pthread_cond_t		tu_cv;
2357	int			tu_wakeup;
2358} timeout_update_t;
2359
2360timeout_update_t *tu;
2361
2362static const char *timeout_ovr_svcs[] = {
2363	"svc:/system/manifest-import:default",
2364	"svc:/network/initial:default",
2365	"svc:/network/service:default",
2366	"svc:/system/rmtmpfiles:default",
2367	"svc:/network/loopback:default",
2368	"svc:/network/physical:default",
2369	"svc:/system/device/local:default",
2370	"svc:/system/filesystem/usr:default",
2371	"svc:/system/filesystem/minimal:default",
2372	"svc:/system/filesystem/local:default",
2373	NULL
2374};
2375
2376int
2377is_timeout_ovr(restarter_inst_t *inst)
2378{
2379	int i;
2380
2381	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2382		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2383			log_instance(inst, B_TRUE, "Timeout override by "
2384			    "svc.startd.  Using infinite timeout.");
2385			return (1);
2386		}
2387	}
2388
2389	return (0);
2390}
2391
2392/*ARGSUSED*/
2393static int
2394timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2395{
2396	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2397	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2398
2399	if (t1 > t2)
2400		return (1);
2401	else if (t1 < t2)
2402		return (-1);
2403	return (0);
2404}
2405
2406void
2407timeout_init()
2408{
2409	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2410
2411	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2412
2413	timeout_pool = startd_list_pool_create("timeouts",
2414	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2415	    timeout_compare, UU_LIST_POOL_DEBUG);
2416	assert(timeout_pool != NULL);
2417
2418	timeouts->tq_list = startd_list_create(timeout_pool,
2419	    timeouts, UU_LIST_SORTED);
2420	assert(timeouts->tq_list != NULL);
2421
2422	tu = startd_zalloc(sizeof (timeout_update_t));
2423	(void) pthread_cond_init(&tu->tu_cv, NULL);
2424	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2425}
2426
2427void
2428timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2429{
2430	hrtime_t now, timeout;
2431	timeout_entry_t *entry;
2432	uu_list_index_t idx;
2433
2434	assert(MUTEX_HELD(&inst->ri_lock));
2435
2436	now = gethrtime();
2437
2438	/*
2439	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2440	 * just return.
2441	 */
2442	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2443		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2444		    "treating as infinite.");
2445		return;
2446	}
2447
2448	/* hrtime is in nanoseconds. Convert timeout_sec. */
2449	timeout = now + (timeout_sec * 1000000000LL);
2450
2451	entry = startd_alloc(sizeof (timeout_entry_t));
2452	entry->te_timeout = timeout;
2453	entry->te_ctid = cid;
2454	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2455	entry->te_logstem = safe_strdup(inst->ri_logstem);
2456	entry->te_fired = 0;
2457	/* Insert the calculated timeout time onto the queue. */
2458	MUTEX_LOCK(&timeouts->tq_lock);
2459	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2460	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2461	uu_list_insert(timeouts->tq_list, entry, idx);
2462	MUTEX_UNLOCK(&timeouts->tq_lock);
2463
2464	assert(inst->ri_timeout == NULL);
2465	inst->ri_timeout = entry;
2466
2467	MUTEX_LOCK(&tu->tu_lock);
2468	tu->tu_wakeup = 1;
2469	(void) pthread_cond_broadcast(&tu->tu_cv);
2470	MUTEX_UNLOCK(&tu->tu_lock);
2471}
2472
2473
2474void
2475timeout_remove(restarter_inst_t *inst, ctid_t cid)
2476{
2477	assert(MUTEX_HELD(&inst->ri_lock));
2478
2479	if (inst->ri_timeout == NULL)
2480		return;
2481
2482	assert(inst->ri_timeout->te_ctid == cid);
2483
2484	MUTEX_LOCK(&timeouts->tq_lock);
2485	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2486	MUTEX_UNLOCK(&timeouts->tq_lock);
2487
2488	free(inst->ri_timeout->te_fmri);
2489	free(inst->ri_timeout->te_logstem);
2490	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2491	inst->ri_timeout = NULL;
2492}
2493
2494static int
2495timeout_now()
2496{
2497	timeout_entry_t *e;
2498	hrtime_t now;
2499	int ret;
2500
2501	now = gethrtime();
2502
2503	/*
2504	 * Walk through the (sorted) timeouts list.  While the timeout
2505	 * at the head of the list is <= the current time, kill the
2506	 * method.
2507	 */
2508	MUTEX_LOCK(&timeouts->tq_lock);
2509
2510	for (e = uu_list_first(timeouts->tq_list);
2511	    e != NULL && e->te_timeout <= now;
2512	    e = uu_list_next(timeouts->tq_list, e)) {
2513		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2514		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2515		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2516		    "Method or service exit timed out.  Killing contract %ld.",
2517		    e->te_ctid);
2518		e->te_fired = 1;
2519		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2520	}
2521
2522	if (uu_list_numnodes(timeouts->tq_list) > 0)
2523		ret = 0;
2524	else
2525		ret = -1;
2526
2527	MUTEX_UNLOCK(&timeouts->tq_lock);
2528
2529	return (ret);
2530}
2531
2532/*
2533 * void *restarter_timeouts_event_thread(void *)
2534 *   Responsible for monitoring the method timeouts.  This thread must
2535 *   be started before any methods are called.
2536 */
2537/*ARGSUSED*/
2538static void *
2539restarter_timeouts_event_thread(void *unused)
2540{
2541	/*
2542	 * Timeouts are entered on a priority queue, which is processed by
2543	 * this thread.  As timeouts are specified in seconds, we'll do
2544	 * the necessary processing every second, as long as the queue
2545	 * is not empty.
2546	 */
2547
2548	(void) pthread_setname_np(pthread_self(), "restarter_timeouts_event");
2549
2550	/*CONSTCOND*/
2551	while (1) {
2552		/*
2553		 * As long as the timeout list isn't empty, process it
2554		 * every second.
2555		 */
2556		if (timeout_now() == 0) {
2557			(void) sleep(1);
2558			continue;
2559		}
2560
2561		/* The list is empty, wait until we have more timeouts. */
2562		MUTEX_LOCK(&tu->tu_lock);
2563
2564		while (tu->tu_wakeup == 0)
2565			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2566
2567		tu->tu_wakeup = 0;
2568		MUTEX_UNLOCK(&tu->tu_lock);
2569	}
2570
2571	return (NULL);
2572}
2573
2574void
2575restarter_start()
2576{
2577	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2578	(void) startd_thread_create(restarter_event_thread, NULL);
2579	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2580	(void) startd_thread_create(wait_thread, NULL);
2581}
2582
2583
2584void
2585restarter_init()
2586{
2587	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2588	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2589	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2590	(void) memset(&instance_list, 0, sizeof (instance_list));
2591
2592	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2593	instance_list.ril_instance_list = startd_list_create(
2594	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2595
2596	restarter_queue_pool = startd_list_pool_create(
2597	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2598	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2599	    UU_LIST_POOL_DEBUG);
2600
2601	contract_list_pool = startd_list_pool_create(
2602	    "contract_list", sizeof (contract_entry_t),
2603	    offsetof(contract_entry_t,  ce_link), NULL,
2604	    UU_LIST_POOL_DEBUG);
2605	contract_hash_init();
2606
2607	log_framework(LOG_DEBUG, "Initialized restarter\n");
2608}
2609