1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2016-2018 RackTop Systems.
26 */
27
28
29/*
30 * transition.c - Graph State Machine
31 *
32 * The graph state machine is implemented here, with a typical approach
33 * of a function per state.  Separating the implementation allows more
34 * clarity into the actions taken on notification of state change, as well
35 * as a place for future expansion including hooks for configurable actions.
36 * All functions are called with dgraph_lock held.
37 *
38 * The start action for this state machine is not explicit.  The states
39 * (ONLINE and DEGRADED) which need to know when they're entering the state
40 * due to a daemon restart implement this understanding by checking for
41 * transition from uninitialized.  In the future, this would likely be better
42 * as an explicit start action instead of relying on an overloaded transition.
43 *
44 * All gt_enter functions use the same set of return codes.
45 *    0              success
46 *    ECONNABORTED   repository connection aborted
47 */
48
49#include "startd.h"
50
51static int
52gt_running(restarter_instance_state_t state)
53{
54	if (state == RESTARTER_STATE_ONLINE ||
55	    state == RESTARTER_STATE_DEGRADED)
56		return (1);
57
58	return (0);
59}
60
61static int
62gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
63    restarter_instance_state_t old_state, restarter_error_t rerr)
64{
65	int err;
66	scf_instance_t *inst;
67
68	/* Initialize instance by refreshing it. */
69
70	err = libscf_fmri_get_instance(h, v->gv_name, &inst);
71	switch (err) {
72	case 0:
73		break;
74
75	case ECONNABORTED:
76		return (ECONNABORTED);
77
78	case ENOENT:
79		return (0);
80
81	case EINVAL:
82	case ENOTSUP:
83	default:
84		bad_error("libscf_fmri_get_instance", err);
85	}
86
87	err = refresh_vertex(v, inst);
88	if (err == 0)
89		graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
90
91	scf_instance_destroy(inst);
92
93	/* If the service was running, propagate a stop event. */
94	if (gt_running(old_state)) {
95		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
96		    v->gv_name);
97
98		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
99	}
100
101	graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
102	return (0);
103}
104
105/* ARGSUSED */
106static int
107gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
108    restarter_instance_state_t old_state, restarter_error_t rerr)
109{
110	int to_offline = v->gv_flags & GV_TOOFFLINE;
111
112	/*
113	 * If the service was running, propagate a stop event.  If the
114	 * service was not running the maintenance transition may satisfy
115	 * optional dependencies and should be propagated to determine
116	 * whether new dependents are satisfiable.
117	 * Instances that transition to maintenance and have the GV_TOOFFLINE
118	 * flag are special because they can expose new subtree leaves so
119	 * propagate the offline to the instance dependencies.
120	 */
121
122	/* instance transitioning to maintenance is considered disabled */
123	v->gv_flags &= ~GV_TODISABLE;
124	v->gv_flags &= ~GV_TOOFFLINE;
125
126	if (gt_running(old_state)) {
127		/*
128		 * Handle state change during instance disabling.
129		 * Propagate offline to the new exposed leaves.
130		 */
131		if (to_offline) {
132			log_framework(LOG_DEBUG, "%s removed from subtree\n",
133			    v->gv_name);
134
135			graph_offline_subtree_leaves(v, (void *)h);
136		}
137
138		log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
139		    "%s.\n", v->gv_name);
140
141		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
142
143		/*
144		 * The maintenance transition may satisfy optional_all/restart
145		 * dependencies and should be propagated to determine
146		 * whether new dependents are satisfiable.
147		 */
148		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
149	} else {
150		log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
151		    v->gv_name);
152
153		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
154	}
155
156	graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
157	return (0);
158}
159
160/* ARGSUSED */
161static int
162gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
163    restarter_instance_state_t old_state, restarter_error_t rerr)
164{
165	int to_offline = v->gv_flags & GV_TOOFFLINE;
166
167	v->gv_flags &= ~GV_TOOFFLINE;
168
169	/*
170	 * If the instance should be disabled send it a disable command.
171	 * Otherwise, if GV_TOOFFLINE was not set, see if we can start it.
172	 */
173	if (v->gv_flags & GV_TODISABLE) {
174		if (gt_running(old_state) && v->gv_post_disable_f)
175			v->gv_post_disable_f();
176
177		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
178	} else if (v->gv_flags & GV_ENABLED) {
179		if (to_offline == 0)
180			graph_start_if_satisfied(v);
181	}
182
183	/*
184	 * If the service was running, propagate a stop event.  If the
185	 * service was not running the offline transition may satisfy
186	 * optional dependencies and should be propagated to determine
187	 * whether new dependents are satisfiable.
188	 * Instances that transition to offline and have the GV_TOOFFLINE flag
189	 * are special because they can expose new subtree leaves so propagate
190	 * the offline to the instance dependencies.
191	 */
192	if (gt_running(old_state)) {
193		/*
194		 * Handle state change during instance disabling.
195		 * Propagate offline to the new exposed leaves.
196		 */
197		if (to_offline) {
198			log_framework(LOG_DEBUG, "%s removed from subtree\n",
199			    v->gv_name);
200
201			graph_offline_subtree_leaves(v, (void *)h);
202		}
203
204		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
205		    v->gv_name);
206
207		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
208
209		/*
210		 * The offline transition may satisfy require_any/restart
211		 * dependencies and should be propagated to determine
212		 * whether new dependents are satisfiable.
213		 */
214		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
215	} else {
216		log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
217		    v->gv_name);
218
219		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
220	}
221
222	graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
223	return (0);
224}
225
226/* ARGSUSED */
227static int
228gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
229    restarter_instance_state_t old_state, restarter_error_t rerr)
230{
231	int to_offline = v->gv_flags & GV_TOOFFLINE;
232
233	v->gv_flags &= ~GV_TODISABLE;
234	v->gv_flags &= ~GV_TOOFFLINE;
235
236	/*
237	 * If the instance should be disabled, no problem.  Otherwise,
238	 * send an enable command, which should result in the instance
239	 * moving to OFFLINE unless the instance is part of a subtree
240	 * (non root) and in this case the result is unpredictable.
241	 */
242	if (v->gv_flags & GV_ENABLED) {
243		vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
244	} else if (gt_running(old_state) && v->gv_post_disable_f) {
245		v->gv_post_disable_f();
246	}
247
248	/*
249	 * If the service was running, propagate this as a stop.  If the
250	 * service was not running the disabled transition may satisfy
251	 * optional dependencies and should be propagated to determine
252	 * whether new dependents are satisfiable.
253	 */
254	if (gt_running(old_state)) {
255		/*
256		 * We need to propagate the offline to new exposed leaves in
257		 * case we've just disabled an instance that was part of a
258		 * subtree.
259		 */
260		if (to_offline) {
261			log_framework(LOG_DEBUG, "%s removed from subtree\n",
262			    v->gv_name);
263
264			/*
265			 * Handle state change during instance disabling.
266			 * Propagate offline to the new exposed leaves.
267			 */
268			graph_offline_subtree_leaves(v, (void *)h);
269		}
270
271
272		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
273		    v->gv_name);
274
275		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
276
277		/*
278		 * The disable transition may satisfy optional_all/restart
279		 * dependencies and should be propagated to determine
280		 * whether new dependents are satisfiable.
281		 */
282		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
283	} else {
284		log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
285		    v->gv_name);
286
287		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
288	}
289
290	graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
291	return (0);
292}
293
294static int
295gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
296    restarter_instance_state_t old_state, restarter_error_t rerr)
297{
298	int r;
299
300	/*
301	 * If the instance has just come up, update the start
302	 * snapshot.
303	 */
304	if (gt_running(old_state) == 0) {
305		/*
306		 * Don't fire if we're just recovering state
307		 * after a restart.
308		 */
309		if (old_state != RESTARTER_STATE_UNINIT &&
310		    v->gv_post_online_f)
311			v->gv_post_online_f();
312
313		r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
314		switch (r) {
315		case 0:
316		case ENOENT:
317			/*
318			 * If ENOENT, the instance must have been
319			 * deleted.  Pretend we were successful since
320			 * we should get a delete event later.
321			 */
322			break;
323
324		case ECONNABORTED:
325			return (ECONNABORTED);
326
327		case EACCES:
328		case ENOTSUP:
329		default:
330			bad_error("libscf_snapshots_poststart", r);
331		}
332	}
333
334	if (!(v->gv_flags & GV_ENABLED)) {
335		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
336	} else if (v->gv_flags & GV_TOOFFLINE) {
337		/*
338		 * If the vertex has the GV_TOOFFLINE flag set then that's
339		 * because the instance was transitioning from offline to
340		 * online and the reverse disable algorithm doesn't offline
341		 * those instances because it was already appearing offline.
342		 * So do it now.
343		 */
344		offline_vertex(v);
345	}
346
347	if (gt_running(old_state) == 0) {
348		log_framework(LOG_DEBUG, "Propagating start of %s.\n",
349		    v->gv_name);
350
351		graph_transition_propagate(v, PROPAGATE_START, rerr);
352	} else if (rerr == RERR_REFRESH) {
353		/* For refresh we'll get a message sans state change */
354
355		log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
356		    v->gv_name);
357
358		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
359	}
360
361	return (0);
362}
363
364static int
365gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
366    restarter_instance_state_t old_state, restarter_error_t rerr)
367{
368	int r;
369
370	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
371	if (r != 0)
372		return (r);
373
374	graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
375	return (0);
376}
377
378static int
379gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
380    restarter_instance_state_t old_state, restarter_error_t rerr)
381{
382	int r;
383
384	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
385	if (r != 0)
386		return (r);
387
388	graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
389	return (0);
390}
391
392/*
393 * gt_transition() implements the state transition for the graph
394 * state machine.  It can return:
395 *    0              success
396 *    ECONNABORTED   repository connection aborted
397 *
398 * v->gv_state should be set to the state we're transitioning to before
399 * calling this function.
400 */
401int
402gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
403    restarter_instance_state_t old_state)
404{
405	int err;
406	int lost_repository = 0;
407
408	/*
409	 * If there's a common set of work to be done on exit from the
410	 * old_state, include it as a separate set of functions here.  For
411	 * now there's no such work, so there are no gt_exit functions.
412	 */
413
414	err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
415	switch (err) {
416	case 0:
417		break;
418
419	case ECONNABORTED:
420		lost_repository = 1;
421		break;
422
423	default:
424		bad_error("vertex_subgraph_dependencies_shutdown", err);
425	}
426
427	/*
428	 * Now call the appropriate gt_enter function for the new state.
429	 */
430	switch (v->gv_state) {
431	case RESTARTER_STATE_UNINIT:
432		err = gt_enter_uninit(h, v, old_state, rerr);
433		break;
434
435	case RESTARTER_STATE_DISABLED:
436		err = gt_enter_disabled(h, v, old_state, rerr);
437		break;
438
439	case RESTARTER_STATE_OFFLINE:
440		err = gt_enter_offline(h, v, old_state, rerr);
441		break;
442
443	case RESTARTER_STATE_ONLINE:
444		err = gt_enter_online(h, v, old_state, rerr);
445		break;
446
447	case RESTARTER_STATE_DEGRADED:
448		err = gt_enter_degraded(h, v, old_state, rerr);
449		break;
450
451	case RESTARTER_STATE_MAINT:
452		err = gt_enter_maint(h, v, old_state, rerr);
453		break;
454
455	default:
456		/* Shouldn't be in an invalid state. */
457#ifndef NDEBUG
458		uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
459		    v->gv_state);
460#endif
461		abort();
462	}
463
464	switch (err) {
465	case 0:
466		break;
467
468	case ECONNABORTED:
469		lost_repository = 1;
470		break;
471
472	default:
473#ifndef NDEBUG
474		uu_warn("%s:%d: "
475		    "gt_enter_%s() failed with unexpected error %d.\n",
476		    __FILE__, __LINE__, instance_state_str[v->gv_state], err);
477#endif
478		abort();
479	}
480
481	return (lost_repository ? ECONNABORTED : 0);
482}
483