xref: /illumos-gate/usr/src/cmd/fm/modules/common/disk-monitor/hotplug_mgr.c (revision 184cd04c26b064536977dfbb913a1240eaf6f708)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/sysevent/dr.h>
31 #include <sys/sysevent/eventdefs.h>
32 #include <sys/sunddi.h>	/* for the EC's for DEVFS */
33 
34 #include <errno.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <stdio.h>
38 #include <unistd.h>
39 #include <time.h>
40 #include <pthread.h>
41 
42 #include <libsysevent.h>
43 #include <sys/sysevent_impl.h>
44 
45 #include <libnvpair.h>
46 #include <config_admin.h>
47 
48 #include "disk_monitor.h"
49 #include "hotplug_mgr.h"
50 #include "schg_mgr.h"
51 #include "dm_platform.h"
52 
/*
 * Queue element wrapping one sysevent.  Instances are created by
 * new_sysevent_event() and released with free_sysevent_event(); the
 * wrapped event itself is released separately by its consumer.
 */
typedef struct sysevent_event {
	sysevent_t	*evp;	/* the (duplicated) sysevent being carried */
} sysevent_event_t;
56 
/*
 * State shared between the sysevent delivery callback (event_handler),
 * the FMD-created worker thread (dm_fmd_sysevent_thread) and the
 * init/cleanup entry points.
 *
 * g_event_handler_lock and g_event_handler_cond are used in this file for
 * the start-up and shut-down handshake on g_sysev_thread_state.  (The
 * original note here said the lock guarantees the ordering of the incoming
 * sysevents; within this file it is only taken around thread-state
 * changes.)
 */
/* Id of the worker thread, as returned by fmd_thr_create() */
static pthread_t g_sysev_tid;
static pthread_mutex_t g_event_handler_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t g_event_handler_cond = PTHREAD_COND_INITIALIZER;
/* Work queue: event_handler() adds events, the worker thread removes them */
static qu_t *g_sysev_queue = NULL;
/* Lifecycle of the worker thread (not running/running/exit requested/exited) */
static thread_state_t g_sysev_thread_state = TS_NOT_RUNNING;
/*
 * The sysevent handle is bound to the main sysevent handler
 * (event_handler), for each of the hotplug sysevents.
 */
static sysevent_handle_t *sysevent_handle = NULL;

/* Forward declaration: used by the worker thread before its definition */
static void free_sysevent_event(void *p);
70 
/*
 * Sleeps for the given number of whole seconds.  Returns whatever
 * nanosleep() returns (a negative value with errno set on failure or
 * interruption).
 */
static int
nsleep(int seconds)
{
	struct timespec delay = { 0 };

	delay.tv_sec = seconds;

	return (nanosleep(&delay, NULL));
}
81 
82 static int
83 config_list_ext_poll(int num, char * const *path,
84     cfga_list_data_t **list_array, int *nlist)
85 {
86 	boolean_t done = B_FALSE;
87 	boolean_t timedout = B_FALSE;
88 	boolean_t interrupted = B_FALSE;
89 	int timeout = 0;
90 	int e;
91 #define	TIMEOUT_MAX 60
92 
93 	do {
94 		switch ((e = config_list_ext(num, path, list_array,
95 		    nlist, NULL, NULL, NULL, CFGA_FLAG_LIST_ALL))) {
96 
97 		case CFGA_OK:
98 
99 			return (CFGA_OK);
100 
101 		case CFGA_BUSY:
102 		case CFGA_SYSTEM_BUSY:
103 
104 			if (timeout++ >= TIMEOUT_MAX)
105 				timedout = B_TRUE;
106 			else {
107 				if (nsleep(1) < 0)
108 					interrupted = (errno == EINTR);
109 			}
110 			break;
111 
112 		default:
113 			done = B_TRUE;
114 			break;
115 
116 		}
117 	} while (!done && !timedout && !interrupted);
118 
119 	return (e);
120 }
121 
122 /*
123  * Looks up the attachment point's state and returns it in one of
124  * the hotplug states that the state change manager understands.
125  */
126 hotplug_state_t
127 disk_ap_state_to_hotplug_state(diskmon_t *diskp)
128 {
129 	hotplug_state_t state = HPS_UNKNOWN;
130 	cfga_list_data_t *list_array = NULL;
131 	int nlist;
132 	char *app = (char *)dm_prop_lookup(diskp->app_props,
133 	    DISK_AP_PROP_APID);
134 	char *ap_path[1];
135 	char *devices_app;
136 	int len;
137 	boolean_t list_valid = B_FALSE;
138 
139 	dm_assert(app != NULL);
140 
141 	ap_path[0] = app;
142 
143 	if (config_list_ext_poll(1, ap_path, &list_array, &nlist)
144 	    == CFGA_OK) {
145 
146 		dm_assert(nlist == 1);
147 		dm_assert(strcmp(app, list_array[0].ap_phys_id) == 0);
148 
149 		list_valid = B_TRUE;
150 
151 	} else {
152 		/*
153 		 * The sata libcfgadm plugin adds a
154 		 * /devices to the phys id; to use it, we must
155 		 * prepend this string before the call.
156 		 */
157 		len = 8 /* strlen("/devices") */ + strlen(app) + 1;
158 		devices_app = dmalloc(len);
159 
160 		(void) snprintf(devices_app, len, "/devices%s",
161 		    app);
162 
163 		ap_path[0] = devices_app;
164 
165 		if (config_list_ext_poll(1, ap_path, &list_array, &nlist)
166 		    == CFGA_OK) {
167 
168 			dm_assert(nlist == 1);
169 			dm_assert(strcmp(devices_app, list_array[0].ap_phys_id)
170 			    == 0);
171 
172 			list_valid = B_TRUE;
173 		}
174 
175 		dfree(devices_app, len);
176 	}
177 
178 	if (list_valid) {
179 		/*
180 		 * The following truth table defines how each state is
181 		 * computed:
182 		 *
183 		 * +----------------------------------------------+
184 		 * |		  | o_state | r_state | condition |
185 		 * |		  +---------+---------+-----------|
186 		 * | Absent	  |Don'tCare|Disc/Empt|	Don'tCare |
187 		 * | Present	  |Unconfgrd|Connected|	 unknown  |
188 		 * | Configured	  |Configred|Connected|	Don'tCare |
189 		 * | Unconfigured |Unconfgrd|Connected|	   OK	  |
190 		 * +--------------+---------+---------+-----------+
191 		 */
192 
193 		if (list_array[0].ap_r_state == CFGA_STAT_EMPTY ||
194 		    list_array[0].ap_r_state == CFGA_STAT_DISCONNECTED)
195 			state = HPS_ABSENT;
196 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
197 		    list_array[0].ap_o_state == CFGA_STAT_UNCONFIGURED &&
198 		    list_array[0].ap_cond == CFGA_COND_UNKNOWN)
199 			state = HPS_PRESENT;
200 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
201 		    list_array[0].ap_o_state == CFGA_STAT_UNCONFIGURED &&
202 		    list_array[0].ap_cond != CFGA_COND_UNKNOWN)
203 			state = HPS_UNCONFIGURED;
204 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
205 		    list_array[0].ap_o_state == CFGA_STAT_CONFIGURED)
206 			state = HPS_CONFIGURED;
207 
208 		free(list_array);
209 	}
210 
211 	return (state);
212 }
213 
214 /*
215  * Examine the sysevent passed in and returns the hotplug state that
216  * the sysevent states (or implies, in the case of attachment point
217  * events).
218  */
219 static hotplug_state_t
220 disk_sysev_to_state(diskmon_t *diskp, sysevent_t *evp)
221 {
222 	const char *class_name, *subclass;
223 	hotplug_state_t state = HPS_UNKNOWN;
224 	sysevent_value_t se_val;
225 
226 	/*
227 	 * The state mapping is as follows:
228 	 *
229 	 * Sysevent				State
230 	 * --------------------------------------------------------
231 	 * EC_DEVFS/ESC_DEVFS_DEVI_ADD		Configured
232 	 * EC_DEVFS/ESC_DEVFS_DEVI_REMOVE	Unconfigured
233 	 * EC_DR/ESC_DR_AP_STATE_CHANGE		*[Absent/Present]
234 	 *
235 	 * (The EC_DR event requires a probe of the attachment point
236 	 * to determine the AP's state if there is no usable HINT)
237 	 *
238 	 */
239 
240 	class_name = sysevent_get_class_name(evp);
241 	subclass = sysevent_get_subclass_name(evp);
242 
243 	if (strcmp(class_name, EC_DEVFS) == 0) {
244 		if (strcmp(subclass, ESC_DEVFS_DEVI_ADD) == 0) {
245 
246 			state = HPS_CONFIGURED;
247 
248 		} else if (strcmp(subclass, ESC_DEVFS_DEVI_REMOVE) == 0) {
249 
250 			state = HPS_UNCONFIGURED;
251 
252 		}
253 
254 	} else if (strcmp(class_name, EC_DR) == 0 &&
255 	    strcmp(subclass, ESC_DR_AP_STATE_CHANGE) == 0) {
256 
257 		if (sysevent_lookup_attr(evp, DR_HINT, SE_DATA_TYPE_STRING,
258 		    &se_val) == 0 && se_val.value.sv_string != NULL) {
259 
260 			if (strcmp(se_val.value.sv_string, DR_HINT_INSERT)
261 			    == 0) {
262 
263 				state = HPS_PRESENT;
264 
265 			} else if (strcmp(se_val.value.sv_string,
266 			    DR_HINT_REMOVE) == 0) {
267 
268 				state = HPS_ABSENT;
269 			}
270 
271 		}
272 
273 		/*
274 		 * If the state could not be determined by the hint
275 		 * (or there was no hint), ask the AP directly.
276 		 */
277 		if (state == HPS_UNKNOWN)
278 			state = disk_ap_state_to_hotplug_state(diskp);
279 	}
280 
281 	return (state);
282 }
283 
284 /*
285  * Returns the diskmon that corresponds to the physical disk path
286  * passed in.
287  */
288 static diskmon_t *
289 disk_match_by_device_path(diskmon_t *disklistp, const char *dev_path)
290 {
291 	char *p;
292 	int targetid;
293 	char tgtnum[MAXNAMELEN];
294 	char finalpath[MAXPATHLEN];
295 	char devicepath[MAXPATHLEN];
296 	dm_assert(disklistp != NULL);
297 	dm_assert(dev_path != NULL);
298 
299 	if (strncmp(dev_path, DEVICES_PREFIX, 8) == 0)
300 		dev_path += 8;
301 
302 	/*
303 	 * The AP path specified in the configuration properties is
304 	 * the path to an attachment point minor node whose port number is
305 	 * equal to the target number on the disk "major" node sent by the
306 	 * sysevent.  To match them, we need to extract the target id and
307 	 * construct an AP string to compare to the AP path in the diskmon.
308 	 */
309 	while (disklistp != NULL) {
310 		char *app = (char *)dm_prop_lookup(disklistp->app_props,
311 		    DISK_AP_PROP_APID);
312 		dm_assert(app != NULL);
313 
314 		/*
315 		 * The disk device path is of the form:
316 		 * /rootnode/.../device/target@tgtid,tgtlun
317 		 * The AP path is of the form:
318 		 * /devices/rootnode/.../device:portnum
319 		 */
320 
321 		if (strncmp(app, DEVICES_PREFIX, 8) == 0)
322 			app += 8;
323 
324 		/* Get the target number from the disk path: */
325 		p = strrchr(dev_path, '/');
326 		dm_assert(p != NULL);
327 
328 		p = strchr(p, '@');
329 		dm_assert(p != NULL);
330 
331 		bzero(tgtnum, MAXNAMELEN);
332 		(void) strlcpy(tgtnum, p + 1, MAXNAMELEN);
333 
334 		if ((p = strchr(tgtnum, ',')) != NULL)
335 			*p = 0;
336 
337 		targetid = strtol(tgtnum, 0, 16);
338 
339 		/*
340 		 * Now copy the last part of the disk path and create the
341 		 * string we want to match.
342 		 */
343 		(void) strlcpy(devicepath, dev_path, MAXPATHLEN);
344 		if ((p = strrchr(devicepath, '/')) != NULL)
345 			*p = 0;
346 		(void) snprintf(finalpath, MAXPATHLEN, "%s:%x",
347 		    devicepath, targetid);
348 
349 		if (strcmp(finalpath, app) == 0)
350 			return (disklistp);
351 
352 		disklistp = disklistp->next;
353 	}
354 	return (NULL);
355 }
356 
357 static diskmon_t *
358 disk_match_by_ap_id(diskmon_t *disklistp, const char *ap_id)
359 {
360 	const char *disk_ap_id;
361 	dm_assert(disklistp != NULL);
362 	dm_assert(ap_id != NULL);
363 
364 	/* Match only the device-tree portion of the name */
365 	if (strncmp(ap_id, DEVICES_PREFIX, 8 /* strlen("/devices") */) == 0)
366 		ap_id += 8;
367 
368 	while (disklistp != NULL) {
369 		disk_ap_id = dm_prop_lookup(disklistp->app_props,
370 		    DISK_AP_PROP_APID);
371 
372 		dm_assert(disk_ap_id != NULL);
373 
374 		if (strcmp(disk_ap_id, ap_id) == 0)
375 			return (disklistp);
376 
377 		disklistp = disklistp->next;
378 	}
379 	return (NULL);
380 }
381 
382 static diskmon_t *
383 match_sysevent_to_disk(diskmon_t *disklistp, sysevent_t *evp)
384 {
385 	diskmon_t *dmp = NULL;
386 	sysevent_value_t se_val;
387 	char *class_name = sysevent_get_class_name(evp);
388 	char *subclass = sysevent_get_subclass_name(evp);
389 
390 	se_val.value.sv_string = NULL;
391 
392 	if (strcmp(class_name, EC_DEVFS) == 0) {
393 		/* EC_DEVFS-class events have a `DEVFS_PATHNAME' property */
394 		if (sysevent_lookup_attr(evp, DEVFS_PATHNAME,
395 		    SE_DATA_TYPE_STRING, &se_val) == 0 &&
396 		    se_val.value.sv_string != NULL) {
397 
398 			dmp = disk_match_by_device_path(disklistp,
399 			    se_val.value.sv_string);
400 
401 		}
402 
403 	} else if (strcmp(class_name, EC_DR) == 0 &&
404 	    strcmp(subclass, ESC_DR_AP_STATE_CHANGE) == 0) {
405 
406 		/* EC_DR-class events have a `DR_AP_ID' property */
407 		if (sysevent_lookup_attr(evp, DR_AP_ID, SE_DATA_TYPE_STRING,
408 		    &se_val) == 0 && se_val.value.sv_string != NULL) {
409 
410 			dmp = disk_match_by_ap_id(disklistp,
411 			    se_val.value.sv_string);
412 		}
413 	}
414 
415 	if (se_val.value.sv_string)
416 		log_msg(MM_HPMGR, "match_sysevent_to_disk: device/ap: %s\n",
417 		    se_val.value.sv_string);
418 
419 	return (dmp);
420 }
421 
422 
423 /*
424  * The disk hotplug monitor (DHPM) listens for disk hotplug events and calls the
425  * state-change functionality when a disk's state changes.  The DHPM listens for
426  * hotplug events via sysevent subscriptions to the following sysevent
427  * classes/subclasses: { EC_DEVFS/ESC_DEVFS_BRANCH_ADD,
428  * EC_DEVFS/ESC_DEVFS_BRANCH_REMOVE, EC_DEVFS/ESC_DEVFS_DEVI_ADD,
429  * EC_DEVFS/ESC_DEVFS_DEVI_REMOVE, EC_DR/ESC_DR_AP_STATE_CHANGE }.  Once the
430  * event is received, the device path sent as part of the event is matched
431  * to one of the disks described by the configuration data structures.
432  */
433 static void
434 dm_process_sysevent(sysevent_t *dupev)
435 {
436 	char		*class_name;
437 	char		*pub;
438 	char		*subclass = sysevent_get_subclass_name(dupev);
439 	diskmon_t	*diskp;
440 
441 	class_name = sysevent_get_class_name(dupev);
442 	log_msg(MM_HPMGR, "****EVENT: %s %s (by %s)\n", class_name,
443 	    subclass,
444 	    ((pub = sysevent_get_pub_name(dupev)) != NULL) ? pub : "UNKNOWN");
445 
446 	if (pub)
447 		free(pub);
448 
449 	if (strcmp(class_name, EC_PLATFORM) == 0 &&
450 	    strcmp(subclass, ESC_PLATFORM_SP_RESET) == 0) {
451 		if (dm_platform_resync() != 0)
452 			log_warn("failed to resync SP platform\n");
453 		return;
454 	}
455 
456 	/*
457 	 * We will handle this event if the event's target matches one of the
458 	 * disks we're monitoring
459 	 */
460 	if ((diskp = match_sysevent_to_disk(config_data->disk_list, dupev))
461 	    != NULL) {
462 
463 		dm_state_change(diskp, disk_sysev_to_state(diskp, dupev));
464 	}
465 
466 	sysevent_free(dupev);
467 }
468 
469 static void
470 dm_fmd_sysevent_thread(void *queuep)
471 {
472 	qu_t			*qp = (qu_t *)queuep;
473 	sysevent_event_t	*sevevp;
474 
475 	/* Signal the thread spawner that we're running */
476 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
477 	if (g_sysev_thread_state != TS_EXIT_REQUESTED)
478 		g_sysev_thread_state = TS_RUNNING;
479 	(void) pthread_cond_broadcast(&g_event_handler_cond);
480 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
481 
482 	while (g_sysev_thread_state != TS_EXIT_REQUESTED) {
483 		if ((sevevp = (sysevent_event_t *)queue_remove(qp)) == NULL)
484 			continue;
485 
486 		dm_process_sysevent(sevevp->evp);
487 
488 		free_sysevent_event(sevevp);
489 	}
490 
491 	/* Signal the thread spawner that we've exited */
492 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
493 	g_sysev_thread_state = TS_EXITED;
494 	(void) pthread_cond_broadcast(&g_event_handler_cond);
495 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
496 
497 	log_msg(MM_HPMGR, "FMD sysevent handler thread exiting...");
498 }
499 
500 static sysevent_event_t *
501 new_sysevent_event(sysevent_t *ev)
502 {
503 	/*
504 	 * Cannot use dmalloc for this because the thread isn't a FMD-created
505 	 * thread!
506 	 */
507 	sysevent_event_t *sevevp = malloc(sizeof (sysevent_event_t));
508 	sevevp->evp = ev;
509 	return (sevevp);
510 }
511 
/*
 * Releases a queue element created by new_sysevent_event().  Only the
 * element is freed here, not the sysevent it points to.
 */
static void
free_sysevent_event(void *p)
{
	/* Elements come from malloc(), so plain free() is the match */
	free(p);
}
518 
519 static void
520 event_handler(sysevent_t *ev)
521 {
522 	/* The duplicated sysevent will be freed in the child thread */
523 	sysevent_t	*dupev = sysevent_dup(ev);
524 
525 	/*
526 	 * Add this sysevent to the work queue of our FMA thread so we can
527 	 * handle the sysevent and use the FMA API (e.g. for memory
528 	 * allocation, etc.) in the sysevent handler.
529 	 */
530 	queue_add(g_sysev_queue, new_sysevent_event(dupev));
531 }
532 
533 static void
534 fini_sysevents(void)
535 {
536 	sysevent_unsubscribe_event(sysevent_handle, EC_ALL);
537 }
538 
539 static int
540 init_sysevents(void)
541 {
542 	int rv = 0;
543 	const char *devfs_subclasses[] = {
544 		ESC_DEVFS_DEVI_ADD,
545 		ESC_DEVFS_DEVI_REMOVE
546 	};
547 	const char *dr_subclasses[] = {
548 		ESC_DR_AP_STATE_CHANGE
549 	};
550 	const char *platform_subclasses[] = {
551 		ESC_PLATFORM_SP_RESET
552 	};
553 
554 	if ((sysevent_handle = sysevent_bind_handle(event_handler)) == NULL) {
555 		rv = errno;
556 		log_err("Could not initialize the hotplug manager ("
557 		    "sysevent_bind_handle failure");
558 	}
559 
560 	if (sysevent_subscribe_event(sysevent_handle, EC_DEVFS,
561 	    devfs_subclasses, 2) != 0) {
562 
563 		log_err("Could not initialize the hotplug manager "
564 		    "sysevent_subscribe_event(event class = EC_DEVFS) "
565 		    "failure");
566 
567 		rv = -1;
568 
569 	} else if (sysevent_subscribe_event(sysevent_handle, EC_DR,
570 	    dr_subclasses, 1) != 0) {
571 
572 		log_err("Could not initialize the hotplug manager "
573 		    "sysevent_subscribe_event(event class = EC_DR) "
574 		    "failure");
575 
576 		/* Unsubscribe from all sysevents in the event of a failure */
577 		fini_sysevents();
578 
579 		rv = -1;
580 	} else if (sysevent_subscribe_event(sysevent_handle, EC_PLATFORM,
581 	    platform_subclasses, 1) != 0) {
582 
583 		log_err("Could not initialize the hotplug manager "
584 		    "sysevent_subscribe_event(event class = EC_PLATFORM) "
585 		    "failure");
586 
587 		/* Unsubscribe from all sysevents in the event of a failure */
588 		fini_sysevents();
589 
590 		rv = -1;
591 	}
592 
593 
594 	return (rv);
595 }
596 
/*
 * free() with a (pointer, size) signature, as handed to new_queue() for
 * releasing memory.  The size argument is accepted but not used.
 */
/*ARGSUSED*/
static void
stdfree(void *p, size_t sz)
{
	free(p);
}
603 
604 /*
605  * Assumptions: Each disk's current state was determined and stored in
606  * its diskmon_t.
607  */
608 hotplug_mgr_init_err_t
609 init_hotplug_manager()
610 {
611 	/* Create the queue to which we'll add sysevents */
612 	g_sysev_queue = new_queue(B_TRUE, malloc, stdfree, free_sysevent_event);
613 
614 	/*
615 	 * Grab the event handler lock before spawning the thread so we can
616 	 * wait for the thread to transition to the running state.
617 	 */
618 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
619 
620 	/* Create the sysevent handling thread */
621 	g_sysev_tid = fmd_thr_create(g_fm_hdl, dm_fmd_sysevent_thread,
622 	    g_sysev_queue);
623 
624 	/* Wait for the thread's acknowledgement */
625 	while (g_sysev_thread_state != TS_RUNNING)
626 		(void) pthread_cond_wait(&g_event_handler_cond,
627 		    &g_event_handler_lock);
628 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
629 
630 	if (init_sysevents() != 0) {
631 		log_warn_e("Error initializing sysevents");
632 		return (HPM_ERR_SYSEVENT_INIT);
633 	}
634 
635 	return (0);
636 }
637 
638 void
639 cleanup_hotplug_manager()
640 {
641 	/* Unsubscribe from the sysevents */
642 	fini_sysevents();
643 
644 	/*
645 	 * Wait for the thread to exit before we can destroy
646 	 * the event queue.
647 	 */
648 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
649 	g_sysev_thread_state = TS_EXIT_REQUESTED;
650 	queue_add(g_sysev_queue, NULL);
651 	while (g_sysev_thread_state != TS_EXITED)
652 		(void) pthread_cond_wait(&g_event_handler_cond,
653 		    &g_event_handler_lock);
654 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
655 	(void) pthread_join(g_sysev_tid, NULL);
656 	fmd_thr_destroy(g_fm_hdl, g_sysev_tid);
657 
658 	/* Finally, destroy the event queue and reset the thread state */
659 	queue_free(&g_sysev_queue);
660 	g_sysev_thread_state = TS_NOT_RUNNING;
661 }
662