1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26/*
27 * Copyright 2019 Joyent, Inc.
28 */
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <stropts.h>
33#include <synch.h>
34#include <thread.h>
35#include <libsysevent.h>
36#include <sys/sysevent/eventdefs.h>
37#include <sys/sysevent/dev.h>
38#include <errno.h>
39#include <libgen.h>
40#include <unistd.h>
41
42#include "libdiskmgt.h"
43#include "disks_private.h"
44
45#pragma fini(libdiskmgt_fini)
46
/*
 * One node of the singly linked queue of pending events.  Nodes are
 * appended by add_event_to_queue() and removed from the head by
 * dm_get_event(), which frees the node and hands the nvlist to its caller.
 */
47struct event_list {
	/* following node in the queue; NULL for the last node */
48	struct event_list	*next;
	/* the queued event */
49	nvlist_t		*event;
50};
51
/*
 * shp is the sysevent subscriber handle; it is bound in
 * events_start_event_watcher() and unbound in libdiskmgt_fini(), both with
 * shp_lock held.
 */
52static mutex_t			shp_lock = ERRORCHECKMUTEX;
53static sysevent_handle_t	*shp = NULL;
54
/*
 * Event queue state.  events is the head of the queue and event_error is an
 * errno value waiting to be reported by dm_get_event(); both are accessed
 * with queue_lock held.  event_break is set by dm_init_event_queue() when
 * the callback is cleared and makes dm_get_event() return EINTR.  semaphore
 * is what dm_get_event() blocks on.
 */
55static struct event_list	*events = NULL;
56static int			event_error = 0;
57static int			event_break = 0;
58static mutex_t			queue_lock;
59static sema_t			semaphore;
60
/*
 * When we add a controller we get an add event for each drive on the
 * controller.  We don't want to walk the devtree for each drive since
 * we will get the same information each time.  So, the solution is to
 * wait WALK_WAIT_TIME seconds for all of the add events to come in and
 * then do a single walk.  If an add event comes in after we start the
 * walk, we need to do another walk since we might have missed that drive.
 *
 * State: 0 - no walker; 1 - walker waiting; 2 - walker running
 *	0 -> 1; wait WALK_WAIT_TIME seconds
 *	1 -> 2; walking the devtree
 *	2 -> either 0 or 1 (see below)
 * While running (state 2), if an event comes in, go back to waiting
 * (state 1) after the walk; otherwise go back to none (state 0).
 *
 * walker_lock protects walker_state, events_pending & shutting_down
 */
/* values taken by walker_state */
78#define	WALK_NONE		0
79#define	WALK_WAITING		1
80#define	WALK_RUNNING		2
81#define	WALK_WAIT_TIME		60	/* wait 60 seconds */
82
83static mutex_t			walker_lock = ERRORCHECKMUTEX;
/* broadcast by walker() when it exits because of shutting_down */
84static cond_t			walker_cv = DEFAULTCV;
85static int			walker_state = WALK_NONE;
86
/* set by walk_devtree() when an add event arrives while a walk is running */
87static int			events_pending = 0;
88
/* set to 1 by dm_init_event_queue(); events_new_event() does nothing until then */
89static int			sendevents = 0;
90
91static void		add_event_to_queue(nvlist_t *event);
92static void		*cb_watch_events(void *);
93static void		event_handler(sysevent_t *ev);
94static void		print_nvlist(char *prefix, nvlist_t *list);
95static void		walk_devtree(void);
96static void		*walker(void *arg);
97
/* consumer callback invoked from cb_watch_events(); NULL when none is installed */
98static void(*callback)(nvlist_t *, int) = NULL;
99
/* set by libdiskmgt_fini(); walker() exits the next time it checks this */
100static boolean_t		shutting_down = B_FALSE;
101
/*
 * Library teardown routine, registered with "#pragma fini".  Drops the
 * sysevent subscription (if one was made) and then waits until no walker()
 * thread is active.
 */
102static void
103libdiskmgt_fini(void)
104{
	/*
	 * NOTE(review): shp_lock is acquired here and is not released
	 * anywhere in this function -- confirm that holding it past
	 * teardown is intentional.
	 */
105	mutex_enter(&shp_lock);
106	if (shp != NULL) {
107		sysevent_unsubscribe_event(shp, EC_ALL);
108		sysevent_unbind_handle(shp);
109		shp = NULL;
110	}
111	/*
112	 * At this point a new invocation of walker() can't occur.  However,
113	 * if one was already running then we need to wait for it to finish
114	 * because if we allow ourselves to be unloaded out from underneath
115	 * it, then bad things will happen.
116	 */
	/*
	 * Setting shutting_down makes walker() reset walker_state to
	 * WALK_NONE and broadcast walker_cv at its next check.  walker()
	 * sleeps for WALK_WAIT_TIME seconds before its first check, so this
	 * wait can last that long.
	 */
117	mutex_enter(&walker_lock);
118	shutting_down = B_TRUE;
119	while (walker_state != WALK_NONE)
120		(void) cond_wait(&walker_cv, &walker_lock);
121
122	mutex_exit(&walker_lock);
123}
124
125nvlist_t *
126dm_get_event(int *errp)
127{
128	nvlist_t *event = NULL;
129
130	*errp = 0;
131
132	/* wait until there is an event in the queue */
133	/*CONSTCOND*/
134	while (1) {
135	    (void) sema_wait(&semaphore);
136
137	    if (event_break) {
138		event_break = 0;
139		*errp = EINTR;
140		break;
141	    }
142
143	    (void) mutex_lock(&queue_lock);
144
145	    /* first see if we ran out of memory since the last call */
146	    if (event_error != 0) {
147		*errp = event_error;
148		event_error = 0;
149
150	    } else if (events != NULL) {
151		struct event_list *tmpp;
152
153		event = events->event;
154		tmpp = events->next;
155		free(events);
156		events = tmpp;
157	    }
158
159	    (void) mutex_unlock(&queue_lock);
160
161	    if (*errp != 0 || event != NULL) {
162		break;
163	    }
164	}
165
166	return (event);
167}
168
169void
170dm_init_event_queue(void (*cb)(nvlist_t *, int), int *errp)
171{
172	if (sendevents == 1) {
173	    /* we were already initialized, see what changes to make */
174	    *errp = 0;
175	    if (cb != callback) {
176
177		callback = cb;
178		if (cb == NULL) {
179		    /* clearing the cb so shutdown the internal cb thread */
180		    event_break = 1;
181		    (void) sema_post(&semaphore);
182
183		} else {
184		    /* installing a cb; we didn't have one before */
185		    thread_t watch_thread;
186
187		    *errp = thr_create(NULL, 0, cb_watch_events, NULL,
188			THR_DAEMON, &watch_thread);
189		}
190	    }
191
192	} else {
193	    /* first time to initialize */
194	    sendevents = 1;
195
196	    *errp = sema_init(&semaphore, 0, USYNC_THREAD, NULL);
197	    if (*errp != 0) {
198		return;
199	    }
200
201	    if (cb != NULL) {
202		thread_t watch_thread;
203
204		callback = cb;
205
206		*errp = thr_create(NULL, 0, cb_watch_events, NULL, THR_DAEMON,
207		    &watch_thread);
208	    }
209	}
210}
211
212void
213events_new_event(char *name, int dtype, char *etype)
214{
215	nvlist_t	*event = NULL;
216
217	if (!sendevents) {
218	    return;
219	}
220
221	if (nvlist_alloc(&event, NVATTRS, 0) != 0) {
222	    event = NULL;
223
224	} else {
225	    int	error = 0;
226
227	    if (name != NULL &&
228		nvlist_add_string(event, DM_EV_NAME, name) != 0) {
229		error = ENOMEM;
230	    }
231
232	    if (dtype != -1 &&
233		nvlist_add_uint32(event, DM_EV_DTYPE, dtype) != 0) {
234		error = ENOMEM;
235	    }
236
237	    if (nvlist_add_string(event, DM_EV_TYPE, etype) != 0) {
238		error = ENOMEM;
239	    }
240
241	    if (error != 0) {
242		nvlist_free(event);
243		event = NULL;
244	    }
245	}
246
247	add_event_to_queue(event);
248}
249
250void
251events_new_slice_event(char *dev, char *type)
252{
253	events_new_event(basename(dev), DM_SLICE, type);
254}
255
256int
257events_start_event_watcher()
258{
259	const char *subclass_list[1];
260	int ret = -1;
261
262	mutex_enter(&shp_lock);
263	if (shp != NULL) {
264		ret = 0;
265		goto out;
266	}
267
268	/* Bind event handler and create subscriber handle */
269	shp = sysevent_bind_handle(event_handler);
270	if (shp == NULL) {
271		if (dm_debug) {
272			(void) fprintf(stderr, "ERROR: sysevent bind failed: "
273			    "%d\n", errno);
274		}
275		goto out;
276	}
277
278	subclass_list[0] = ESC_DISK;
279	if (sysevent_subscribe_event(shp, EC_DEV_ADD, subclass_list, 1) != 0 ||
280	    sysevent_subscribe_event(shp, EC_DEV_REMOVE, subclass_list, 1) !=
281	    0) {
282
283		sysevent_unsubscribe_event(shp, EC_ALL);
284		sysevent_unbind_handle(shp);
285		shp = NULL;
286
287		if (dm_debug) {
288			(void) fprintf(stderr, "ERROR: sysevent subscribe "
289			    "failed: %d\n", errno);
290		}
291		goto out;
292	}
293	ret = 0;
294out:
295	mutex_exit(&shp_lock);
296	return (ret);
297}
298
299static void
300add_event_to_queue(nvlist_t *event)
301{
302	(void) mutex_lock(&queue_lock);
303
304	if (event == NULL) {
305	    event_error = ENOMEM;
306	    (void) mutex_unlock(&queue_lock);
307	    return;
308	}
309
310	if (events == NULL) {
311
312	    events = (struct event_list *)malloc(sizeof (struct event_list));
313	    if (events == NULL) {
314		event_error = ENOMEM;
315		nvlist_free(event);
316	    } else {
317		events->next = NULL;
318		events->event = event;
319	    }
320
321	} else {
322	    /* already have events in the queue */
323	    struct event_list *ep;
324	    struct event_list *new_event;
325
326	    /* find the last element in the list */
327	    for (ep = events; ep->next != NULL; ep = ep->next);
328
329	    new_event = (struct event_list *)malloc(sizeof (struct event_list));
330	    if (new_event == NULL) {
331		event_error = ENOMEM;
332		nvlist_free(event);
333	    } else {
334		new_event->next = NULL;
335		new_event->event = event;
336		ep->next = new_event;
337	    }
338	}
339
340	(void) mutex_unlock(&queue_lock);
341
342	(void) sema_post(&semaphore);
343}
344
345static void *
346cb_watch_events(void *arg __unused)
347{
348	nvlist_t	*event;
349	int		error;
350
351	/*CONSTCOND*/
352	while (1) {
353	    event = dm_get_event(&error);
354	    if (callback == NULL) {
355		/* end the thread */
356		return (NULL);
357	    }
358	    callback(event, error);
359	}
360}
361
362static void
363event_handler(sysevent_t *ev)
364{
365	char		*class_name;
366	char		*pub;
367
368	class_name = sysevent_get_class_name(ev);
369	if (dm_debug) {
370	    (void) fprintf(stderr, "****EVENT: %s %s ", class_name,
371		sysevent_get_subclass_name(ev));
372	    if ((pub = sysevent_get_pub_name(ev)) != NULL) {
373		(void) fprintf(stderr, "%s\n", pub);
374		free(pub);
375	    } else {
376		(void) fprintf(stderr, "\n");
377	    }
378	}
379
380	if (libdiskmgt_str_eq(class_name, EC_DEV_ADD)) {
381	    /* batch up the adds into a single devtree walk */
382	    walk_devtree();
383
384	} else if (libdiskmgt_str_eq(class_name, EC_DEV_REMOVE)) {
385	    nvlist_t	*nvlist = NULL;
386	    char	*dev_name = NULL;
387
388	    (void) sysevent_get_attr_list(ev, &nvlist);
389	    if (nvlist != NULL) {
390		(void) nvlist_lookup_string(nvlist, DEV_NAME, &dev_name);
391
392		if (dm_debug) {
393		    print_nvlist("**** ", nvlist);
394		}
395	    }
396
397	    if (dev_name != NULL) {
398		cache_update(DM_EV_DISK_DELETE, dev_name);
399	    }
400
401	    if (nvlist != NULL) {
402		nvlist_free(nvlist);
403	    }
404	}
405}
406
407/*
408 * This is a debugging function only.
409 */
410static void
411print_nvlist(char *prefix, nvlist_t *list)
412{
413	nvpair_t	*nvp;
414
415	nvp = nvlist_next_nvpair(list, NULL);
416	while (nvp != NULL) {
417	    char	*attrname;
418	    char	*str;
419	    uint32_t	ui32;
420	    uint64_t	ui64;
421	    char	**str_array;
422	    uint_t	cnt;
423	    int		i;
424
425	    attrname = nvpair_name(nvp);
426	    switch (nvpair_type(nvp)) {
427	    case DATA_TYPE_STRING:
428		(void) nvpair_value_string(nvp, &str);
429		(void) fprintf(stderr, "%s%s: %s\n", prefix, attrname, str);
430		break;
431
432	    case DATA_TYPE_STRING_ARRAY:
433		(void) nvpair_value_string_array(nvp, &str_array, &cnt);
434		(void) fprintf(stderr, "%s%s:\n", prefix, attrname);
435		for (i = 0; i < cnt; i++) {
436		    (void) fprintf(stderr, "%s    %s\n", prefix, str_array[i]);
437		}
438		break;
439
440	    case DATA_TYPE_UINT32:
441		(void) nvpair_value_uint32(nvp, &ui32);
442		(void) fprintf(stderr, "%s%s: %u\n", prefix, attrname, ui32);
443		break;
444
445	    case DATA_TYPE_UINT64:
446		(void) nvpair_value_uint64(nvp, &ui64);
447#ifdef _LP64
448		(void) fprintf(stderr, "%s%s: %lu\n", prefix, attrname, ui64);
449#else
450		(void) fprintf(stderr, "%s%s: %llu\n", prefix, attrname, ui64);
451#endif
452		break;
453
454
455	    case DATA_TYPE_BOOLEAN:
456		(void) fprintf(stderr, "%s%s: true\n", prefix, attrname);
457		break;
458
459	    default:
460		(void) fprintf(stderr, "%s%s: UNSUPPORTED TYPE\n", prefix,
461		    attrname);
462		break;
463	    }
464
465	    nvp = nvlist_next_nvpair(list, nvp);
466	}
467}
468
469/*
470 * Batch up the adds into a single devtree walk.  We can get a bunch of
471 * adds when we add a controller since we will get an add event for each
472 * drive.
473 */
474static void
475walk_devtree(void)
476{
477	thread_t	walk_thread;
478
479	mutex_enter(&walker_lock);
480
481	switch (walker_state) {
482	case WALK_NONE:
483	    if (thr_create(NULL, 0, walker, NULL,
484		THR_DAEMON, &walk_thread) == 0) {
485		walker_state = WALK_WAITING;
486	    }
487	    break;
488
489	case WALK_WAITING:
490	    /* absorb the event and do nothing */
491	    break;
492
493	case WALK_RUNNING:
494	    events_pending = 1;
495	    break;
496	}
497
498	mutex_exit(&walker_lock);
499}
500
501static void *
502walker(void *arg __unused)
503{
504	int	walk_again = 0;
505
506	do {
507	    /* start by waiting for a few seconds to absorb extra events */
508	    (void) sleep(WALK_WAIT_TIME);
509
510	    mutex_enter(&walker_lock);
511	    if (shutting_down) {
512		walker_state = WALK_NONE;
513		(void) cond_broadcast(&walker_cv);
514		mutex_exit(&walker_lock);
515		return (NULL);
516	    }
517	    walker_state = WALK_RUNNING;
518	    mutex_exit(&walker_lock);
519
520	    cache_update(DM_EV_DISK_ADD, NULL);
521
522	    mutex_enter(&walker_lock);
523	    if (shutting_down) {
524		walker_state = WALK_NONE;
525		(void) cond_broadcast(&walker_cv);
526		mutex_exit(&walker_lock);
527		return (NULL);
528	    }
529
530	    if (events_pending) {
531		events_pending = 0;
532		walker_state = WALK_WAITING;
533		walk_again = 1;
534	    } else {
535		walker_state = WALK_NONE;
536		walk_again = 0;
537	    }
538
539	    mutex_exit(&walker_lock);
540
541	} while (walk_again);
542	return (NULL);
543}
544