xref: /illumos-gate/usr/src/cmd/bhyve/mevent.c (revision e8d71297)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /*
32  * Copyright 2018 Joyent, Inc.
33  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
34  */
35 
36 /*
37  * Micro event library for FreeBSD, designed for a single i/o thread
38  * using kqueue, and having events be persistent by default.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
56 
57 #include <sys/types.h>
58 #ifndef WITHOUT_CAPSICUM
59 #include <sys/capsicum.h>
60 #endif
61 #ifdef __FreeBSD__
62 #include <sys/event.h>
63 #else
64 #include <port.h>
65 #include <sys/poll.h>
66 #include <sys/siginfo.h>
67 #include <sys/queue.h>
68 #include <sys/debug.h>
69 #include <sys/stat.h>
70 #endif
71 #include <sys/time.h>
72 
73 #include <pthread.h>
74 #include <pthread_np.h>
75 
76 #include "mevent.h"
77 
/* Size of the kevent batches and of the wake-up pipe drain buffer. */
#define	MEVENT_MAX	64

#ifndef __FreeBSD__
/*
 * Local stand-ins for the kqueue EV_* flags used in me_state.  Adding and
 * enabling are not distinguished: EV_ADD is the same bit as EV_ENABLE.
 */
#define	EV_ENABLE	0x01
#define	EV_DISABLE	0x02
#define	EV_DELETE	0x04
#define	EV_ADD		EV_ENABLE

/* Period, in milliseconds, of the timer that polls watched files. */
static int mevent_file_poll_interval_ms = 5000;
#endif
88 
/* Protects the event lists and the per-event bookkeeping fields. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
/* Ensures mevent_init() runs exactly once. */
static pthread_once_t mevent_once = PTHREAD_ONCE_INIT;
/* Thread that entered mevent_dispatch(). */
static pthread_t mevent_tid;
/* Wake-up pipe: [0] is watched by the dispatcher, [1] is written to. */
static int mevent_pipefd[2];
/* Descriptor returned by kqueue() or port_create(). */
static int mfd;
#ifdef __FreeBSD__
/* Next identifier handed to a timer event. */
static int mevent_timid = 43;
#endif
97 
98 struct mevent {
99 	void	(*me_func)(int, enum ev_type, void *);
100 #define me_msecs me_fd
101 	int	me_fd;
102 #ifdef __FreeBSD__
103 	int	me_timid;
104 #else
105 	timer_t me_timid;
106 #endif
107 	enum ev_type me_type;
108 	void    *me_param;
109 	int	me_cq;
110 	int	me_state; /* Desired kevent flags. */
111 	int	me_closefd;
112 	int	me_fflags;
113 #ifndef __FreeBSD__
114 	port_notify_t	me_notify;
115 	struct sigevent	me_sigev;
116 	boolean_t	me_auto_requeue;
117 	struct {
118 		int	mp_fd;
119 		off_t	mp_size;
120 		void	(*mp_func)(int, enum ev_type, void *);
121 		void    *mp_param;
122 	} me_poll;
123 #endif
124 	LIST_ENTRY(mevent) me_list;
125 };
126 
/*
 * global_head holds events whose state has been applied; change_head holds
 * events with a pending state change for the dispatch thread to process.
 */
static LIST_HEAD(listhead, mevent) global_head;
static struct listhead change_head;
128 
129 static void
130 mevent_qlock(void)
131 {
132 	pthread_mutex_lock(&mevent_lmutex);
133 }
134 
135 static void
mevent_qunlock(void)136 mevent_qunlock(void)
137 {
138 	pthread_mutex_unlock(&mevent_lmutex);
139 }
140 
141 static void
mevent_pipe_read(int fd,enum ev_type type,void * param)142 mevent_pipe_read(int fd, enum ev_type type, void *param)
143 {
144 	char buf[MEVENT_MAX];
145 	int status;
146 
147 	/*
148 	 * Drain the pipe read side. The fd is non-blocking so this is
149 	 * safe to do.
150 	 */
151 	do {
152 		status = read(fd, buf, sizeof(buf));
153 	} while (status == MEVENT_MAX);
154 }
155 
156 static void
mevent_notify(void)157 mevent_notify(void)
158 {
159 	char c = '\0';
160 
161 	/*
162 	 * If calling from outside the i/o thread, write a byte on the
163 	 * pipe to force the i/o thread to exit the blocking kevent call.
164 	 */
165 	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
166 		write(mevent_pipefd[1], &c, 1);
167 	}
168 }
169 
170 static void
mevent_init(void)171 mevent_init(void)
172 {
173 #ifndef WITHOUT_CAPSICUM
174 	cap_rights_t rights;
175 #endif
176 
177 #ifdef __FreeBSD__
178 	mfd = kqueue();
179 #else
180 	mfd = port_create();
181 #endif
182 	assert(mfd > 0);
183 
184 #ifndef WITHOUT_CAPSICUM
185 	cap_rights_init(&rights, CAP_KQUEUE);
186 	if (caph_rights_limit(mfd, &rights) == -1)
187 		errx(EX_OSERR, "Unable to apply rights for sandbox");
188 #endif
189 
190 	LIST_INIT(&change_head);
191 	LIST_INIT(&global_head);
192 }
193 
194 
195 #ifdef __FreeBSD__
196 static int
mevent_kq_filter(struct mevent * mevp)197 mevent_kq_filter(struct mevent *mevp)
198 {
199 	int retval;
200 
201 	retval = 0;
202 
203 	if (mevp->me_type == EVF_READ)
204 		retval = EVFILT_READ;
205 
206 	if (mevp->me_type == EVF_WRITE)
207 		retval = EVFILT_WRITE;
208 
209 	if (mevp->me_type == EVF_TIMER)
210 		retval = EVFILT_TIMER;
211 
212 	if (mevp->me_type == EVF_SIGNAL)
213 		retval = EVFILT_SIGNAL;
214 
215 	if (mevp->me_type == EVF_VNODE)
216 		retval = EVFILT_VNODE;
217 
218 	return (retval);
219 }
220 
221 static int
mevent_kq_flags(struct mevent * mevp)222 mevent_kq_flags(struct mevent *mevp)
223 {
224 	int retval;
225 
226 	retval = mevp->me_state;
227 
228 	if (mevp->me_type == EVF_VNODE)
229 		retval |= EV_CLEAR;
230 
231 	return (retval);
232 }
233 
234 static int
mevent_kq_fflags(struct mevent * mevp)235 mevent_kq_fflags(struct mevent *mevp)
236 {
237 	int retval;
238 
239 	retval = 0;
240 
241 	switch (mevp->me_type) {
242 	case EVF_VNODE:
243 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
244 			retval |= NOTE_ATTRIB;
245 		break;
246 	case EVF_READ:
247 	case EVF_WRITE:
248 	case EVF_TIMER:
249 	case EVF_SIGNAL:
250 		break;
251 	}
252 
253 	return (retval);
254 }
255 
256 static void
mevent_populate(struct mevent * mevp,struct kevent * kev)257 mevent_populate(struct mevent *mevp, struct kevent *kev)
258 {
259 	if (mevp->me_type == EVF_TIMER) {
260 		kev->ident = mevp->me_timid;
261 		kev->data = mevp->me_msecs;
262 	} else {
263 		kev->ident = mevp->me_fd;
264 		kev->data = 0;
265 	}
266 	kev->filter = mevent_kq_filter(mevp);
267 	kev->flags = mevent_kq_flags(mevp);
268 	kev->fflags = mevent_kq_fflags(mevp);
269 	kev->udata = mevp;
270 }
271 
272 static int
mevent_build(struct kevent * kev)273 mevent_build(struct kevent *kev)
274 {
275 	struct mevent *mevp, *tmpp;
276 	int i;
277 
278 	i = 0;
279 
280 	mevent_qlock();
281 
282 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
283 		if (mevp->me_closefd) {
284 			/*
285 			 * A close of the file descriptor will remove the
286 			 * event
287 			 */
288 			close(mevp->me_fd);
289 		} else {
290 			assert((mevp->me_state & EV_ADD) == 0);
291 			mevent_populate(mevp, &kev[i]);
292 			i++;
293 		}
294 
295 		mevp->me_cq = 0;
296 		LIST_REMOVE(mevp, me_list);
297 
298 		if (mevp->me_state & EV_DELETE) {
299 			free(mevp);
300 		} else {
301 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
302 		}
303 
304 		assert(i < MEVENT_MAX);
305 	}
306 
307 	mevent_qunlock();
308 
309 	return (i);
310 }
311 
312 static void
mevent_handle(struct kevent * kev,int numev)313 mevent_handle(struct kevent *kev, int numev)
314 {
315 	struct mevent *mevp;
316 	int i;
317 
318 	for (i = 0; i < numev; i++) {
319 		mevp = kev[i].udata;
320 
321 		/* XXX check for EV_ERROR ? */
322 
323 		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
324 	}
325 }
326 
327 #else /* __FreeBSD__ */
328 
329 static boolean_t
mevent_clarify_state(struct mevent * mevp)330 mevent_clarify_state(struct mevent *mevp)
331 {
332 	const int state = mevp->me_state;
333 
334 	if ((state & EV_DELETE) != 0) {
335 		/* All other intents are overriden by delete. */
336 		mevp->me_state = EV_DELETE;
337 		return (B_TRUE);
338 	}
339 
340 	/*
341 	 * Without a distinction between EV_ADD and EV_ENABLE in our emulation,
342 	 * handling the add-disabled case means eliding the portfs operation
343 	 * when both flags are present.
344 	 *
345 	 * This is not a concern for subsequent enable/disable operations, as
346 	 * mevent_update() toggles the flags properly so they are not left in
347 	 * conflict.
348 	 */
349 	if (state == (EV_ENABLE|EV_DISABLE)) {
350 		mevp->me_state = EV_DISABLE;
351 		return (B_FALSE);
352 	}
353 
354 	return (B_TRUE);
355 }
356 
357 static void
mevent_poll_file_attrib(int fd,enum ev_type type,void * param)358 mevent_poll_file_attrib(int fd, enum ev_type type, void *param)
359 {
360 	struct mevent *mevp = param;
361 	struct stat st;
362 
363 	if (fstat(mevp->me_poll.mp_fd, &st) != 0) {
364 		(void) fprintf(stderr, "%s: fstat(%d) failed: %s\n",
365 		    __func__, fd, strerror(errno));
366 		return;
367 	}
368 
369 	/*
370 	 * The only current consumer of file attribute monitoring is
371 	 * blockif, which wants to know about size changes.
372 	 */
373 	if (mevp->me_poll.mp_size != st.st_size) {
374 		mevp->me_poll.mp_size = st.st_size;
375 
376 		(*mevp->me_poll.mp_func)(mevp->me_poll.mp_fd, EVF_VNODE,
377 		    mevp->me_poll.mp_param);
378 	}
379 }
380 
381 static void
mevent_update_one_readwrite(struct mevent * mevp)382 mevent_update_one_readwrite(struct mevent *mevp)
383 {
384 	int portfd = mevp->me_notify.portnfy_port;
385 
386 	mevp->me_auto_requeue = B_FALSE;
387 
388 	switch (mevp->me_state) {
389 	case EV_ENABLE:
390 	{
391 		const int events = (mevp->me_type == EVF_READ) ?
392 		    POLLIN : POLLOUT;
393 
394 		if (port_associate(portfd, PORT_SOURCE_FD, mevp->me_fd,
395 		    events, mevp) != 0) {
396 			(void) fprintf(stderr,
397 			    "port_associate fd %d %p failed: %s\n",
398 			    mevp->me_fd, mevp, strerror(errno));
399 		}
400 		return;
401 	}
402 	case EV_DISABLE:
403 	case EV_DELETE:
404 		/*
405 		 * A disable that comes in while an event is being
406 		 * handled will result in an ENOENT.
407 		 */
408 		if (port_dissociate(portfd, PORT_SOURCE_FD,
409 		    mevp->me_fd) != 0 && errno != ENOENT) {
410 			(void) fprintf(stderr, "port_dissociate "
411 			    "portfd %d fd %d mevp %p failed: %s\n",
412 			    portfd, mevp->me_fd, mevp, strerror(errno));
413 		}
414 		return;
415 	default:
416 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
417 		    mevp->me_state);
418 		abort();
419 	}
420 }
421 
422 static void
mevent_update_one_timer(struct mevent * mevp)423 mevent_update_one_timer(struct mevent *mevp)
424 {
425 	mevp->me_auto_requeue = B_TRUE;
426 
427 	switch (mevp->me_state) {
428 	case EV_ENABLE:
429 	{
430 		struct itimerspec it = { 0 };
431 
432 		mevp->me_sigev.sigev_notify = SIGEV_PORT;
433 		mevp->me_sigev.sigev_value.sival_ptr = &mevp->me_notify;
434 
435 		if (timer_create(CLOCK_REALTIME, &mevp->me_sigev,
436 		    &mevp->me_timid) != 0) {
437 			(void) fprintf(stderr, "timer_create failed: %s",
438 			    strerror(errno));
439 			return;
440 		}
441 
442 		/* The first timeout */
443 		it.it_value.tv_sec = mevp->me_msecs / MILLISEC;
444 		it.it_value.tv_nsec =
445 			MSEC2NSEC(mevp->me_msecs % MILLISEC);
446 		/* Repeat at the same interval */
447 		it.it_interval = it.it_value;
448 
449 		if (timer_settime(mevp->me_timid, 0, &it, NULL) != 0) {
450 			(void) fprintf(stderr, "timer_settime failed: %s",
451 			    strerror(errno));
452 		}
453 		return;
454 	}
455 	case EV_DISABLE:
456 	case EV_DELETE:
457 		if (timer_delete(mevp->me_timid) != 0) {
458 			(void) fprintf(stderr, "timer_delete failed: %s",
459 			    strerror(errno));
460 		}
461 		mevp->me_timid = -1;
462 		return;
463 	default:
464 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
465 		    mevp->me_state);
466 		abort();
467 	}
468 }
469 
470 static void
mevent_update_one_vnode(struct mevent * mevp)471 mevent_update_one_vnode(struct mevent *mevp)
472 {
473 	switch (mevp->me_state) {
474 	case EV_ENABLE:
475 	{
476 		struct stat st;
477 		int events = 0;
478 
479 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
480 			events |= FILE_ATTRIB;
481 
482 		assert(events != 0);
483 
484 		/*
485 		 * It is tempting to use the PORT_SOURCE_FILE type for this in
486 		 * conjunction with the FILE_ATTRIB event type. Unfortunately
487 		 * this event type triggers on any change to the file's
488 		 * ctime, and therefore for every write as well as attribute
489 		 * changes. It also does not work for ZVOLs.
490 		 *
491 		 * Convert this to a timer event and poll for the file
492 		 * attribute changes that we care about.
493 		 */
494 
495 		if (fstat(mevp->me_fd, &st) != 0) {
496 			(void) fprintf(stderr, "fstat(%d) failed: %s\n",
497 			    mevp->me_fd, strerror(errno));
498 			return;
499 		}
500 
501 		mevp->me_poll.mp_fd = mevp->me_fd;
502 		mevp->me_poll.mp_size = st.st_size;
503 
504 		mevp->me_poll.mp_func = mevp->me_func;
505 		mevp->me_poll.mp_param = mevp->me_param;
506 		mevp->me_func = mevent_poll_file_attrib;
507 		mevp->me_param = mevp;
508 
509 		mevp->me_type = EVF_TIMER;
510 		mevp->me_timid = -1;
511 		mevp->me_msecs = mevent_file_poll_interval_ms;
512 		mevent_update_one_timer(mevp);
513 
514 		return;
515 	}
516 	case EV_DISABLE:
517 	case EV_DELETE:
518 		/*
519 		 * These events do not really exist as they are converted to
520 		 * timers; fall through to abort.
521 		 */
522 	default:
523 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
524 		    mevp->me_state);
525 		abort();
526 	}
527 }
528 
529 static void
mevent_update_one(struct mevent * mevp)530 mevent_update_one(struct mevent *mevp)
531 {
532 	switch (mevp->me_type) {
533 	case EVF_READ:
534 	case EVF_WRITE:
535 		mevent_update_one_readwrite(mevp);
536 		break;
537 	case EVF_TIMER:
538 		mevent_update_one_timer(mevp);
539 		break;
540 	case EVF_VNODE:
541 		mevent_update_one_vnode(mevp);
542 		break;
543 	case EVF_SIGNAL: /* EVF_SIGNAL not yet implemented. */
544 	default:
545 		(void) fprintf(stderr, "%s: unhandled event type %d\n",
546 		    __func__, mevp->me_type);
547 		abort();
548 	}
549 }
550 
551 static void
mevent_populate(struct mevent * mevp)552 mevent_populate(struct mevent *mevp)
553 {
554 	mevp->me_notify.portnfy_port = mfd;
555 	mevp->me_notify.portnfy_user = mevp;
556 }
557 
558 static void
mevent_update_pending()559 mevent_update_pending()
560 {
561 	struct mevent *mevp, *tmpp;
562 
563 	mevent_qlock();
564 
565 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
566 		mevent_populate(mevp);
567 		if (mevp->me_closefd) {
568 			/*
569 			 * A close of the file descriptor will remove the
570 			 * event
571 			 */
572 			(void) close(mevp->me_fd);
573 			mevp->me_fd = -1;
574 		} else {
575 			if (mevent_clarify_state(mevp)) {
576 				mevent_update_one(mevp);
577 			}
578 		}
579 
580 		mevp->me_cq = 0;
581 		LIST_REMOVE(mevp, me_list);
582 
583 		if (mevp->me_state & EV_DELETE) {
584 			free(mevp);
585 		} else {
586 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
587 		}
588 	}
589 
590 	mevent_qunlock();
591 }
592 
593 static void
mevent_handle_pe(port_event_t * pe)594 mevent_handle_pe(port_event_t *pe)
595 {
596 	struct mevent *mevp = pe->portev_user;
597 
598 	(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
599 
600 	mevent_qlock();
601 	if (!mevp->me_cq && !mevp->me_auto_requeue) {
602 		mevent_update_one(mevp);
603 	}
604 	mevent_qunlock();
605 }
606 #endif
607 
608 static struct mevent *
mevent_add_state(int tfd,enum ev_type type,void (* func)(int,enum ev_type,void *),void * param,int state,int fflags)609 mevent_add_state(int tfd, enum ev_type type,
610 	   void (*func)(int, enum ev_type, void *), void *param,
611 	   int state, int fflags)
612 {
613 #ifdef __FreeBSD__
614 	struct kevent kev;
615 #endif
616 	struct mevent *lp, *mevp;
617 #ifdef __FreeBSD__
618 	int ret;
619 #endif
620 
621 	if (tfd < 0 || func == NULL) {
622 		return (NULL);
623 	}
624 
625 	mevp = NULL;
626 
627 	pthread_once(&mevent_once, mevent_init);
628 
629 	mevent_qlock();
630 
631 	/*
632 	 * Verify that the fd/type tuple is not present in any list
633 	 */
634 	LIST_FOREACH(lp, &global_head, me_list) {
635 		if (type != EVF_TIMER && lp->me_fd == tfd &&
636 		    lp->me_type == type) {
637 			goto exit;
638 		}
639 	}
640 
641 	LIST_FOREACH(lp, &change_head, me_list) {
642 		if (type != EVF_TIMER && lp->me_fd == tfd &&
643 		    lp->me_type == type) {
644 			goto exit;
645 		}
646 	}
647 
648 	/*
649 	 * Allocate an entry and populate it.
650 	 */
651 	mevp = calloc(1, sizeof(struct mevent));
652 	if (mevp == NULL) {
653 		goto exit;
654 	}
655 
656 	if (type == EVF_TIMER) {
657 		mevp->me_msecs = tfd;
658 #ifdef __FreeBSD__
659 		mevp->me_timid = mevent_timid++;
660 #else
661 		mevp->me_timid = -1;
662 #endif
663 	} else
664 		mevp->me_fd = tfd;
665 	mevp->me_type = type;
666 	mevp->me_func = func;
667 	mevp->me_param = param;
668 	mevp->me_state = state;
669 	mevp->me_fflags = fflags;
670 
671 	/*
672 	 * Try to add the event.  If this fails, report the failure to
673 	 * the caller.
674 	 */
675 #ifdef __FreeBSD__
676 	mevent_populate(mevp, &kev);
677 	ret = kevent(mfd, &kev, 1, NULL, 0, NULL);
678 	if (ret == -1) {
679 		free(mevp);
680 		mevp = NULL;
681 		goto exit;
682 	}
683 	mevp->me_state &= ~EV_ADD;
684 #else
685 	mevent_populate(mevp);
686 	if (mevent_clarify_state(mevp))
687 		mevent_update_one(mevp);
688 #endif
689 
690 	LIST_INSERT_HEAD(&global_head, mevp, me_list);
691 
692 exit:
693 	mevent_qunlock();
694 
695 	return (mevp);
696 }
697 
698 struct mevent *
mevent_add(int tfd,enum ev_type type,void (* func)(int,enum ev_type,void *),void * param)699 mevent_add(int tfd, enum ev_type type,
700 	   void (*func)(int, enum ev_type, void *), void *param)
701 {
702 
703 	return (mevent_add_state(tfd, type, func, param, EV_ADD, 0));
704 }
705 
706 struct mevent *
mevent_add_flags(int tfd,enum ev_type type,int fflags,void (* func)(int,enum ev_type,void *),void * param)707 mevent_add_flags(int tfd, enum ev_type type, int fflags,
708 		 void (*func)(int, enum ev_type, void *), void *param)
709 {
710 
711 	return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags));
712 }
713 
714 struct mevent *
mevent_add_disabled(int tfd,enum ev_type type,void (* func)(int,enum ev_type,void *),void * param)715 mevent_add_disabled(int tfd, enum ev_type type,
716 		    void (*func)(int, enum ev_type, void *), void *param)
717 {
718 
719 	return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0));
720 }
721 
722 static int
mevent_update(struct mevent * evp,bool enable)723 mevent_update(struct mevent *evp, bool enable)
724 {
725 	int newstate;
726 
727 	mevent_qlock();
728 
729 	/*
730 	 * It's not possible to enable/disable a deleted event
731 	 */
732 	assert((evp->me_state & EV_DELETE) == 0);
733 
734 	newstate = evp->me_state;
735 	if (enable) {
736 		newstate |= EV_ENABLE;
737 		newstate &= ~EV_DISABLE;
738 	} else {
739 		newstate |= EV_DISABLE;
740 		newstate &= ~EV_ENABLE;
741 	}
742 
743 	/*
744 	 * No update needed if state isn't changing
745 	 */
746 	if (evp->me_state != newstate) {
747 		evp->me_state = newstate;
748 
749 		/*
750 		 * Place the entry onto the changed list if not
751 		 * already there.
752 		 */
753 		if (evp->me_cq == 0) {
754 			evp->me_cq = 1;
755 			LIST_REMOVE(evp, me_list);
756 			LIST_INSERT_HEAD(&change_head, evp, me_list);
757 			mevent_notify();
758 		}
759 	}
760 
761 	mevent_qunlock();
762 
763 	return (0);
764 }
765 
/* Request that the event be enabled.  Returns mevent_update()'s result. */
int
mevent_enable(struct mevent *evp)
{
	const bool enable = true;

	return (mevent_update(evp, enable));
}
772 
/* Request that the event be disabled.  Returns mevent_update()'s result. */
int
mevent_disable(struct mevent *evp)
{
	const bool enable = false;

	return (mevent_update(evp, enable));
}
779 
780 static int
mevent_delete_event(struct mevent * evp,int closefd)781 mevent_delete_event(struct mevent *evp, int closefd)
782 {
783 	mevent_qlock();
784 
785 	/*
786          * Place the entry onto the changed list if not already there, and
787 	 * mark as to be deleted.
788          */
789         if (evp->me_cq == 0) {
790 		evp->me_cq = 1;
791 		LIST_REMOVE(evp, me_list);
792 		LIST_INSERT_HEAD(&change_head, evp, me_list);
793 		mevent_notify();
794         }
795 	evp->me_state = EV_DELETE;
796 
797 	if (closefd)
798 		evp->me_closefd = 1;
799 
800 	mevent_qunlock();
801 
802 	return (0);
803 }
804 
/* Delete an event, leaving its descriptor open. */
int
mevent_delete(struct mevent *evp)
{
	const int closefd = 0;

	return (mevent_delete_event(evp, closefd));
}
811 
/* Delete an event and have its descriptor closed as well. */
int
mevent_delete_close(struct mevent *evp)
{
	const int closefd = 1;

	return (mevent_delete_event(evp, closefd));
}
818 
819 static void
mevent_set_name(void)820 mevent_set_name(void)
821 {
822 
823 	pthread_set_name_np(mevent_tid, "mevent");
824 }
825 
826 void
mevent_dispatch(void)827 mevent_dispatch(void)
828 {
829 #ifdef __FreeBSD__
830 	struct kevent changelist[MEVENT_MAX];
831 	struct kevent eventlist[MEVENT_MAX];
832 	struct mevent *pipev;
833 	int numev;
834 #else
835 	struct mevent *pipev;
836 #endif
837 	int ret;
838 #ifndef WITHOUT_CAPSICUM
839 	cap_rights_t rights;
840 #endif
841 
842 	mevent_tid = pthread_self();
843 	mevent_set_name();
844 
845 	pthread_once(&mevent_once, mevent_init);
846 
847 	/*
848 	 * Open the pipe that will be used for other threads to force
849 	 * the blocking kqueue call to exit by writing to it. Set the
850 	 * descriptor to non-blocking.
851 	 */
852 	ret = pipe(mevent_pipefd);
853 	if (ret < 0) {
854 		perror("pipe");
855 		exit(0);
856 	}
857 
858 #ifndef WITHOUT_CAPSICUM
859 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
860 	if (caph_rights_limit(mevent_pipefd[0], &rights) == -1)
861 		errx(EX_OSERR, "Unable to apply rights for sandbox");
862 	if (caph_rights_limit(mevent_pipefd[1], &rights) == -1)
863 		errx(EX_OSERR, "Unable to apply rights for sandbox");
864 #endif
865 
866 	/*
867 	 * Add internal event handler for the pipe write fd
868 	 */
869 	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
870 	assert(pipev != NULL);
871 
872 	for (;;) {
873 #ifdef __FreeBSD__
874 		/*
875 		 * Build changelist if required.
876 		 * XXX the changelist can be put into the blocking call
877 		 * to eliminate the extra syscall. Currently better for
878 		 * debug.
879 		 */
880 		numev = mevent_build(changelist);
881 		if (numev) {
882 			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
883 			if (ret == -1) {
884 				perror("Error return from kevent change");
885 			}
886 		}
887 
888 		/*
889 		 * Block awaiting events
890 		 */
891 		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
892 		if (ret == -1 && errno != EINTR) {
893 			perror("Error return from kevent monitor");
894 		}
895 
896 		/*
897 		 * Handle reported events
898 		 */
899 		mevent_handle(eventlist, ret);
900 
901 #else /* __FreeBSD__ */
902 		port_event_t pev;
903 
904 		/* Handle any pending updates */
905 		mevent_update_pending();
906 
907 		/* Block awaiting events */
908 		ret = port_get(mfd, &pev, NULL);
909 		if (ret != 0) {
910 			if (errno != EINTR)
911 				perror("Error return from port_get");
912 			continue;
913 		}
914 
915 		/* Handle reported event */
916 		mevent_handle_pe(&pev);
917 #endif /* __FreeBSD__ */
918 	}
919 }
920