xref: /illumos-gate/usr/src/cmd/bhyve/mevent.c (revision 7bb0eb34)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /*
32  * Copyright 2018 Joyent, Inc.
33  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
34  */
35 
36 /*
37  * Micro event library for FreeBSD, designed for a single i/o thread
38  * using kqueue, and having events be persistent by default.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
56 
57 #include <sys/types.h>
58 #ifndef WITHOUT_CAPSICUM
59 #include <sys/capsicum.h>
60 #endif
61 #ifdef __FreeBSD__
62 #include <sys/event.h>
63 #else
64 #include <port.h>
65 #include <sys/poll.h>
66 #include <sys/siginfo.h>
67 #include <sys/queue.h>
68 #include <sys/debug.h>
69 #include <libproc.h>
70 #endif
71 #include <sys/time.h>
72 
73 #include <pthread.h>
74 #include <pthread_np.h>
75 
76 #include "mevent.h"
77 
/*
 * Size of the scratch buffer used to drain the wakeup pipe and of the
 * kevent change/event arrays used by the kqueue dispatch loop.
 */
#define	MEVENT_MAX	64

#ifdef __FreeBSD__
/* Next identifier handed out to a kqueue timer event. */
static int mevent_timid = 43;
#else
/*
 * Stand-ins for the kqueue state flags on builds that do not have them.
 * There is no separate "add" operation here, so EV_ADD is an alias for
 * EV_ENABLE.
 */
#define	EV_ENABLE	0x01
#define	EV_ADD		EV_ENABLE
#define	EV_DISABLE	0x02
#define	EV_DELETE	0x04

/* Interval used when a file has to be polled for attribute changes. */
static int mevent_file_poll_interval_ms = 5000;
#endif

/* Mutex taken by mevent_qlock()/mevent_qunlock(). */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;

/* Guards the one-time call to mevent_init(). */
static pthread_once_t mevent_once = PTHREAD_ONCE_INIT;

/* Thread that is running mevent_dispatch(). */
static pthread_t mevent_tid;

/* Wakeup pipe: [0] is read by the dispatch thread, [1] is written to. */
static int mevent_pipefd[2];

/* The kqueue (FreeBSD) or event port (otherwise) descriptor. */
static int mfd;
97 
98 struct mevent {
99 	void	(*me_func)(int, enum ev_type, void *);
100 #define me_msecs me_fd
101 	int	me_fd;
102 #ifdef __FreeBSD__
103 	int	me_timid;
104 #else
105 	timer_t me_timid;
106 #endif
107 	enum ev_type me_type;
108 	void    *me_param;
109 	int	me_cq;
110 	int	me_state; /* Desired kevent flags. */
111 	int	me_closefd;
112 	int	me_fflags;
113 #ifndef __FreeBSD__
114 	port_notify_t	me_notify;
115 	struct sigevent	me_sigev;
116 	boolean_t	me_auto_requeue;
117 	struct file_obj	me_fobj;
118 	char		*me_fname;
119 	struct {
120 		int	mp_fd;
121 		off_t	mp_size;
122 		void	(*mp_func)(int, enum ev_type, void *);
123 		void    *mp_param;
124 	} me_poll;
125 #endif
126 	LIST_ENTRY(mevent) me_list;
127 };
128 
129 static LIST_HEAD(listhead, mevent) global_head, change_head;
130 
131 static void
132 mevent_qlock(void)
133 {
134 	pthread_mutex_lock(&mevent_lmutex);
135 }
136 
137 static void
138 mevent_qunlock(void)
139 {
140 	pthread_mutex_unlock(&mevent_lmutex);
141 }
142 
143 static void
144 mevent_pipe_read(int fd, enum ev_type type, void *param)
145 {
146 	char buf[MEVENT_MAX];
147 	int status;
148 
149 	/*
150 	 * Drain the pipe read side. The fd is non-blocking so this is
151 	 * safe to do.
152 	 */
153 	do {
154 		status = read(fd, buf, sizeof(buf));
155 	} while (status == MEVENT_MAX);
156 }
157 
158 static void
159 mevent_notify(void)
160 {
161 	char c = '\0';
162 
163 	/*
164 	 * If calling from outside the i/o thread, write a byte on the
165 	 * pipe to force the i/o thread to exit the blocking kevent call.
166 	 */
167 	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
168 		write(mevent_pipefd[1], &c, 1);
169 	}
170 }
171 
172 static void
173 mevent_init(void)
174 {
175 #ifndef WITHOUT_CAPSICUM
176 	cap_rights_t rights;
177 #endif
178 
179 #ifdef __FreeBSD__
180 	mfd = kqueue();
181 #else
182 	mfd = port_create();
183 #endif
184 	assert(mfd > 0);
185 
186 #ifndef WITHOUT_CAPSICUM
187 	cap_rights_init(&rights, CAP_KQUEUE);
188 	if (caph_rights_limit(mfd, &rights) == -1)
189 		errx(EX_OSERR, "Unable to apply rights for sandbox");
190 #endif
191 
192 	LIST_INIT(&change_head);
193 	LIST_INIT(&global_head);
194 }
195 
196 
197 #ifdef __FreeBSD__
198 static int
199 mevent_kq_filter(struct mevent *mevp)
200 {
201 	int retval;
202 
203 	retval = 0;
204 
205 	if (mevp->me_type == EVF_READ)
206 		retval = EVFILT_READ;
207 
208 	if (mevp->me_type == EVF_WRITE)
209 		retval = EVFILT_WRITE;
210 
211 	if (mevp->me_type == EVF_TIMER)
212 		retval = EVFILT_TIMER;
213 
214 	if (mevp->me_type == EVF_SIGNAL)
215 		retval = EVFILT_SIGNAL;
216 
217 	if (mevp->me_type == EVF_VNODE)
218 		retval = EVFILT_VNODE;
219 
220 	return (retval);
221 }
222 
223 static int
224 mevent_kq_flags(struct mevent *mevp)
225 {
226 	int retval;
227 
228 	retval = mevp->me_state;
229 
230 	if (mevp->me_type == EVF_VNODE)
231 		retval |= EV_CLEAR;
232 
233 	return (retval);
234 }
235 
236 static int
237 mevent_kq_fflags(struct mevent *mevp)
238 {
239 	int retval;
240 
241 	retval = 0;
242 
243 	switch (mevp->me_type) {
244 	case EVF_VNODE:
245 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
246 			retval |= NOTE_ATTRIB;
247 		break;
248 	case EVF_READ:
249 	case EVF_WRITE:
250 	case EVF_TIMER:
251 	case EVF_SIGNAL:
252 		break;
253 	}
254 
255 	return (retval);
256 }
257 
258 static void
259 mevent_populate(struct mevent *mevp, struct kevent *kev)
260 {
261 	if (mevp->me_type == EVF_TIMER) {
262 		kev->ident = mevp->me_timid;
263 		kev->data = mevp->me_msecs;
264 	} else {
265 		kev->ident = mevp->me_fd;
266 		kev->data = 0;
267 	}
268 	kev->filter = mevent_kq_filter(mevp);
269 	kev->flags = mevent_kq_flags(mevp);
270 	kev->fflags = mevent_kq_fflags(mevp);
271 	kev->udata = mevp;
272 }
273 
274 static int
275 mevent_build(struct kevent *kev)
276 {
277 	struct mevent *mevp, *tmpp;
278 	int i;
279 
280 	i = 0;
281 
282 	mevent_qlock();
283 
284 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
285 		if (mevp->me_closefd) {
286 			/*
287 			 * A close of the file descriptor will remove the
288 			 * event
289 			 */
290 			close(mevp->me_fd);
291 		} else {
292 			assert((mevp->me_state & EV_ADD) == 0);
293 			mevent_populate(mevp, &kev[i]);
294 			i++;
295 		}
296 
297 		mevp->me_cq = 0;
298 		LIST_REMOVE(mevp, me_list);
299 
300 		if (mevp->me_state & EV_DELETE) {
301 			free(mevp);
302 		} else {
303 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
304 		}
305 
306 		assert(i < MEVENT_MAX);
307 	}
308 
309 	mevent_qunlock();
310 
311 	return (i);
312 }
313 
314 static void
315 mevent_handle(struct kevent *kev, int numev)
316 {
317 	struct mevent *mevp;
318 	int i;
319 
320 	for (i = 0; i < numev; i++) {
321 		mevp = kev[i].udata;
322 
323 		/* XXX check for EV_ERROR ? */
324 
325 		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
326 	}
327 }
328 
329 #else /* __FreeBSD__ */
330 
331 static boolean_t
332 mevent_clarify_state(struct mevent *mevp)
333 {
334 	const int state = mevp->me_state;
335 
336 	if ((state & EV_DELETE) != 0) {
337 		/* All other intents are overriden by delete. */
338 		mevp->me_state = EV_DELETE;
339 		return (B_TRUE);
340 	}
341 
342 	/*
343 	 * Without a distinction between EV_ADD and EV_ENABLE in our emulation,
344 	 * handling the add-disabled case means eliding the portfs operation
345 	 * when both flags are present.
346 	 *
347 	 * This is not a concern for subsequent enable/disable operations, as
348 	 * mevent_update() toggles the flags properly so they are not left in
349 	 * conflict.
350 	 */
351 	if (state == (EV_ENABLE|EV_DISABLE)) {
352 		mevp->me_state = EV_DISABLE;
353 		return (B_FALSE);
354 	}
355 
356 	return (B_TRUE);
357 }
358 
359 static char *
360 mevent_fdpath(int fd)
361 {
362 	prfdinfo_t *fdinfo;
363 	char *path;
364 	size_t len;
365 
366 	fdinfo = proc_get_fdinfo(getpid(), fd);
367 	if (fdinfo == NULL) {
368 		(void) fprintf(stderr, "%s: proc_get_fdinfo(%d) failed: %s\n",
369 		    __func__, fd, strerror(errno));
370 		path = NULL;
371 	} else {
372 		path = (char *)proc_fdinfo_misc(fdinfo, PR_PATHNAME, &len);
373 	}
374 
375 	if (path == NULL) {
376 		(void) fprintf(stderr, "%s: Fall back to /proc/self/fd/%d\n",
377 		    __func__, fd);
378 		(void) asprintf(&path, "/proc/self/fd/%d", fd);
379 	} else {
380 		path = strdup(path);
381 	}
382 
383 	proc_fdinfo_free(fdinfo);
384 
385 	if (path == NULL) {
386 		(void) fprintf(stderr,
387 		    "%s: Error building path for fd %d: %s\n", __func__,
388 		    fd, strerror(errno));
389 	}
390 
391 	return (path);
392 }
393 
394 static void
395 mevent_poll_file_attrib(int fd, enum ev_type type, void *param)
396 {
397 	struct mevent *mevp = param;
398 	struct stat st;
399 
400 	if (fstat(mevp->me_poll.mp_fd, &st) != 0) {
401 		(void) fprintf(stderr, "%s: fstat(%d) \"%s\" failed: %s\n",
402 		    __func__, fd, mevp->me_fname, strerror(errno));
403 		return;
404 	}
405 
406 	if (mevp->me_poll.mp_size != st.st_size ||
407 	    mevp->me_fobj.fo_ctime.tv_sec != st.st_ctim.tv_sec ||
408 	    mevp->me_fobj.fo_ctime.tv_nsec != st.st_ctim.tv_nsec) {
409 		mevp->me_poll.mp_size = st.st_size;
410 		mevp->me_fobj.fo_atime = st.st_atim;
411 		mevp->me_fobj.fo_mtime = st.st_mtim;
412 		mevp->me_fobj.fo_ctime = st.st_ctim;
413 
414 		(*mevp->me_poll.mp_func)(mevp->me_poll.mp_fd, EVF_VNODE,
415 		    mevp->me_poll.mp_param);
416 	}
417 }
418 
419 static void
420 mevent_update_one_readwrite(struct mevent *mevp)
421 {
422 	int portfd = mevp->me_notify.portnfy_port;
423 
424 	mevp->me_auto_requeue = B_FALSE;
425 
426 	switch (mevp->me_state) {
427 	case EV_ENABLE:
428 	{
429 		const int events = (mevp->me_type == EVF_READ) ?
430 		    POLLIN : POLLOUT;
431 
432 		if (port_associate(portfd, PORT_SOURCE_FD, mevp->me_fd,
433 		    events, mevp) != 0) {
434 			(void) fprintf(stderr,
435 			    "port_associate fd %d %p failed: %s\n",
436 			    mevp->me_fd, mevp, strerror(errno));
437 		}
438 		return;
439 	}
440 	case EV_DISABLE:
441 	case EV_DELETE:
442 		/*
443 		 * A disable that comes in while an event is being
444 		 * handled will result in an ENOENT.
445 		 */
446 		if (port_dissociate(portfd, PORT_SOURCE_FD,
447 		    mevp->me_fd) != 0 && errno != ENOENT) {
448 			(void) fprintf(stderr, "port_dissociate "
449 			    "portfd %d fd %d mevp %p failed: %s\n",
450 			    portfd, mevp->me_fd, mevp, strerror(errno));
451 		}
452 		return;
453 	default:
454 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
455 		    mevp->me_state);
456 		abort();
457 	}
458 }
459 
460 static void
461 mevent_update_one_timer(struct mevent *mevp)
462 {
463 	mevp->me_auto_requeue = B_TRUE;
464 
465 	switch (mevp->me_state) {
466 	case EV_ENABLE:
467 	{
468 		struct itimerspec it = { 0 };
469 
470 		mevp->me_sigev.sigev_notify = SIGEV_PORT;
471 		mevp->me_sigev.sigev_value.sival_ptr = &mevp->me_notify;
472 
473 		if (timer_create(CLOCK_REALTIME, &mevp->me_sigev,
474 		    &mevp->me_timid) != 0) {
475 			(void) fprintf(stderr, "timer_create failed: %s",
476 			    strerror(errno));
477 			return;
478 		}
479 
480 		/* The first timeout */
481 		it.it_value.tv_sec = mevp->me_msecs / MILLISEC;
482 		it.it_value.tv_nsec =
483 			MSEC2NSEC(mevp->me_msecs % MILLISEC);
484 		/* Repeat at the same interval */
485 		it.it_interval = it.it_value;
486 
487 		if (timer_settime(mevp->me_timid, 0, &it, NULL) != 0) {
488 			(void) fprintf(stderr, "timer_settime failed: %s",
489 			    strerror(errno));
490 		}
491 		return;
492 	}
493 	case EV_DISABLE:
494 	case EV_DELETE:
495 		if (timer_delete(mevp->me_timid) != 0) {
496 			(void) fprintf(stderr, "timer_delete failed: %s",
497 			    strerror(errno));
498 		}
499 		mevp->me_timid = -1;
500 		return;
501 	default:
502 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
503 		    mevp->me_state);
504 		abort();
505 	}
506 }
507 
508 static void
509 mevent_update_one_vnode(struct mevent *mevp)
510 {
511 	int portfd = mevp->me_notify.portnfy_port;
512 
513 	mevp->me_auto_requeue = B_FALSE;
514 
515 	switch (mevp->me_state) {
516 	case EV_ENABLE:
517 	{
518 		int events = 0;
519 
520 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
521 			events |= FILE_ATTRIB;
522 
523 		assert(events != 0);
524 
525 		if (mevp->me_fname == NULL) {
526 			mevp->me_fname = mevent_fdpath(mevp->me_fd);
527 			if (mevp->me_fname == NULL)
528 				return;
529 		}
530 
531 		bzero(&mevp->me_fobj, sizeof (mevp->me_fobj));
532 		mevp->me_fobj.fo_name = mevp->me_fname;
533 
534 		if (port_associate(portfd, PORT_SOURCE_FILE,
535 		    (uintptr_t)&mevp->me_fobj, events, mevp) != 0) {
536 			/*
537 			 * If this file does not support event ports
538 			 * (e.g. ZVOLs do not yet have support)
539 			 * then convert this to a timer event and poll for
540 			 * file attribute changes.
541 			 */
542 			struct stat st;
543 
544 			if (errno != ENOTSUP) {
545 				(void) fprintf(stderr,
546 				    "port_associate fd %d (%s) %p failed: %s"
547 				    ", polling instead\n",
548 				    mevp->me_fd, mevp->me_fname, mevp,
549 				    strerror(errno));
550 			}
551 
552 			if (fstat(mevp->me_fd, &st) != 0) {
553 				(void) fprintf(stderr,
554 				    "fstat(%d) \"%s\" failed: %s\n",
555 				    mevp->me_fd, mevp->me_fname,
556 				    strerror(errno));
557 				return;
558 			}
559 
560 			mevp->me_fobj.fo_atime = st.st_atim;
561 			mevp->me_fobj.fo_mtime = st.st_mtim;
562 			mevp->me_fobj.fo_ctime = st.st_ctim;
563 
564 			mevp->me_poll.mp_fd = mevp->me_fd;
565 			mevp->me_poll.mp_size = st.st_size;
566 
567 			mevp->me_poll.mp_func = mevp->me_func;
568 			mevp->me_poll.mp_param = mevp->me_param;
569 			mevp->me_func = mevent_poll_file_attrib;
570 			mevp->me_param = mevp;
571 
572 			mevp->me_type = EVF_TIMER;
573 			mevp->me_timid = -1;
574 			mevp->me_msecs = mevent_file_poll_interval_ms;
575 			mevent_update_one_timer(mevp);
576 		}
577 		return;
578 	}
579 	case EV_DISABLE:
580 	case EV_DELETE:
581 		/*
582 		 * A disable that comes in while an event is being
583 		 * handled will result in an ENOENT.
584 		 */
585 		if (port_dissociate(portfd, PORT_SOURCE_FILE,
586 		    (uintptr_t)&mevp->me_fobj) != 0 &&
587 		    errno != ENOENT) {
588 			(void) fprintf(stderr, "port_dissociate "
589 			    "portfd %d fd %d mevp %p failed: %s\n",
590 			    portfd, mevp->me_fd, mevp, strerror(errno));
591 		}
592 		free(mevp->me_fname);
593 		mevp->me_fname = NULL;
594 		return;
595 	default:
596 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
597 		    mevp->me_state);
598 		abort();
599 	}
600 }
601 
602 static void
603 mevent_update_one(struct mevent *mevp)
604 {
605 	switch (mevp->me_type) {
606 	case EVF_READ:
607 	case EVF_WRITE:
608 		mevent_update_one_readwrite(mevp);
609 		break;
610 	case EVF_TIMER:
611 		mevent_update_one_timer(mevp);
612 		break;
613 	case EVF_VNODE:
614 		mevent_update_one_vnode(mevp);
615 		break;
616 	case EVF_SIGNAL: /* EVF_SIGNAL not yet implemented. */
617 	default:
618 		(void) fprintf(stderr, "%s: unhandled event type %d\n",
619 		    __func__, mevp->me_type);
620 		abort();
621 	}
622 }
623 
624 static void
625 mevent_populate(struct mevent *mevp)
626 {
627 	mevp->me_notify.portnfy_port = mfd;
628 	mevp->me_notify.portnfy_user = mevp;
629 }
630 
631 static void
632 mevent_update_pending()
633 {
634 	struct mevent *mevp, *tmpp;
635 
636 	mevent_qlock();
637 
638 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
639 		mevent_populate(mevp);
640 		if (mevp->me_closefd) {
641 			/*
642 			 * A close of the file descriptor will remove the
643 			 * event
644 			 */
645 			(void) close(mevp->me_fd);
646 			mevp->me_fd = -1;
647 		} else {
648 			if (mevent_clarify_state(mevp)) {
649 				mevent_update_one(mevp);
650 			}
651 		}
652 
653 		mevp->me_cq = 0;
654 		LIST_REMOVE(mevp, me_list);
655 
656 		if (mevp->me_state & EV_DELETE) {
657 			free(mevp->me_fname);
658 			free(mevp);
659 		} else {
660 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
661 		}
662 	}
663 
664 	mevent_qunlock();
665 }
666 
667 static void
668 mevent_handle_pe(port_event_t *pe)
669 {
670 	struct mevent *mevp = pe->portev_user;
671 
672 	(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
673 
674 	mevent_qlock();
675 	if (!mevp->me_cq && !mevp->me_auto_requeue) {
676 		mevent_update_one(mevp);
677 	}
678 	mevent_qunlock();
679 }
680 #endif
681 
682 static struct mevent *
683 mevent_add_state(int tfd, enum ev_type type,
684 	   void (*func)(int, enum ev_type, void *), void *param,
685 	   int state, int fflags)
686 {
687 #ifdef __FreeBSD__
688 	struct kevent kev;
689 #endif
690 	struct mevent *lp, *mevp;
691 #ifdef __FreeBSD__
692 	int ret;
693 #endif
694 
695 	if (tfd < 0 || func == NULL) {
696 		return (NULL);
697 	}
698 
699 	mevp = NULL;
700 
701 	pthread_once(&mevent_once, mevent_init);
702 
703 	mevent_qlock();
704 
705 	/*
706 	 * Verify that the fd/type tuple is not present in any list
707 	 */
708 	LIST_FOREACH(lp, &global_head, me_list) {
709 		if (type != EVF_TIMER && lp->me_fd == tfd &&
710 		    lp->me_type == type) {
711 			goto exit;
712 		}
713 	}
714 
715 	LIST_FOREACH(lp, &change_head, me_list) {
716 		if (type != EVF_TIMER && lp->me_fd == tfd &&
717 		    lp->me_type == type) {
718 			goto exit;
719 		}
720 	}
721 
722 	/*
723 	 * Allocate an entry and populate it.
724 	 */
725 	mevp = calloc(1, sizeof(struct mevent));
726 	if (mevp == NULL) {
727 		goto exit;
728 	}
729 
730 	if (type == EVF_TIMER) {
731 		mevp->me_msecs = tfd;
732 #ifdef __FreeBSD__
733 		mevp->me_timid = mevent_timid++;
734 #else
735 		mevp->me_timid = -1;
736 #endif
737 	} else
738 		mevp->me_fd = tfd;
739 	mevp->me_type = type;
740 	mevp->me_func = func;
741 	mevp->me_param = param;
742 	mevp->me_state = state;
743 	mevp->me_fflags = fflags;
744 
745 	/*
746 	 * Try to add the event.  If this fails, report the failure to
747 	 * the caller.
748 	 */
749 #ifdef __FreeBSD__
750 	mevent_populate(mevp, &kev);
751 	ret = kevent(mfd, &kev, 1, NULL, 0, NULL);
752 	if (ret == -1) {
753 		free(mevp);
754 		mevp = NULL;
755 		goto exit;
756 	}
757 	mevp->me_state &= ~EV_ADD;
758 #else
759 	mevent_populate(mevp);
760 	if (mevent_clarify_state(mevp))
761 		mevent_update_one(mevp);
762 #endif
763 
764 	LIST_INSERT_HEAD(&global_head, mevp, me_list);
765 
766 exit:
767 	mevent_qunlock();
768 
769 	return (mevp);
770 }
771 
772 struct mevent *
773 mevent_add(int tfd, enum ev_type type,
774 	   void (*func)(int, enum ev_type, void *), void *param)
775 {
776 
777 	return (mevent_add_state(tfd, type, func, param, EV_ADD, 0));
778 }
779 
780 struct mevent *
781 mevent_add_flags(int tfd, enum ev_type type, int fflags,
782 		 void (*func)(int, enum ev_type, void *), void *param)
783 {
784 
785 	return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags));
786 }
787 
788 struct mevent *
789 mevent_add_disabled(int tfd, enum ev_type type,
790 		    void (*func)(int, enum ev_type, void *), void *param)
791 {
792 
793 	return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0));
794 }
795 
796 static int
797 mevent_update(struct mevent *evp, bool enable)
798 {
799 	int newstate;
800 
801 	mevent_qlock();
802 
803 	/*
804 	 * It's not possible to enable/disable a deleted event
805 	 */
806 	assert((evp->me_state & EV_DELETE) == 0);
807 
808 	newstate = evp->me_state;
809 	if (enable) {
810 		newstate |= EV_ENABLE;
811 		newstate &= ~EV_DISABLE;
812 	} else {
813 		newstate |= EV_DISABLE;
814 		newstate &= ~EV_ENABLE;
815 	}
816 
817 	/*
818 	 * No update needed if state isn't changing
819 	 */
820 	if (evp->me_state != newstate) {
821 		evp->me_state = newstate;
822 
823 		/*
824 		 * Place the entry onto the changed list if not
825 		 * already there.
826 		 */
827 		if (evp->me_cq == 0) {
828 			evp->me_cq = 1;
829 			LIST_REMOVE(evp, me_list);
830 			LIST_INSERT_HEAD(&change_head, evp, me_list);
831 			mevent_notify();
832 		}
833 	}
834 
835 	mevent_qunlock();
836 
837 	return (0);
838 }
839 
/*
 * Ask for an event to be enabled.  Returns the result of
 * mevent_update().
 */
int
mevent_enable(struct mevent *evp)
{
	const bool enable = true;

	return (mevent_update(evp, enable));
}
846 
/*
 * Ask for an event to be disabled.  Returns the result of
 * mevent_update().
 */
int
mevent_disable(struct mevent *evp)
{
	const bool enable = false;

	return (mevent_update(evp, enable));
}
853 
854 static int
855 mevent_delete_event(struct mevent *evp, int closefd)
856 {
857 	mevent_qlock();
858 
859 	/*
860          * Place the entry onto the changed list if not already there, and
861 	 * mark as to be deleted.
862          */
863         if (evp->me_cq == 0) {
864 		evp->me_cq = 1;
865 		LIST_REMOVE(evp, me_list);
866 		LIST_INSERT_HEAD(&change_head, evp, me_list);
867 		mevent_notify();
868         }
869 	evp->me_state = EV_DELETE;
870 
871 	if (closefd)
872 		evp->me_closefd = 1;
873 
874 	mevent_qunlock();
875 
876 	return (0);
877 }
878 
/*
 * Delete an event, leaving its descriptor open.  Returns the result of
 * mevent_delete_event().
 */
int
mevent_delete(struct mevent *evp)
{
	const int closefd = 0;

	return (mevent_delete_event(evp, closefd));
}
885 
/*
 * Delete an event and have its descriptor closed.  Returns the result of
 * mevent_delete_event().
 */
int
mevent_delete_close(struct mevent *evp)
{
	const int closefd = 1;

	return (mevent_delete_event(evp, closefd));
}
892 
893 static void
894 mevent_set_name(void)
895 {
896 
897 	pthread_set_name_np(mevent_tid, "mevent");
898 }
899 
900 void
901 mevent_dispatch(void)
902 {
903 #ifdef __FreeBSD__
904 	struct kevent changelist[MEVENT_MAX];
905 	struct kevent eventlist[MEVENT_MAX];
906 	struct mevent *pipev;
907 	int numev;
908 #else
909 	struct mevent *pipev;
910 #endif
911 	int ret;
912 #ifndef WITHOUT_CAPSICUM
913 	cap_rights_t rights;
914 #endif
915 
916 	mevent_tid = pthread_self();
917 	mevent_set_name();
918 
919 	pthread_once(&mevent_once, mevent_init);
920 
921 	/*
922 	 * Open the pipe that will be used for other threads to force
923 	 * the blocking kqueue call to exit by writing to it. Set the
924 	 * descriptor to non-blocking.
925 	 */
926 	ret = pipe(mevent_pipefd);
927 	if (ret < 0) {
928 		perror("pipe");
929 		exit(0);
930 	}
931 
932 #ifndef WITHOUT_CAPSICUM
933 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
934 	if (caph_rights_limit(mevent_pipefd[0], &rights) == -1)
935 		errx(EX_OSERR, "Unable to apply rights for sandbox");
936 	if (caph_rights_limit(mevent_pipefd[1], &rights) == -1)
937 		errx(EX_OSERR, "Unable to apply rights for sandbox");
938 #endif
939 
940 	/*
941 	 * Add internal event handler for the pipe write fd
942 	 */
943 	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
944 	assert(pipev != NULL);
945 
946 	for (;;) {
947 #ifdef __FreeBSD__
948 		/*
949 		 * Build changelist if required.
950 		 * XXX the changelist can be put into the blocking call
951 		 * to eliminate the extra syscall. Currently better for
952 		 * debug.
953 		 */
954 		numev = mevent_build(changelist);
955 		if (numev) {
956 			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
957 			if (ret == -1) {
958 				perror("Error return from kevent change");
959 			}
960 		}
961 
962 		/*
963 		 * Block awaiting events
964 		 */
965 		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
966 		if (ret == -1 && errno != EINTR) {
967 			perror("Error return from kevent monitor");
968 		}
969 
970 		/*
971 		 * Handle reported events
972 		 */
973 		mevent_handle(eventlist, ret);
974 
975 #else /* __FreeBSD__ */
976 		port_event_t pev;
977 
978 		/* Handle any pending updates */
979 		mevent_update_pending();
980 
981 		/* Block awaiting events */
982 		ret = port_get(mfd, &pev, NULL);
983 		if (ret != 0) {
984 			if (errno != EINTR)
985 				perror("Error return from port_get");
986 			continue;
987 		}
988 
989 		/* Handle reported event */
990 		mevent_handle_pe(&pev);
991 #endif /* __FreeBSD__ */
992 	}
993 }
994