xref: /illumos-gate/usr/src/cmd/bhyve/mevent.c (revision e8d71297)
14c87aefeSPatrick Mooney /*-
24c87aefeSPatrick Mooney  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
34c87aefeSPatrick Mooney  *
44c87aefeSPatrick Mooney  * Copyright (c) 2011 NetApp, Inc.
54c87aefeSPatrick Mooney  * All rights reserved.
64c87aefeSPatrick Mooney  *
74c87aefeSPatrick Mooney  * Redistribution and use in source and binary forms, with or without
84c87aefeSPatrick Mooney  * modification, are permitted provided that the following conditions
94c87aefeSPatrick Mooney  * are met:
104c87aefeSPatrick Mooney  * 1. Redistributions of source code must retain the above copyright
114c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer.
124c87aefeSPatrick Mooney  * 2. Redistributions in binary form must reproduce the above copyright
134c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer in the
144c87aefeSPatrick Mooney  *    documentation and/or other materials provided with the distribution.
154c87aefeSPatrick Mooney  *
164c87aefeSPatrick Mooney  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
174c87aefeSPatrick Mooney  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
184c87aefeSPatrick Mooney  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
194c87aefeSPatrick Mooney  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
204c87aefeSPatrick Mooney  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
214c87aefeSPatrick Mooney  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
224c87aefeSPatrick Mooney  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
234c87aefeSPatrick Mooney  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
244c87aefeSPatrick Mooney  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
254c87aefeSPatrick Mooney  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
264c87aefeSPatrick Mooney  * SUCH DAMAGE.
274c87aefeSPatrick Mooney  *
284c87aefeSPatrick Mooney  * $FreeBSD$
294c87aefeSPatrick Mooney  */
304c87aefeSPatrick Mooney 
314c87aefeSPatrick Mooney /*
324c87aefeSPatrick Mooney  * Copyright 2018 Joyent, Inc.
33251becc8SAndy Fiddaman  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
344c87aefeSPatrick Mooney  */
354c87aefeSPatrick Mooney 
364c87aefeSPatrick Mooney /*
376dc98349SAndy Fiddaman  * Micro event library for FreeBSD, designed for a single i/o thread
384c87aefeSPatrick Mooney  * using kqueue, and having events be persistent by default.
394c87aefeSPatrick Mooney  */
404c87aefeSPatrick Mooney 
414c87aefeSPatrick Mooney #include <sys/cdefs.h>
424c87aefeSPatrick Mooney __FBSDID("$FreeBSD$");
434c87aefeSPatrick Mooney 
444c87aefeSPatrick Mooney #include <assert.h>
454c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
464c87aefeSPatrick Mooney #include <capsicum_helpers.h>
474c87aefeSPatrick Mooney #endif
484c87aefeSPatrick Mooney #include <err.h>
494c87aefeSPatrick Mooney #include <errno.h>
50154972afSPatrick Mooney #include <stdbool.h>
514c87aefeSPatrick Mooney #include <stdlib.h>
524c87aefeSPatrick Mooney #include <stdio.h>
534c87aefeSPatrick Mooney #include <string.h>
544c87aefeSPatrick Mooney #include <sysexits.h>
554c87aefeSPatrick Mooney #include <unistd.h>
564c87aefeSPatrick Mooney 
574c87aefeSPatrick Mooney #include <sys/types.h>
584c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
594c87aefeSPatrick Mooney #include <sys/capsicum.h>
604c87aefeSPatrick Mooney #endif
614c87aefeSPatrick Mooney #ifdef __FreeBSD__
624c87aefeSPatrick Mooney #include <sys/event.h>
634c87aefeSPatrick Mooney #else
644c87aefeSPatrick Mooney #include <port.h>
654c87aefeSPatrick Mooney #include <sys/poll.h>
664c87aefeSPatrick Mooney #include <sys/siginfo.h>
674c87aefeSPatrick Mooney #include <sys/queue.h>
68154972afSPatrick Mooney #include <sys/debug.h>
69*e8d71297SAndy Fiddaman #include <sys/stat.h>
704c87aefeSPatrick Mooney #endif
714c87aefeSPatrick Mooney #include <sys/time.h>
724c87aefeSPatrick Mooney 
734c87aefeSPatrick Mooney #include <pthread.h>
744c87aefeSPatrick Mooney #include <pthread_np.h>
754c87aefeSPatrick Mooney 
764c87aefeSPatrick Mooney #include "mevent.h"
774c87aefeSPatrick Mooney 
/*
 * Upper bound on the number of changes batched per pass; also the size of
 * the scratch buffer used to drain the notification pipe.
 */
#define	MEVENT_MAX	64

#ifndef __FreeBSD__
/*
 * Stand-ins for the kqueue EV_* flags on platforms without kqueue. Note
 * that EV_ADD and EV_ENABLE deliberately share a value here.
 */
#define	EV_ENABLE	0x01
#define	EV_ADD		EV_ENABLE
#define	EV_DISABLE	0x02
#define	EV_DELETE	0x04

/* Period, in milliseconds, at which watched files are re-examined. */
static int mevent_file_poll_interval_ms = 5000;
#endif

/* Thread id that mevent_notify() treats as the i/o thread. */
static pthread_t mevent_tid;
/* Ensures mevent_init() runs exactly once. */
static pthread_once_t mevent_once = PTHREAD_ONCE_INIT;
#ifdef __FreeBSD__
/* Timer ident for kqueue timers (presumably the next one to hand out). */
static int mevent_timid = 43;
#endif
/* Pipe used to wake the i/o thread: [0] is drained, [1] is written. */
static int mevent_pipefd[2];
/* The kqueue (FreeBSD) or event port descriptor created by mevent_init(). */
static int mfd;
/* Protects the event lists; taken via mevent_qlock()/mevent_qunlock(). */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
974c87aefeSPatrick Mooney 
984c87aefeSPatrick Mooney struct mevent {
994c87aefeSPatrick Mooney 	void	(*me_func)(int, enum ev_type, void *);
1004c87aefeSPatrick Mooney #define me_msecs me_fd
1014c87aefeSPatrick Mooney 	int	me_fd;
1024c87aefeSPatrick Mooney #ifdef __FreeBSD__
1034c87aefeSPatrick Mooney 	int	me_timid;
1044c87aefeSPatrick Mooney #else
1054c87aefeSPatrick Mooney 	timer_t me_timid;
1064c87aefeSPatrick Mooney #endif
1074c87aefeSPatrick Mooney 	enum ev_type me_type;
1084c87aefeSPatrick Mooney 	void    *me_param;
1094c87aefeSPatrick Mooney 	int	me_cq;
110154972afSPatrick Mooney 	int	me_state; /* Desired kevent flags. */
1114c87aefeSPatrick Mooney 	int	me_closefd;
112b0de25cbSAndy Fiddaman 	int	me_fflags;
1134c87aefeSPatrick Mooney #ifndef __FreeBSD__
1144c87aefeSPatrick Mooney 	port_notify_t	me_notify;
1154c87aefeSPatrick Mooney 	struct sigevent	me_sigev;
1164c87aefeSPatrick Mooney 	boolean_t	me_auto_requeue;
117251becc8SAndy Fiddaman 	struct {
118251becc8SAndy Fiddaman 		int	mp_fd;
119251becc8SAndy Fiddaman 		off_t	mp_size;
120251becc8SAndy Fiddaman 		void	(*mp_func)(int, enum ev_type, void *);
121251becc8SAndy Fiddaman 		void    *mp_param;
122251becc8SAndy Fiddaman 	} me_poll;
1234c87aefeSPatrick Mooney #endif
1244c87aefeSPatrick Mooney 	LIST_ENTRY(mevent) me_list;
1254c87aefeSPatrick Mooney };
1264c87aefeSPatrick Mooney 
1274c87aefeSPatrick Mooney static LIST_HEAD(listhead, mevent) global_head, change_head;
1284c87aefeSPatrick Mooney 
1294c87aefeSPatrick Mooney static void
1304c87aefeSPatrick Mooney mevent_qlock(void)
1314c87aefeSPatrick Mooney {
1324c87aefeSPatrick Mooney 	pthread_mutex_lock(&mevent_lmutex);
1334c87aefeSPatrick Mooney }
1344c87aefeSPatrick Mooney 
1354c87aefeSPatrick Mooney static void
1364c87aefeSPatrick Mooney mevent_qunlock(void)
1374c87aefeSPatrick Mooney {
1384c87aefeSPatrick Mooney 	pthread_mutex_unlock(&mevent_lmutex);
1394c87aefeSPatrick Mooney }
1404c87aefeSPatrick Mooney 
1414c87aefeSPatrick Mooney static void
1424c87aefeSPatrick Mooney mevent_pipe_read(int fd, enum ev_type type, void *param)
1434c87aefeSPatrick Mooney {
1444c87aefeSPatrick Mooney 	char buf[MEVENT_MAX];
1454c87aefeSPatrick Mooney 	int status;
1464c87aefeSPatrick Mooney 
1474c87aefeSPatrick Mooney 	/*
1484c87aefeSPatrick Mooney 	 * Drain the pipe read side. The fd is non-blocking so this is
1494c87aefeSPatrick Mooney 	 * safe to do.
1504c87aefeSPatrick Mooney 	 */
1514c87aefeSPatrick Mooney 	do {
1524c87aefeSPatrick Mooney 		status = read(fd, buf, sizeof(buf));
1534c87aefeSPatrick Mooney 	} while (status == MEVENT_MAX);
1544c87aefeSPatrick Mooney }
1554c87aefeSPatrick Mooney 
1564c87aefeSPatrick Mooney static void
1574c87aefeSPatrick Mooney mevent_notify(void)
1584c87aefeSPatrick Mooney {
15984659b24SMichael Zeller 	char c = '\0';
1606dc98349SAndy Fiddaman 
1614c87aefeSPatrick Mooney 	/*
1624c87aefeSPatrick Mooney 	 * If calling from outside the i/o thread, write a byte on the
1634c87aefeSPatrick Mooney 	 * pipe to force the i/o thread to exit the blocking kevent call.
1644c87aefeSPatrick Mooney 	 */
1654c87aefeSPatrick Mooney 	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
1664c87aefeSPatrick Mooney 		write(mevent_pipefd[1], &c, 1);
1674c87aefeSPatrick Mooney 	}
1684c87aefeSPatrick Mooney }
169b0de25cbSAndy Fiddaman 
170b0de25cbSAndy Fiddaman static void
171b0de25cbSAndy Fiddaman mevent_init(void)
172b0de25cbSAndy Fiddaman {
173b0de25cbSAndy Fiddaman #ifndef WITHOUT_CAPSICUM
174b0de25cbSAndy Fiddaman 	cap_rights_t rights;
175b0de25cbSAndy Fiddaman #endif
176b0de25cbSAndy Fiddaman 
177b0de25cbSAndy Fiddaman #ifdef __FreeBSD__
178b0de25cbSAndy Fiddaman 	mfd = kqueue();
179b0de25cbSAndy Fiddaman #else
180b0de25cbSAndy Fiddaman 	mfd = port_create();
181b0de25cbSAndy Fiddaman #endif
182b0de25cbSAndy Fiddaman 	assert(mfd > 0);
183b0de25cbSAndy Fiddaman 
184b0de25cbSAndy Fiddaman #ifndef WITHOUT_CAPSICUM
185b0de25cbSAndy Fiddaman 	cap_rights_init(&rights, CAP_KQUEUE);
186b0de25cbSAndy Fiddaman 	if (caph_rights_limit(mfd, &rights) == -1)
187b0de25cbSAndy Fiddaman 		errx(EX_OSERR, "Unable to apply rights for sandbox");
188251becc8SAndy Fiddaman #endif
189b0de25cbSAndy Fiddaman 
190b0de25cbSAndy Fiddaman 	LIST_INIT(&change_head);
191b0de25cbSAndy Fiddaman 	LIST_INIT(&global_head);
192b0de25cbSAndy Fiddaman }
193b0de25cbSAndy Fiddaman 
194b0de25cbSAndy Fiddaman 
1954c87aefeSPatrick Mooney #ifdef __FreeBSD__
1964c87aefeSPatrick Mooney static int
1974c87aefeSPatrick Mooney mevent_kq_filter(struct mevent *mevp)
1984c87aefeSPatrick Mooney {
1994c87aefeSPatrick Mooney 	int retval;
2004c87aefeSPatrick Mooney 
2014c87aefeSPatrick Mooney 	retval = 0;
2024c87aefeSPatrick Mooney 
2034c87aefeSPatrick Mooney 	if (mevp->me_type == EVF_READ)
2044c87aefeSPatrick Mooney 		retval = EVFILT_READ;
2054c87aefeSPatrick Mooney 
2064c87aefeSPatrick Mooney 	if (mevp->me_type == EVF_WRITE)
2074c87aefeSPatrick Mooney 		retval = EVFILT_WRITE;
2084c87aefeSPatrick Mooney 
2094c87aefeSPatrick Mooney 	if (mevp->me_type == EVF_TIMER)
2104c87aefeSPatrick Mooney 		retval = EVFILT_TIMER;
2114c87aefeSPatrick Mooney 
2124c87aefeSPatrick Mooney 	if (mevp->me_type == EVF_SIGNAL)
2134c87aefeSPatrick Mooney 		retval = EVFILT_SIGNAL;
2144c87aefeSPatrick Mooney 
215b0de25cbSAndy Fiddaman 	if (mevp->me_type == EVF_VNODE)
216b0de25cbSAndy Fiddaman 		retval = EVFILT_VNODE;
217b0de25cbSAndy Fiddaman 
2184c87aefeSPatrick Mooney 	return (retval);
2194c87aefeSPatrick Mooney }
2204c87aefeSPatrick Mooney 
2214c87aefeSPatrick Mooney static int
2224c87aefeSPatrick Mooney mevent_kq_flags(struct mevent *mevp)
2234c87aefeSPatrick Mooney {
2246dc98349SAndy Fiddaman 	int retval;
2256dc98349SAndy Fiddaman 
2266dc98349SAndy Fiddaman 	retval = mevp->me_state;
2276dc98349SAndy Fiddaman 
2286dc98349SAndy Fiddaman 	if (mevp->me_type == EVF_VNODE)
2296dc98349SAndy Fiddaman 		retval |= EV_CLEAR;
2306dc98349SAndy Fiddaman 
2316dc98349SAndy Fiddaman 	return (retval);
2324c87aefeSPatrick Mooney }
2334c87aefeSPatrick Mooney 
2344c87aefeSPatrick Mooney static int
2354c87aefeSPatrick Mooney mevent_kq_fflags(struct mevent *mevp)
2364c87aefeSPatrick Mooney {
237b0de25cbSAndy Fiddaman 	int retval;
238b0de25cbSAndy Fiddaman 
239b0de25cbSAndy Fiddaman 	retval = 0;
240b0de25cbSAndy Fiddaman 
241b0de25cbSAndy Fiddaman 	switch (mevp->me_type) {
242b0de25cbSAndy Fiddaman 	case EVF_VNODE:
243b0de25cbSAndy Fiddaman 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
244b0de25cbSAndy Fiddaman 			retval |= NOTE_ATTRIB;
245b0de25cbSAndy Fiddaman 		break;
2466dc98349SAndy Fiddaman 	case EVF_READ:
2476dc98349SAndy Fiddaman 	case EVF_WRITE:
2486dc98349SAndy Fiddaman 	case EVF_TIMER:
2496dc98349SAndy Fiddaman 	case EVF_SIGNAL:
2506dc98349SAndy Fiddaman 		break;
251b0de25cbSAndy Fiddaman 	}
252b0de25cbSAndy Fiddaman 
253b0de25cbSAndy Fiddaman 	return (retval);
254b0de25cbSAndy Fiddaman }
255b0de25cbSAndy Fiddaman 
256b0de25cbSAndy Fiddaman static void
257b0de25cbSAndy Fiddaman mevent_populate(struct mevent *mevp, struct kevent *kev)
258b0de25cbSAndy Fiddaman {
259b0de25cbSAndy Fiddaman 	if (mevp->me_type == EVF_TIMER) {
260b0de25cbSAndy Fiddaman 		kev->ident = mevp->me_timid;
261b0de25cbSAndy Fiddaman 		kev->data = mevp->me_msecs;
262b0de25cbSAndy Fiddaman 	} else {
263b0de25cbSAndy Fiddaman 		kev->ident = mevp->me_fd;
264b0de25cbSAndy Fiddaman 		kev->data = 0;
265b0de25cbSAndy Fiddaman 	}
266b0de25cbSAndy Fiddaman 	kev->filter = mevent_kq_filter(mevp);
267b0de25cbSAndy Fiddaman 	kev->flags = mevent_kq_flags(mevp);
268b0de25cbSAndy Fiddaman 	kev->fflags = mevent_kq_fflags(mevp);
269b0de25cbSAndy Fiddaman 	kev->udata = mevp;
2704c87aefeSPatrick Mooney }
2714c87aefeSPatrick Mooney 
2724c87aefeSPatrick Mooney static int
273b0de25cbSAndy Fiddaman mevent_build(struct kevent *kev)
2744c87aefeSPatrick Mooney {
2754c87aefeSPatrick Mooney 	struct mevent *mevp, *tmpp;
2764c87aefeSPatrick Mooney 	int i;
2774c87aefeSPatrick Mooney 
2784c87aefeSPatrick Mooney 	i = 0;
2794c87aefeSPatrick Mooney 
2804c87aefeSPatrick Mooney 	mevent_qlock();
2814c87aefeSPatrick Mooney 
2824c87aefeSPatrick Mooney 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
2834c87aefeSPatrick Mooney 		if (mevp->me_closefd) {
2844c87aefeSPatrick Mooney 			/*
2854c87aefeSPatrick Mooney 			 * A close of the file descriptor will remove the
2864c87aefeSPatrick Mooney 			 * event
2874c87aefeSPatrick Mooney 			 */
2884c87aefeSPatrick Mooney 			close(mevp->me_fd);
2894c87aefeSPatrick Mooney 		} else {
290b0de25cbSAndy Fiddaman 			assert((mevp->me_state & EV_ADD) == 0);
291b0de25cbSAndy Fiddaman 			mevent_populate(mevp, &kev[i]);
2924c87aefeSPatrick Mooney 			i++;
2934c87aefeSPatrick Mooney 		}
2944c87aefeSPatrick Mooney 
2954c87aefeSPatrick Mooney 		mevp->me_cq = 0;
2964c87aefeSPatrick Mooney 		LIST_REMOVE(mevp, me_list);
2974c87aefeSPatrick Mooney 
298154972afSPatrick Mooney 		if (mevp->me_state & EV_DELETE) {
2994c87aefeSPatrick Mooney 			free(mevp);
3004c87aefeSPatrick Mooney 		} else {
3014c87aefeSPatrick Mooney 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
3024c87aefeSPatrick Mooney 		}
3034c87aefeSPatrick Mooney 
3044c87aefeSPatrick Mooney 		assert(i < MEVENT_MAX);
3054c87aefeSPatrick Mooney 	}
3064c87aefeSPatrick Mooney 
3074c87aefeSPatrick Mooney 	mevent_qunlock();
3084c87aefeSPatrick Mooney 
3094c87aefeSPatrick Mooney 	return (i);
3104c87aefeSPatrick Mooney }
3114c87aefeSPatrick Mooney 
3124c87aefeSPatrick Mooney static void
3134c87aefeSPatrick Mooney mevent_handle(struct kevent *kev, int numev)
3144c87aefeSPatrick Mooney {
3154c87aefeSPatrick Mooney 	struct mevent *mevp;
3164c87aefeSPatrick Mooney 	int i;
3174c87aefeSPatrick Mooney 
3184c87aefeSPatrick Mooney 	for (i = 0; i < numev; i++) {
3194c87aefeSPatrick Mooney 		mevp = kev[i].udata;
3204c87aefeSPatrick Mooney 
3214c87aefeSPatrick Mooney 		/* XXX check for EV_ERROR ? */
3224c87aefeSPatrick Mooney 
3234c87aefeSPatrick Mooney 		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
3244c87aefeSPatrick Mooney 	}
3254c87aefeSPatrick Mooney }
3264c87aefeSPatrick Mooney 
3274c87aefeSPatrick Mooney #else /* __FreeBSD__ */
3284c87aefeSPatrick Mooney 
329154972afSPatrick Mooney static boolean_t
330154972afSPatrick Mooney mevent_clarify_state(struct mevent *mevp)
331154972afSPatrick Mooney {
332154972afSPatrick Mooney 	const int state = mevp->me_state;
333154972afSPatrick Mooney 
334154972afSPatrick Mooney 	if ((state & EV_DELETE) != 0) {
335154972afSPatrick Mooney 		/* All other intents are overriden by delete. */
336154972afSPatrick Mooney 		mevp->me_state = EV_DELETE;
337154972afSPatrick Mooney 		return (B_TRUE);
338154972afSPatrick Mooney 	}
339154972afSPatrick Mooney 
340154972afSPatrick Mooney 	/*
341154972afSPatrick Mooney 	 * Without a distinction between EV_ADD and EV_ENABLE in our emulation,
342154972afSPatrick Mooney 	 * handling the add-disabled case means eliding the portfs operation
343154972afSPatrick Mooney 	 * when both flags are present.
344154972afSPatrick Mooney 	 *
345154972afSPatrick Mooney 	 * This is not a concern for subsequent enable/disable operations, as
346154972afSPatrick Mooney 	 * mevent_update() toggles the flags properly so they are not left in
347154972afSPatrick Mooney 	 * conflict.
348154972afSPatrick Mooney 	 */
349154972afSPatrick Mooney 	if (state == (EV_ENABLE|EV_DISABLE)) {
350154972afSPatrick Mooney 		mevp->me_state = EV_DISABLE;
351154972afSPatrick Mooney 		return (B_FALSE);
352154972afSPatrick Mooney 	}
353154972afSPatrick Mooney 
354154972afSPatrick Mooney 	return (B_TRUE);
355154972afSPatrick Mooney }
356154972afSPatrick Mooney 
3574c87aefeSPatrick Mooney static void
358251becc8SAndy Fiddaman mevent_poll_file_attrib(int fd, enum ev_type type, void *param)
3594c87aefeSPatrick Mooney {
360251becc8SAndy Fiddaman 	struct mevent *mevp = param;
361251becc8SAndy Fiddaman 	struct stat st;
3624c87aefeSPatrick Mooney 
363251becc8SAndy Fiddaman 	if (fstat(mevp->me_poll.mp_fd, &st) != 0) {
364*e8d71297SAndy Fiddaman 		(void) fprintf(stderr, "%s: fstat(%d) failed: %s\n",
365*e8d71297SAndy Fiddaman 		    __func__, fd, strerror(errno));
366251becc8SAndy Fiddaman 		return;
367251becc8SAndy Fiddaman 	}
3684c87aefeSPatrick Mooney 
369*e8d71297SAndy Fiddaman 	/*
370*e8d71297SAndy Fiddaman 	 * The only current consumer of file attribute monitoring is
371*e8d71297SAndy Fiddaman 	 * blockif, which wants to know about size changes.
372*e8d71297SAndy Fiddaman 	 */
373*e8d71297SAndy Fiddaman 	if (mevp->me_poll.mp_size != st.st_size) {
374251becc8SAndy Fiddaman 		mevp->me_poll.mp_size = st.st_size;
3754c87aefeSPatrick Mooney 
376251becc8SAndy Fiddaman 		(*mevp->me_poll.mp_func)(mevp->me_poll.mp_fd, EVF_VNODE,
377251becc8SAndy Fiddaman 		    mevp->me_poll.mp_param);
378251becc8SAndy Fiddaman 	}
379251becc8SAndy Fiddaman }
3804c87aefeSPatrick Mooney 
381251becc8SAndy Fiddaman static void
382251becc8SAndy Fiddaman mevent_update_one_readwrite(struct mevent *mevp)
383251becc8SAndy Fiddaman {
384251becc8SAndy Fiddaman 	int portfd = mevp->me_notify.portnfy_port;
385251becc8SAndy Fiddaman 
386251becc8SAndy Fiddaman 	mevp->me_auto_requeue = B_FALSE;
387251becc8SAndy Fiddaman 
388251becc8SAndy Fiddaman 	switch (mevp->me_state) {
389251becc8SAndy Fiddaman 	case EV_ENABLE:
390251becc8SAndy Fiddaman 	{
391251becc8SAndy Fiddaman 		const int events = (mevp->me_type == EVF_READ) ?
392251becc8SAndy Fiddaman 		    POLLIN : POLLOUT;
393251becc8SAndy Fiddaman 
394251becc8SAndy Fiddaman 		if (port_associate(portfd, PORT_SOURCE_FD, mevp->me_fd,
395251becc8SAndy Fiddaman 		    events, mevp) != 0) {
396251becc8SAndy Fiddaman 			(void) fprintf(stderr,
397251becc8SAndy Fiddaman 			    "port_associate fd %d %p failed: %s\n",
398251becc8SAndy Fiddaman 			    mevp->me_fd, mevp, strerror(errno));
3994c87aefeSPatrick Mooney 		}
400251becc8SAndy Fiddaman 		return;
401251becc8SAndy Fiddaman 	}
402251becc8SAndy Fiddaman 	case EV_DISABLE:
403251becc8SAndy Fiddaman 	case EV_DELETE:
404251becc8SAndy Fiddaman 		/*
405251becc8SAndy Fiddaman 		 * A disable that comes in while an event is being
406251becc8SAndy Fiddaman 		 * handled will result in an ENOENT.
407251becc8SAndy Fiddaman 		 */
408251becc8SAndy Fiddaman 		if (port_dissociate(portfd, PORT_SOURCE_FD,
409251becc8SAndy Fiddaman 		    mevp->me_fd) != 0 && errno != ENOENT) {
410251becc8SAndy Fiddaman 			(void) fprintf(stderr, "port_dissociate "
411251becc8SAndy Fiddaman 			    "portfd %d fd %d mevp %p failed: %s\n",
412251becc8SAndy Fiddaman 			    portfd, mevp->me_fd, mevp, strerror(errno));
4134c87aefeSPatrick Mooney 		}
414251becc8SAndy Fiddaman 		return;
415251becc8SAndy Fiddaman 	default:
416251becc8SAndy Fiddaman 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
417251becc8SAndy Fiddaman 		    mevp->me_state);
418251becc8SAndy Fiddaman 		abort();
419251becc8SAndy Fiddaman 	}
420251becc8SAndy Fiddaman }
4214c87aefeSPatrick Mooney 
422251becc8SAndy Fiddaman static void
423251becc8SAndy Fiddaman mevent_update_one_timer(struct mevent *mevp)
424251becc8SAndy Fiddaman {
425251becc8SAndy Fiddaman 	mevp->me_auto_requeue = B_TRUE;
4264c87aefeSPatrick Mooney 
427251becc8SAndy Fiddaman 	switch (mevp->me_state) {
428251becc8SAndy Fiddaman 	case EV_ENABLE:
429251becc8SAndy Fiddaman 	{
430251becc8SAndy Fiddaman 		struct itimerspec it = { 0 };
4314c87aefeSPatrick Mooney 
432251becc8SAndy Fiddaman 		mevp->me_sigev.sigev_notify = SIGEV_PORT;
433251becc8SAndy Fiddaman 		mevp->me_sigev.sigev_value.sival_ptr = &mevp->me_notify;
4344c87aefeSPatrick Mooney 
435251becc8SAndy Fiddaman 		if (timer_create(CLOCK_REALTIME, &mevp->me_sigev,
436251becc8SAndy Fiddaman 		    &mevp->me_timid) != 0) {
437251becc8SAndy Fiddaman 			(void) fprintf(stderr, "timer_create failed: %s",
438251becc8SAndy Fiddaman 			    strerror(errno));
439251becc8SAndy Fiddaman 			return;
440251becc8SAndy Fiddaman 		}
4414c87aefeSPatrick Mooney 
442251becc8SAndy Fiddaman 		/* The first timeout */
443251becc8SAndy Fiddaman 		it.it_value.tv_sec = mevp->me_msecs / MILLISEC;
444251becc8SAndy Fiddaman 		it.it_value.tv_nsec =
445251becc8SAndy Fiddaman 			MSEC2NSEC(mevp->me_msecs % MILLISEC);
446251becc8SAndy Fiddaman 		/* Repeat at the same interval */
447251becc8SAndy Fiddaman 		it.it_interval = it.it_value;
4484c87aefeSPatrick Mooney 
449251becc8SAndy Fiddaman 		if (timer_settime(mevp->me_timid, 0, &it, NULL) != 0) {
450251becc8SAndy Fiddaman 			(void) fprintf(stderr, "timer_settime failed: %s",
451251becc8SAndy Fiddaman 			    strerror(errno));
4524c87aefeSPatrick Mooney 		}
453251becc8SAndy Fiddaman 		return;
454251becc8SAndy Fiddaman 	}
455251becc8SAndy Fiddaman 	case EV_DISABLE:
456251becc8SAndy Fiddaman 	case EV_DELETE:
457251becc8SAndy Fiddaman 		if (timer_delete(mevp->me_timid) != 0) {
458251becc8SAndy Fiddaman 			(void) fprintf(stderr, "timer_delete failed: %s",
459251becc8SAndy Fiddaman 			    strerror(errno));
4604c87aefeSPatrick Mooney 		}
461251becc8SAndy Fiddaman 		mevp->me_timid = -1;
462251becc8SAndy Fiddaman 		return;
463251becc8SAndy Fiddaman 	default:
464251becc8SAndy Fiddaman 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
465251becc8SAndy Fiddaman 		    mevp->me_state);
466251becc8SAndy Fiddaman 		abort();
467251becc8SAndy Fiddaman 	}
468251becc8SAndy Fiddaman }
469b0de25cbSAndy Fiddaman 
470251becc8SAndy Fiddaman static void
471251becc8SAndy Fiddaman mevent_update_one_vnode(struct mevent *mevp)
472251becc8SAndy Fiddaman {
473251becc8SAndy Fiddaman 	switch (mevp->me_state) {
474251becc8SAndy Fiddaman 	case EV_ENABLE:
475251becc8SAndy Fiddaman 	{
476*e8d71297SAndy Fiddaman 		struct stat st;
477251becc8SAndy Fiddaman 		int events = 0;
478b0de25cbSAndy Fiddaman 
479251becc8SAndy Fiddaman 		if ((mevp->me_fflags & EVFF_ATTRIB) != 0)
480251becc8SAndy Fiddaman 			events |= FILE_ATTRIB;
481b0de25cbSAndy Fiddaman 
482251becc8SAndy Fiddaman 		assert(events != 0);
483251becc8SAndy Fiddaman 
484*e8d71297SAndy Fiddaman 		/*
485*e8d71297SAndy Fiddaman 		 * It is tempting to use the PORT_SOURCE_FILE type for this in
486*e8d71297SAndy Fiddaman 		 * conjunction with the FILE_ATTRIB event type. Unfortunately
487*e8d71297SAndy Fiddaman 		 * this event type triggers on any change to the file's
488*e8d71297SAndy Fiddaman 		 * ctime, and therefore for every write as well as attribute
489*e8d71297SAndy Fiddaman 		 * changes. It also does not work for ZVOLs.
490*e8d71297SAndy Fiddaman 		 *
491*e8d71297SAndy Fiddaman 		 * Convert this to a timer event and poll for the file
492*e8d71297SAndy Fiddaman 		 * attribute changes that we care about.
493*e8d71297SAndy Fiddaman 		 */
494251becc8SAndy Fiddaman 
495*e8d71297SAndy Fiddaman 		if (fstat(mevp->me_fd, &st) != 0) {
496*e8d71297SAndy Fiddaman 			(void) fprintf(stderr, "fstat(%d) failed: %s\n",
497*e8d71297SAndy Fiddaman 			    mevp->me_fd, strerror(errno));
498*e8d71297SAndy Fiddaman 			return;
499*e8d71297SAndy Fiddaman 		}
500b0de25cbSAndy Fiddaman 
501*e8d71297SAndy Fiddaman 		mevp->me_poll.mp_fd = mevp->me_fd;
502*e8d71297SAndy Fiddaman 		mevp->me_poll.mp_size = st.st_size;
503251becc8SAndy Fiddaman 
504*e8d71297SAndy Fiddaman 		mevp->me_poll.mp_func = mevp->me_func;
505*e8d71297SAndy Fiddaman 		mevp->me_poll.mp_param = mevp->me_param;
506*e8d71297SAndy Fiddaman 		mevp->me_func = mevent_poll_file_attrib;
507*e8d71297SAndy Fiddaman 		mevp->me_param = mevp;
508251becc8SAndy Fiddaman 
509*e8d71297SAndy Fiddaman 		mevp->me_type = EVF_TIMER;
510*e8d71297SAndy Fiddaman 		mevp->me_timid = -1;
511*e8d71297SAndy Fiddaman 		mevp->me_msecs = mevent_file_poll_interval_ms;
512*e8d71297SAndy Fiddaman 		mevent_update_one_timer(mevp);
513251becc8SAndy Fiddaman 
514251becc8SAndy Fiddaman 		return;
515251becc8SAndy Fiddaman 	}
516251becc8SAndy Fiddaman 	case EV_DISABLE:
517251becc8SAndy Fiddaman 	case EV_DELETE:
518251becc8SAndy Fiddaman 		/*
519*e8d71297SAndy Fiddaman 		 * These events do not really exist as they are converted to
520*e8d71297SAndy Fiddaman 		 * timers; fall through to abort.
521251becc8SAndy Fiddaman 		 */
5224c87aefeSPatrick Mooney 	default:
523251becc8SAndy Fiddaman 		(void) fprintf(stderr, "%s: unhandled state %d\n", __func__,
524251becc8SAndy Fiddaman 		    mevp->me_state);
525251becc8SAndy Fiddaman 		abort();
5264c87aefeSPatrick Mooney 	}
527251becc8SAndy Fiddaman }
5284c87aefeSPatrick Mooney 
529251becc8SAndy Fiddaman static void
530251becc8SAndy Fiddaman mevent_update_one(struct mevent *mevp)
531251becc8SAndy Fiddaman {
532251becc8SAndy Fiddaman 	switch (mevp->me_type) {
533251becc8SAndy Fiddaman 	case EVF_READ:
534251becc8SAndy Fiddaman 	case EVF_WRITE:
535251becc8SAndy Fiddaman 		mevent_update_one_readwrite(mevp);
536251becc8SAndy Fiddaman 		break;
537251becc8SAndy Fiddaman 	case EVF_TIMER:
538251becc8SAndy Fiddaman 		mevent_update_one_timer(mevp);
539251becc8SAndy Fiddaman 		break;
540251becc8SAndy Fiddaman 	case EVF_VNODE:
541251becc8SAndy Fiddaman 		mevent_update_one_vnode(mevp);
542251becc8SAndy Fiddaman 		break;
543251becc8SAndy Fiddaman 	case EVF_SIGNAL: /* EVF_SIGNAL not yet implemented. */
544251becc8SAndy Fiddaman 	default:
545251becc8SAndy Fiddaman 		(void) fprintf(stderr, "%s: unhandled event type %d\n",
546251becc8SAndy Fiddaman 		    __func__, mevp->me_type);
547251becc8SAndy Fiddaman 		abort();
548251becc8SAndy Fiddaman 	}
5494c87aefeSPatrick Mooney }
5504c87aefeSPatrick Mooney 
5514c87aefeSPatrick Mooney static void
552b0de25cbSAndy Fiddaman mevent_populate(struct mevent *mevp)
553b0de25cbSAndy Fiddaman {
554b0de25cbSAndy Fiddaman 	mevp->me_notify.portnfy_port = mfd;
555b0de25cbSAndy Fiddaman 	mevp->me_notify.portnfy_user = mevp;
556b0de25cbSAndy Fiddaman }
557b0de25cbSAndy Fiddaman 
558b0de25cbSAndy Fiddaman static void
559b0de25cbSAndy Fiddaman mevent_update_pending()
5604c87aefeSPatrick Mooney {
5614c87aefeSPatrick Mooney 	struct mevent *mevp, *tmpp;
5624c87aefeSPatrick Mooney 
5634c87aefeSPatrick Mooney 	mevent_qlock();
5644c87aefeSPatrick Mooney 
5654c87aefeSPatrick Mooney 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
566b0de25cbSAndy Fiddaman 		mevent_populate(mevp);
5674c87aefeSPatrick Mooney 		if (mevp->me_closefd) {
5684c87aefeSPatrick Mooney 			/*
5694c87aefeSPatrick Mooney 			 * A close of the file descriptor will remove the
5704c87aefeSPatrick Mooney 			 * event
5714c87aefeSPatrick Mooney 			 */
5724c87aefeSPatrick Mooney 			(void) close(mevp->me_fd);
5734c87aefeSPatrick Mooney 			mevp->me_fd = -1;
5744c87aefeSPatrick Mooney 		} else {
575154972afSPatrick Mooney 			if (mevent_clarify_state(mevp)) {
576154972afSPatrick Mooney 				mevent_update_one(mevp);
577154972afSPatrick Mooney 			}
5784c87aefeSPatrick Mooney 		}
5794c87aefeSPatrick Mooney 
5804c87aefeSPatrick Mooney 		mevp->me_cq = 0;
5814c87aefeSPatrick Mooney 		LIST_REMOVE(mevp, me_list);
5824c87aefeSPatrick Mooney 
583154972afSPatrick Mooney 		if (mevp->me_state & EV_DELETE) {
5844c87aefeSPatrick Mooney 			free(mevp);
5854c87aefeSPatrick Mooney 		} else {
5864c87aefeSPatrick Mooney 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
5874c87aefeSPatrick Mooney 		}
5884c87aefeSPatrick Mooney 	}
5894c87aefeSPatrick Mooney 
5904c87aefeSPatrick Mooney 	mevent_qunlock();
5914c87aefeSPatrick Mooney }
5924c87aefeSPatrick Mooney 
5934c87aefeSPatrick Mooney static void
5944c87aefeSPatrick Mooney mevent_handle_pe(port_event_t *pe)
5954c87aefeSPatrick Mooney {
5964c87aefeSPatrick Mooney 	struct mevent *mevp = pe->portev_user;
5974c87aefeSPatrick Mooney 
5984c87aefeSPatrick Mooney 	(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
5994c87aefeSPatrick Mooney 
6004c87aefeSPatrick Mooney 	mevent_qlock();
6014c87aefeSPatrick Mooney 	if (!mevp->me_cq && !mevp->me_auto_requeue) {
6024c87aefeSPatrick Mooney 		mevent_update_one(mevp);
6034c87aefeSPatrick Mooney 	}
6044c87aefeSPatrick Mooney 	mevent_qunlock();
6054c87aefeSPatrick Mooney }
6064c87aefeSPatrick Mooney #endif
6074c87aefeSPatrick Mooney 
608154972afSPatrick Mooney static struct mevent *
609154972afSPatrick Mooney mevent_add_state(int tfd, enum ev_type type,
610154972afSPatrick Mooney 	   void (*func)(int, enum ev_type, void *), void *param,
611b0de25cbSAndy Fiddaman 	   int state, int fflags)
6124c87aefeSPatrick Mooney {
613b0de25cbSAndy Fiddaman #ifdef __FreeBSD__
614b0de25cbSAndy Fiddaman 	struct kevent kev;
615b0de25cbSAndy Fiddaman #endif
6164c87aefeSPatrick Mooney 	struct mevent *lp, *mevp;
617b0de25cbSAndy Fiddaman #ifdef __FreeBSD__
618b0de25cbSAndy Fiddaman 	int ret;
619b0de25cbSAndy Fiddaman #endif
6204c87aefeSPatrick Mooney 
6214c87aefeSPatrick Mooney 	if (tfd < 0 || func == NULL) {
6224c87aefeSPatrick Mooney 		return (NULL);
6234c87aefeSPatrick Mooney 	}
6244c87aefeSPatrick Mooney 
6254c87aefeSPatrick Mooney 	mevp = NULL;
6264c87aefeSPatrick Mooney 
627b0de25cbSAndy Fiddaman 	pthread_once(&mevent_once, mevent_init);
628b0de25cbSAndy Fiddaman 
6294c87aefeSPatrick Mooney 	mevent_qlock();
6304c87aefeSPatrick Mooney 
6314c87aefeSPatrick Mooney 	/*
6324c87aefeSPatrick Mooney 	 * Verify that the fd/type tuple is not present in any list
6334c87aefeSPatrick Mooney 	 */
6344c87aefeSPatrick Mooney 	LIST_FOREACH(lp, &global_head, me_list) {
635