xref: /illumos-gate/usr/src/lib/libc/port/sys/epoll.c (revision bbf21555)
1a5eb7107SBryan Cantrill /*
2a5eb7107SBryan Cantrill  * This file and its contents are supplied under the terms of the
3a5eb7107SBryan Cantrill  * Common Development and Distribution License ("CDDL"), version 1.0.
4a5eb7107SBryan Cantrill  * You may only use this file in accordance with the terms of version
5a5eb7107SBryan Cantrill  * 1.0 of the CDDL.
6a5eb7107SBryan Cantrill  *
7a5eb7107SBryan Cantrill  * A full copy of the text of the CDDL should have accompanied this
8a5eb7107SBryan Cantrill  * source.  A copy of the CDDL is also available via the Internet at
9a5eb7107SBryan Cantrill  * http://www.illumos.org/license/CDDL.
10a5eb7107SBryan Cantrill  */
11a5eb7107SBryan Cantrill 
12a5eb7107SBryan Cantrill /*
13f4f9009fSPatrick Mooney  * Copyright 2017 Joyent, Inc.
1466373fa7SPatrick Mooney  * Copyright 2020 Oxide Computer Company
15a5eb7107SBryan Cantrill  */
16a5eb7107SBryan Cantrill 
17a5eb7107SBryan Cantrill #include <sys/types.h>
18a5eb7107SBryan Cantrill #include <sys/epoll.h>
19a5eb7107SBryan Cantrill #include <sys/devpoll.h>
20a5eb7107SBryan Cantrill #include <unistd.h>
21a5eb7107SBryan Cantrill #include <errno.h>
22a5eb7107SBryan Cantrill #include <fcntl.h>
23a5eb7107SBryan Cantrill #include <poll.h>
24a5eb7107SBryan Cantrill 
25a5eb7107SBryan Cantrill /*
26a5eb7107SBryan Cantrill  * Events that match their epoll(7) equivalents.
27a5eb7107SBryan Cantrill  */
28a5eb7107SBryan Cantrill #if EPOLLIN != POLLIN
29a5eb7107SBryan Cantrill #error value of EPOLLIN does not match value of POLLIN
30a5eb7107SBryan Cantrill #endif
31a5eb7107SBryan Cantrill 
32a5eb7107SBryan Cantrill #if EPOLLPRI != POLLPRI
33a5eb7107SBryan Cantrill #error value of EPOLLPRI does not match value of POLLPRI
34a5eb7107SBryan Cantrill #endif
35a5eb7107SBryan Cantrill 
36a5eb7107SBryan Cantrill #if EPOLLOUT != POLLOUT
37a5eb7107SBryan Cantrill #error value of EPOLLOUT does not match value of POLLOUT
38a5eb7107SBryan Cantrill #endif
39a5eb7107SBryan Cantrill 
40a5eb7107SBryan Cantrill #if EPOLLRDNORM != POLLRDNORM
41a5eb7107SBryan Cantrill #error value of EPOLLRDNORM does not match value of POLLRDNORM
42a5eb7107SBryan Cantrill #endif
43a5eb7107SBryan Cantrill 
44a5eb7107SBryan Cantrill #if EPOLLRDBAND != POLLRDBAND
45a5eb7107SBryan Cantrill #error value of EPOLLRDBAND does not match value of POLLRDBAND
46a5eb7107SBryan Cantrill #endif
47a5eb7107SBryan Cantrill 
48a5eb7107SBryan Cantrill #if EPOLLERR != POLLERR
49a5eb7107SBryan Cantrill #error value of EPOLLERR does not match value of POLLERR
50a5eb7107SBryan Cantrill #endif
51a5eb7107SBryan Cantrill 
52a5eb7107SBryan Cantrill #if EPOLLHUP != POLLHUP
53a5eb7107SBryan Cantrill #error value of EPOLLHUP does not match value of POLLHUP
54a5eb7107SBryan Cantrill #endif
55a5eb7107SBryan Cantrill 
56a5eb7107SBryan Cantrill /*
57a5eb7107SBryan Cantrill  * Events that we ignore entirely.  They can be set in events, but they will
58a5eb7107SBryan Cantrill  * never be returned.
59a5eb7107SBryan Cantrill  */
6066373fa7SPatrick Mooney #define	EPOLLIGNORED	(EPOLLMSG | EPOLLWAKEUP | EPOLLEXCLUSIVE)
61a5eb7107SBryan Cantrill 
62a5eb7107SBryan Cantrill /*
63a5eb7107SBryan Cantrill  * Events that we swizzle into other bit positions.
64a5eb7107SBryan Cantrill  */
65a5eb7107SBryan Cantrill #define	EPOLLSWIZZLED	\
66a5eb7107SBryan Cantrill 	(EPOLLRDHUP | EPOLLONESHOT | EPOLLET | EPOLLWRBAND | EPOLLWRNORM)
67a5eb7107SBryan Cantrill 
/*
 * The defined behavior for epoll_wait/epoll_pwait when using a timeout less
 * than 0 is to wait for events until they arrive (or until the wait is
 * interrupted by a signal).  While poll(4D) operates in this manner for a
 * timeout of -1, using other negative values results in an immediate timeout,
 * as if it had been set to 0.  For that reason, values less than -1 are
 * clamped to -1.
 */
75f4f9009fSPatrick Mooney #define	EPOLL_TIMEOUT_CLAMP(t)	(((t) < -1) ? -1 : (t))
76f4f9009fSPatrick Mooney 
77a5eb7107SBryan Cantrill int
epoll_create(int size)78a5eb7107SBryan Cantrill epoll_create(int size)
79a5eb7107SBryan Cantrill {
80a5eb7107SBryan Cantrill 	int fd;
81a5eb7107SBryan Cantrill 
82a5eb7107SBryan Cantrill 	/*
83a5eb7107SBryan Cantrill 	 * From the epoll_create() man page:  "Since Linux 2.6.8, the size
84a5eb7107SBryan Cantrill 	 * argument is ignored, but must be greater than zero."  You keep using
85a5eb7107SBryan Cantrill 	 * that word "ignored"...
86a5eb7107SBryan Cantrill 	 */
87a5eb7107SBryan Cantrill 	if (size <= 0) {
88a5eb7107SBryan Cantrill 		errno = EINVAL;
89a5eb7107SBryan Cantrill 		return (-1);
90a5eb7107SBryan Cantrill 	}
91a5eb7107SBryan Cantrill 
92a5eb7107SBryan Cantrill 	if ((fd = open("/dev/poll", O_RDWR)) == -1)
93a5eb7107SBryan Cantrill 		return (-1);
94a5eb7107SBryan Cantrill 
95a5eb7107SBryan Cantrill 	if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
96a5eb7107SBryan Cantrill 		(void) close(fd);
97a5eb7107SBryan Cantrill 		return (-1);
98a5eb7107SBryan Cantrill 	}
99a5eb7107SBryan Cantrill 
100a5eb7107SBryan Cantrill 	return (fd);
101a5eb7107SBryan Cantrill }
102a5eb7107SBryan Cantrill 
103a5eb7107SBryan Cantrill int
epoll_create1(int flags)104a5eb7107SBryan Cantrill epoll_create1(int flags)
105a5eb7107SBryan Cantrill {
106a5eb7107SBryan Cantrill 	int fd, oflags = O_RDWR;
107a5eb7107SBryan Cantrill 
108a192d1c0SPatrick Mooney 	if (flags & EPOLL_CLOEXEC) {
109a5eb7107SBryan Cantrill 		oflags |= O_CLOEXEC;
110a192d1c0SPatrick Mooney 		flags ^= EPOLL_CLOEXEC;
111a192d1c0SPatrick Mooney 	}
112a192d1c0SPatrick Mooney 	/* Reject unrecognized flags */
113a192d1c0SPatrick Mooney 	if (flags != 0) {
114a192d1c0SPatrick Mooney 		errno = EINVAL;
115a192d1c0SPatrick Mooney 		return (-1);
116a192d1c0SPatrick Mooney 	}
117a5eb7107SBryan Cantrill 
118a5eb7107SBryan Cantrill 	if ((fd = open("/dev/poll", oflags)) == -1)
119a5eb7107SBryan Cantrill 		return (-1);
120a5eb7107SBryan Cantrill 
121a5eb7107SBryan Cantrill 	if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
122a5eb7107SBryan Cantrill 		(void) close(fd);
123a5eb7107SBryan Cantrill 		return (-1);
124a5eb7107SBryan Cantrill 	}
125a5eb7107SBryan Cantrill 
126a5eb7107SBryan Cantrill 	return (fd);
127a5eb7107SBryan Cantrill }
128a5eb7107SBryan Cantrill 
129a5eb7107SBryan Cantrill int
epoll_ctl(int epfd,int op,int fd,struct epoll_event * event)130a5eb7107SBryan Cantrill epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
131a5eb7107SBryan Cantrill {
132a5eb7107SBryan Cantrill 	dvpoll_epollfd_t epoll[2];
133a5eb7107SBryan Cantrill 	uint32_t events, ev = 0;
13457a0264bSPatrick Mooney 	int i = 0, res;
135a5eb7107SBryan Cantrill 
136a5eb7107SBryan Cantrill 	epoll[i].dpep_pollfd.fd = fd;
137a5eb7107SBryan Cantrill 
138a5eb7107SBryan Cantrill 	switch (op) {
139a5eb7107SBryan Cantrill 	case EPOLL_CTL_DEL:
140a5eb7107SBryan Cantrill 		ev = POLLREMOVE;
141a5eb7107SBryan Cantrill 		break;
142a5eb7107SBryan Cantrill 
143a5eb7107SBryan Cantrill 	case EPOLL_CTL_MOD:
14466373fa7SPatrick Mooney 		/* EPOLLEXCLUSIVE is prohibited for modify operations */
14566373fa7SPatrick Mooney 		if ((event->events & EPOLLEXCLUSIVE) != 0) {
14666373fa7SPatrick Mooney 			errno = EINVAL;
14766373fa7SPatrick Mooney 			return (-1);
14866373fa7SPatrick Mooney 		}
149a5eb7107SBryan Cantrill 		/*
150a5eb7107SBryan Cantrill 		 * In the modify case, we pass down two events:  one to
151a5eb7107SBryan Cantrill 		 * remove the event and another to add it back.
152a5eb7107SBryan Cantrill 		 */
153a5eb7107SBryan Cantrill 		epoll[i++].dpep_pollfd.events = POLLREMOVE;
154a5eb7107SBryan Cantrill 		epoll[i].dpep_pollfd.fd = fd;
155a5eb7107SBryan Cantrill 		/* FALLTHROUGH */
156a5eb7107SBryan Cantrill 
157a5eb7107SBryan Cantrill 	case EPOLL_CTL_ADD:
158a5eb7107SBryan Cantrill 		/*
159a5eb7107SBryan Cantrill 		 * Mask off the events that we ignore, and then swizzle the
160a5eb7107SBryan Cantrill 		 * events for which our values differ from their epoll(7)
161a5eb7107SBryan Cantrill 		 * equivalents.
162a5eb7107SBryan Cantrill 		 */
163a5eb7107SBryan Cantrill 		events = event->events;
164a5eb7107SBryan Cantrill 		ev = events & ~(EPOLLIGNORED | EPOLLSWIZZLED);
165a5eb7107SBryan Cantrill 
166a5eb7107SBryan Cantrill 		if (events & EPOLLRDHUP)
167a5eb7107SBryan Cantrill 			ev |= POLLRDHUP;
168a5eb7107SBryan Cantrill 
169a5eb7107SBryan Cantrill 		if (events & EPOLLET)
170a5eb7107SBryan Cantrill 			ev |= POLLET;
171a5eb7107SBryan Cantrill 
172a5eb7107SBryan Cantrill 		if (events & EPOLLONESHOT)
173a5eb7107SBryan Cantrill 			ev |= POLLONESHOT;
174a5eb7107SBryan Cantrill 
175a5eb7107SBryan Cantrill 		if (events & EPOLLWRNORM)
176a5eb7107SBryan Cantrill 			ev |= POLLWRNORM;
177a5eb7107SBryan Cantrill 
178a5eb7107SBryan Cantrill 		if (events & EPOLLWRBAND)
179a5eb7107SBryan Cantrill 			ev |= POLLWRBAND;
180a5eb7107SBryan Cantrill 
181a5eb7107SBryan Cantrill 		epoll[i].dpep_data = event->data.u64;
182a5eb7107SBryan Cantrill 		break;
183a5eb7107SBryan Cantrill 
184a5eb7107SBryan Cantrill 	default:
185a5eb7107SBryan Cantrill 		errno = EOPNOTSUPP;
186a5eb7107SBryan Cantrill 		return (-1);
187a5eb7107SBryan Cantrill 	}
188a5eb7107SBryan Cantrill 
189a5eb7107SBryan Cantrill 	epoll[i].dpep_pollfd.events = ev;
19057a0264bSPatrick Mooney retry:
19157a0264bSPatrick Mooney 	res = write(epfd, epoll, sizeof (epoll[0]) * (i + 1));
19257a0264bSPatrick Mooney 
19357a0264bSPatrick Mooney 	if (res == -1) {
19457a0264bSPatrick Mooney 		if (errno == EINTR) {
19557a0264bSPatrick Mooney 			/*
19657a0264bSPatrick Mooney 			 * Linux does not document EINTR as an allowed error
19757a0264bSPatrick Mooney 			 * for epoll_ctl.  The write must be retried if it is
19857a0264bSPatrick Mooney 			 * not done automatically via SA_RESTART.
19957a0264bSPatrick Mooney 			 */
20057a0264bSPatrick Mooney 			goto retry;
20157a0264bSPatrick Mooney 		}
20257a0264bSPatrick Mooney 		if (errno == ELOOP) {
20357a0264bSPatrick Mooney 			/*
20457a0264bSPatrick Mooney 			 * Convert the specific /dev/poll error about an fd
20557a0264bSPatrick Mooney 			 * loop into what is expected from the Linux epoll
20657a0264bSPatrick Mooney 			 * interface.
20757a0264bSPatrick Mooney 			 */
20857a0264bSPatrick Mooney 			errno = EINVAL;
20957a0264bSPatrick Mooney 		}
21057a0264bSPatrick Mooney 		return (-1);
21157a0264bSPatrick Mooney 	}
21257a0264bSPatrick Mooney 	return (0);
213a5eb7107SBryan Cantrill }
214a5eb7107SBryan Cantrill 
215a5eb7107SBryan Cantrill int
epoll_wait(int epfd,struct epoll_event * events,int maxevents,int timeout)216a5eb7107SBryan Cantrill epoll_wait(int epfd, struct epoll_event *events,
217a5eb7107SBryan Cantrill     int maxevents, int timeout)
218a5eb7107SBryan Cantrill {
219a5eb7107SBryan Cantrill 	struct dvpoll arg;
220a5eb7107SBryan Cantrill 
221a5eb7107SBryan Cantrill 	if (maxevents <= 0) {
222a5eb7107SBryan Cantrill 		errno = EINVAL;
223a5eb7107SBryan Cantrill 		return (-1);
224a5eb7107SBryan Cantrill 	}
225a5eb7107SBryan Cantrill 
226a5eb7107SBryan Cantrill 	arg.dp_nfds = maxevents;
227f4f9009fSPatrick Mooney 	arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout);
228a5eb7107SBryan Cantrill 	arg.dp_fds = (pollfd_t *)events;
229a5eb7107SBryan Cantrill 
230a5eb7107SBryan Cantrill 	return (ioctl(epfd, DP_POLL, &arg));
231a5eb7107SBryan Cantrill }
232a5eb7107SBryan Cantrill 
233a5eb7107SBryan Cantrill int
epoll_pwait(int epfd,struct epoll_event * events,int maxevents,int timeout,const sigset_t * sigmask)234a5eb7107SBryan Cantrill epoll_pwait(int epfd, struct epoll_event *events,
235a5eb7107SBryan Cantrill     int maxevents, int timeout, const sigset_t *sigmask)
236a5eb7107SBryan Cantrill {
237a5eb7107SBryan Cantrill 	struct dvpoll arg;
238a5eb7107SBryan Cantrill 
239a5eb7107SBryan Cantrill 	if (maxevents <= 0) {
240a5eb7107SBryan Cantrill 		errno = EINVAL;
241a5eb7107SBryan Cantrill 		return (-1);
242a5eb7107SBryan Cantrill 	}
243a5eb7107SBryan Cantrill 
244a5eb7107SBryan Cantrill 	arg.dp_nfds = maxevents;
245f4f9009fSPatrick Mooney 	arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout);
246a5eb7107SBryan Cantrill 	arg.dp_fds = (pollfd_t *)events;
247a5eb7107SBryan Cantrill 	arg.dp_setp = (sigset_t *)sigmask;
248a5eb7107SBryan Cantrill 
249a5eb7107SBryan Cantrill 	return (ioctl(epfd, DP_PPOLL, &arg));
250a5eb7107SBryan Cantrill }
251