27c478bdstevel@tonic-gate * CDDL HEADER START
37c478bdstevel@tonic-gate *
47c478bdstevel@tonic-gate * The contents of this file are subject to the terms of the
53470957raf * Common Development and Distribution License (the "License").
63470957raf * You may not use this file except in compliance with the License.
77c478bdstevel@tonic-gate *
87c478bdstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bdstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bdstevel@tonic-gate * See the License for the specific language governing permissions
117c478bdstevel@tonic-gate * and limitations under the License.
127c478bdstevel@tonic-gate *
137c478bdstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bdstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bdstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bdstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bdstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bdstevel@tonic-gate *
197c478bdstevel@tonic-gate * CDDL HEADER END
207c478bdstevel@tonic-gate */
23df2381bpraks * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247c478bdstevel@tonic-gate * Use is subject to license terms.
257c478bdstevel@tonic-gate */
27d158018Bryan Cantrill/*
28d158018Bryan Cantrill * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
29d158018Bryan Cantrill */
30d158018Bryan Cantrill
317c478bdstevel@tonic-gate#include <sys/types.h>
327c478bdstevel@tonic-gate#include <sys/systm.h>
337c478bdstevel@tonic-gate#include <sys/cred.h>
347c478bdstevel@tonic-gate#include <sys/modctl.h>
357c478bdstevel@tonic-gate#include <sys/vfs.h>
36aa59c4crsb#include <sys/vfs_opreg.h>
377c478bdstevel@tonic-gate#include <sys/sysmacros.h>
387c478bdstevel@tonic-gate#include <sys/cmn_err.h>
397c478bdstevel@tonic-gate#include <sys/stat.h>
407c478bdstevel@tonic-gate#include <sys/errno.h>
417c478bdstevel@tonic-gate#include <sys/kmem.h>
427c478bdstevel@tonic-gate#include <sys/file.h>
437c478bdstevel@tonic-gate#include <sys/kstat.h>
447c478bdstevel@tonic-gate#include <sys/port_impl.h>
457c478bdstevel@tonic-gate#include <sys/task.h>
467c478bdstevel@tonic-gate#include <sys/project.h>
497c478bdstevel@tonic-gate * Event Ports can be shared across threads or across processes.
507c478bdstevel@tonic-gate * Every thread/process can use its own event port or a group of them
517c478bdstevel@tonic-gate * can use a single port. A major request was also to get the ability
527c478bdstevel@tonic-gate * to submit user-defined events to a port. The idea of the
537c478bdstevel@tonic-gate * user-defined events is to use the event ports for communication between
547c478bdstevel@tonic-gate * threads/processes (like message queues). User-defined events are queued
557c478bdstevel@tonic-gate * in a port with the same priority as other event types.
567c478bdstevel@tonic-gate *
577c478bdstevel@tonic-gate * Events are delivered only once. The thread/process which is waiting
587c478bdstevel@tonic-gate * for events with the "highest priority" (priority here is related to the
597c478bdstevel@tonic-gate * internal strategy to wakeup waiting threads) will retrieve the event,
607c478bdstevel@tonic-gate * all other threads/processes will not be notified. There is also
617c478bdstevel@tonic-gate * the requirement to have events which should be submitted immediately
627c478bdstevel@tonic-gate * to all "waiting" threads. That is the main task of the alert event.
637c478bdstevel@tonic-gate * The alert event is submitted by the application to a port. The port
647c478bdstevel@tonic-gate * changes from a standard mode to the alert mode. Now all waiting threads
657c478bdstevel@tonic-gate * will be awakened immediately and they will return with the alert event.
667c478bdstevel@tonic-gate * Threads trying to retrieve events from a port in alert mode will
677c478bdstevel@tonic-gate * return immediately with the alert event.
687c478bdstevel@tonic-gate *
697c478bdstevel@tonic-gate *
707c478bdstevel@tonic-gate * An event port is like a kernel queue, which accepts events submitted from
717c478bdstevel@tonic-gate * user level as well as events submitted from kernel sub-systems. Sub-systems
727c478bdstevel@tonic-gate * able to submit events to a port are the so-called "event sources".
737c478bdstevel@tonic-gate * Current event sources:
747c478bdstevel@tonic-gate * PORT_SOURCE_AIO	 : events submitted per transaction completion from
757c478bdstevel@tonic-gate *			   POSIX-I/O framework.
767c478bdstevel@tonic-gate * PORT_SOURCE_TIMER	 : events submitted when a timer fires
777c478bdstevel@tonic-gate *			   (see timer_create(3RT)).
787c478bdstevel@tonic-gate * PORT_SOURCE_FD	 : events submitted per file descriptor (see poll(2)).
797c478bdstevel@tonic-gate * PORT_SOURCE_ALERT	 : events submitted from user. This is not really a
807c478bdstevel@tonic-gate *			   single event, this is actually a port mode
817c478bdstevel@tonic-gate *			   (see port_alert(3c)).
827c478bdstevel@tonic-gate * PORT_SOURCE_USER	 : events submitted by applications with
837c478bdstevel@tonic-gate *			   port_send(3c) or port_sendn(3c).
84df2381bpraks * PORT_SOURCE_FILE	 : events submitted per file being watched for file
85df2381bpraks *			   change events (see port_create(3c)).
867c478bdstevel@tonic-gate *
877c478bdstevel@tonic-gate * There is a user API implemented in the libc library as well as a
887c478bdstevel@tonic-gate * kernel API implemented in port_subr.c in genunix.
897c478bdstevel@tonic-gate * The available user API functions are:
907c478bdstevel@tonic-gate * port_create() : create a port as a file descriptor of portfs file system
917c478bdstevel@tonic-gate *		   The standard close(2) function closes a port.
927c478bdstevel@tonic-gate * port_associate() : associate a file descriptor with a port to be able to
937c478bdstevel@tonic-gate *		      retrieve events from that file descriptor.
947c478bdstevel@tonic-gate * port_dissociate(): remove the association of a file descriptor with a port.
957c478bdstevel@tonic-gate * port_alert()	 : set/unset a port in alert mode
967c478bdstevel@tonic-gate * port_send()	 : send an event of type PORT_SOURCE_USER to a port
977c478bdstevel@tonic-gate * port_sendn()	 : send an event of type PORT_SOURCE_USER to a list of ports
987c478bdstevel@tonic-gate * port_get()	 : retrieve a single event from a port
997c478bdstevel@tonic-gate * port_getn()	 : retrieve a list of events from a port
1007c478bdstevel@tonic-gate *
1017c478bdstevel@tonic-gate * The available kernel API functions are:
1027c478bdstevel@tonic-gate * port_alloc_event()   : allocate an event slot/structure of/from a port
1037c478bdstevel@tonic-gate * port_init_event()    : set event data in the event structure
1047c478bdstevel@tonic-gate * port_send_event()    : send event to a port
1057c478bdstevel@tonic-gate * port_free_event()    : deliver allocated slot/structure back to a port
1067c478bdstevel@tonic-gate * port_associate_ksource(): associate a kernel event source with a port
1077c478bdstevel@tonic-gate * port_dissociate_ksource(): dissociate a kernel event source from a port
1087c478bdstevel@tonic-gate *
1097c478bdstevel@tonic-gate * The libc implementation consists of small functions which pass the
1107c478bdstevel@tonic-gate * arguments to the kernel using the "portfs" system call. It means, all the
1117c478bdstevel@tonic-gate * synchronisation work is being done in the kernel. The "portfs" system
1127c478bdstevel@tonic-gate * call loads the portfs file system into the kernel.
1137c478bdstevel@tonic-gate *
1147c478bdstevel@tonic-gate * PORT CREATION
1157c478bdstevel@tonic-gate * The first function to be used is port_create() which internally creates
1167c478bdstevel@tonic-gate * a vnode and a portfs node. The portfs node is represented by the port_t
1177c478bdstevel@tonic-gate * structure, which again includes all the data necessary to control a port.
1187c478bdstevel@tonic-gate * port_create() returns a file descriptor, which needs to be used in almost
1197c478bdstevel@tonic-gate * all other event port functions.
1207c478bdstevel@tonic-gate * The maximum number of ports per system is controlled by the resource
1217c478bdstevel@tonic-gate * control: project.max-port-ids.
1227c478bdstevel@tonic-gate *
1237c478bdstevel@tonic-gate * EVENT GENERATION
1247c478bdstevel@tonic-gate * The second step is the triggering of events, which could be sent to a port.
1257c478bdstevel@tonic-gate * Every event source implements its own method to generate events for a port:
1267c478bdstevel@tonic-gate * PORT_SOURCE_AIO:
12743bd900Toomas Soome *	The sigevent structure of the standard POSIX-IO functions
12843bd900Toomas Soome *	was extended by an additional notification type.
12943bd900Toomas Soome *	Standard notification types:
13043bd900Toomas Soome *	SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD
13143bd900Toomas Soome *	Event ports now introduce SIGEV_PORT.
13243bd900Toomas Soome *	The notification type SIGEV_PORT specifies that a structure
13343bd900Toomas Soome *	of type port_notify_t has to be attached to the sigev_value.
13443bd900Toomas Soome *	The port_notify_t structure contains the event port file
13543bd900Toomas Soome *	descriptor and a user-defined pointer.
13643bd900Toomas Soome *	Internally the AIO implementation will use the kernel API
13743bd900Toomas Soome *	functions to allocate an event port slot per transaction (aiocb)
13843bd900Toomas Soome *	and send the event to the port as soon as the transaction completes.
13943bd900Toomas Soome *	All the events submitted per transaction are of type
14043bd900Toomas Soome *	PORT_SOURCE_AIO.
1417c478bdstevel@tonic-gate * PORT_SOURCE_TIMER:
14243bd900Toomas Soome *	The timer_create() function uses the same method as the
14343bd900Toomas Soome *	PORT_SOURCE_AIO event source. It also uses the sigevent structure
14443bd900Toomas Soome *	to deliver the port information.
14543bd900Toomas Soome *	Internally the timer code will allocate a single event slot/struct
14643bd900Toomas Soome *	per timer and it will send the timer event as soon as the timer
14743bd900Toomas Soome *	fires. If the timer-fired event is not delivered to the application
14843bd900Toomas Soome *	before the next period elapsed, then an overrun counter will be
14943bd900Toomas Soome *	incremented. The timer event source uses a callback function to
15043bd900Toomas Soome *	detect the delivery of the event to the application. At that time
15143bd900Toomas Soome *	the timer callback function will update the event overrun counter.
1527c478bdstevel@tonic-gate * PORT_SOURCE_FD:
15343bd900Toomas Soome *	This event source uses the port_associate() function to allocate
15443bd900Toomas Soome *	an event slot/struct from a port. The application defines in the
15543bd900Toomas Soome *	events argument of port_associate() the type of events which it is
15643bd900Toomas Soome *	interested in.
15743bd900Toomas Soome *	The internal pollwakeup() function is used by all the file
15843bd900Toomas Soome *	systems -- which support the VOP_POLL() interface -- to notify
15943bd900Toomas Soome *	the upper layer (poll(2), devpoll(7d) and now event ports) about
16043bd900Toomas Soome *	the event triggered (see valid events in poll(2)).
16143bd900Toomas Soome *	The pollwakeup() function forwards the event to the layer registered
16243bd900Toomas Soome *	to receive the current event.
16343bd900Toomas Soome *	The port_dissociate() function can be used to free the allocated
16443bd900Toomas Soome *	event slot from the port. Anyway, file descriptors deliver events
16543bd900Toomas Soome *	only one time and remain deactivated until the application
16643bd900Toomas Soome *	reactivates the association of a file descriptor with port_associate().
16743bd900Toomas Soome *	If an associated file descriptor is closed then the file descriptor
16843bd900Toomas Soome *	will be dissociated automatically from the port.
1697c478bdstevel@tonic-gate *
1707c478bdstevel@tonic-gate * PORT_SOURCE_ALERT:
17143bd900Toomas Soome *	This event type is generated when the port was previously set in
17243bd900Toomas Soome *	alert mode using the port_alert() function.
17343bd900Toomas Soome *	A single alert event is delivered to every thread which tries to
17443bd900Toomas Soome *	retrieve events from a port.
1757c478bdstevel@tonic-gate * PORT_SOURCE_USER:
17643bd900Toomas Soome *	This type of event is generated from user level using the port_send()
17743bd900Toomas Soome *	function to send a user event to a port or the port_sendn() function
17843bd900Toomas Soome *	to send an event to a list of ports.
179df2381bpraks * PORT_SOURCE_FILE:
180df2381bpraks *	This event source uses the port_associate() interface to register
181df2381bpraks *	a file to be monitored for changes. The file name that needs to be
182df2381bpraks *	monitored is specified in the file_obj_t structure, a pointer to which
183df2381bpraks *	is passed as an argument. The event types to be monitored are specified
184df2381bpraks *	in the events argument.
185df2381bpraks *	A file events monitor is represented internally per port per object
186df2381bpraks *	address (the file_obj_t pointer), which means there can be multiple
187df2381bpraks *	watches registered on the same file using different file_obj_t
188df2381bpraks *	structure pointers. With the help of the FEM (File Event Monitoring)
189df2381bpraks *	hooks, the file's vnode ops are intercepted and relevant events
190df2381bpraks *	delivered. The port_dissociate() function is used to de-register a
191df2381bpraks *	file events monitor on a file. When the specified file is
192df2381bpraks *	removed/renamed, the file events watch/monitor is automatically
193df2381bpraks *	removed.
1947c478bdstevel@tonic-gate *
1957c478bdstevel@tonic-gate * EVENT DELIVERY / RETRIEVING EVENTS
1967c478bdstevel@tonic-gate * Events remain in the port queue until:
1977c478bdstevel@tonic-gate * - the application uses port_get() or port_getn() to retrieve events,
1987c478bdstevel@tonic-gate * - the event source cancels the event,
1997c478bdstevel@tonic-gate * - the event port is closed or
2007c478bdstevel@tonic-gate * - the process exits.
2017c478bdstevel@tonic-gate * The maximal number of events in a port queue is the maximal number
2027c478bdstevel@tonic-gate * of event slots/structures which can be allocated by event sources.
2037c478bdstevel@tonic-gate * The allocation of event slots/structures is controlled by the resource
2047c478bdstevel@tonic-gate * control: process.max-port-events.
2057c478bdstevel@tonic-gate * The port_get() function retrieves a single event and the port_getn()
2067c478bdstevel@tonic-gate * function retrieves a list of events.
2077c478bdstevel@tonic-gate * Events are classified as shareable and non-shareable events across processes.
2087c478bdstevel@tonic-gate * Non-shareable events are invisible for the port_get(n)() functions of
2097c478bdstevel@tonic-gate * processes other than the owner of the event.
2107c478bdstevel@tonic-gate *    Shareable event types are:
2117c478bdstevel@tonic-gate *    PORT_SOURCE_USER events
21243bd900Toomas Soome *	This type of event is unconditionally shareable and without
21343bd900Toomas Soome *	limitations. If the parent process sends a user event and closes
21443bd900Toomas Soome *	the port afterwards, the event remains in the port and the child
21543bd900Toomas Soome *	process will still be able to retrieve the user event.
2167c478bdstevel@tonic-gate *    PORT_SOURCE_ALERT events
21743bd900Toomas Soome *	This type of event is shareable between processes.
21843bd900Toomas Soome *	Limitation:	The alert mode of the port is removed if the owner
21943bd900Toomas Soome *			(process which set the port in alert mode) of the
22043bd900Toomas Soome *			alert event closes the port.
2217c478bdstevel@tonic-gate *    PORT_SOURCE_FD events
22243bd900Toomas Soome *	This type of event is conditionally shareable between processes.
22343bd900Toomas Soome *	After fork(2) all forked file descriptors are shareable between
22443bd900Toomas Soome *	the processes. The child process is allowed to retrieve events
22543bd900Toomas Soome *	from the associated file descriptors and it can also re-associate
22643bd900Toomas Soome *	the fd with the port.
22743bd900Toomas Soome *	Limitations:	The child process is not allowed to dissociate
22843bd900Toomas Soome *			the file descriptor from the port. Only the
22943bd900Toomas Soome *			owner (process) of the association is allowed to
23043bd900Toomas Soome *			dissociate the file descriptor from the port.
23143bd900Toomas Soome *			If the owner of the association closes the port
23243bd900Toomas Soome *			the association will be removed.
2337c478bdstevel@tonic-gate *    PORT_SOURCE_AIO events
23443bd900Toomas Soome *	This type of event is not shareable between processes.
2357c478bdstevel@tonic-gate *    PORT_SOURCE_TIMER events
23643bd900Toomas Soome *	This type of event is not shareable between processes.
237df2381bpraks *    PORT_SOURCE_FILE events
23843bd900Toomas Soome *	This type of event is not shareable between processes.
2397c478bdstevel@tonic-gate *
2407c478bdstevel@tonic-gate * FORK BEHAVIOUR
2417c478bdstevel@tonic-gate * On fork(2) the child process inherits all opened file descriptors from
2427c478bdstevel@tonic-gate * the parent process. This is also valid for port file descriptors.
2437c478bdstevel@tonic-gate * Associated file descriptors with a port maintain the association across the
2447c478bdstevel@tonic-gate * fork(2). It means, the child process gets full access to the port and
2457c478bdstevel@tonic-gate * it can retrieve events from all common associated file descriptors.
2467c478bdstevel@tonic-gate * Events of file descriptors created and associated with a port after the
2477c478bdstevel@tonic-gate * fork(2) are non-shareable and can only be retrieved by the same process.
2487c478bdstevel@tonic-gate *
2497c478bdstevel@tonic-gate * If the parent or the child process closes an exported port (using fork(2)
2507c478bdstevel@tonic-gate * or I_SENDFD) all the file descriptors associated with the port by the
2517c478bdstevel@tonic-gate * process will be dissociated from the port. Events of dissociated file
2527c478bdstevel@tonic-gate * descriptors as well as all non-shareable events will be discarded.
2537c478bdstevel@tonic-gate * The other process can continue working with the port as usual.
2547c478bdstevel@tonic-gate *
2557c478bdstevel@tonic-gate * CLOSING A PORT
2567c478bdstevel@tonic-gate * close(2) has to be used to close a port. See FORK BEHAVIOUR for details.
2577c478bdstevel@tonic-gate *
2587c478bdstevel@tonic-gate * PORT EVENT STRUCTURES
2597c478bdstevel@tonic-gate * The global control structure of the event ports framework is port_control_t.
2607c478bdstevel@tonic-gate * port_control_t keeps track of the number of created ports in the system.
2617c478bdstevel@tonic-gate * The cache of the port event structures is also located in port_control_t.
2627c478bdstevel@tonic-gate *
2637c478bdstevel@tonic-gate * On port_create() the vnode and the portfs node are also created.
2647c478bdstevel@tonic-gate * The portfs node is represented by the port_t structure.
2657c478bdstevel@tonic-gate * The port_t structure manages all port specific tasks:
2667c478bdstevel@tonic-gate * - management of resource control values
2677c478bdstevel@tonic-gate * - port VOP_POLL interface
2687c478bdstevel@tonic-gate * - creation time
2697c478bdstevel@tonic-gate * - uid and gid of the port
2707c478bdstevel@tonic-gate *
2717c478bdstevel@tonic-gate * The port_t structure contains the port_queue_t structure.
2727c478bdstevel@tonic-gate * The port_queue_t structure contains all the data necessary for the
2737c478bdstevel@tonic-gate * queue management:
2747c478bdstevel@tonic-gate * - locking
2757c478bdstevel@tonic-gate * - condition variables
2767c478bdstevel@tonic-gate * - event counters
2777c478bdstevel@tonic-gate * - submitted events	(represented by port_kevent_t structures)
2787c478bdstevel@tonic-gate * - threads waiting for event delivery (check portget_t structure)
2797c478bdstevel@tonic-gate * - PORT_SOURCE_FD cache	(managed by the port_fdcache_t structure)
2807c478bdstevel@tonic-gate * - event source management (managed by the port_source_t structure)
2817c478bdstevel@tonic-gate * - alert mode management	(check port_alert_t structure)
2827c478bdstevel@tonic-gate *
2837c478bdstevel@tonic-gate * EVENT MANAGEMENT
2847c478bdstevel@tonic-gate * The event port file system creates a kmem_cache for internal allocation of
2857c478bdstevel@tonic-gate * event port structures.
2867c478bdstevel@tonic-gate *
2877c478bdstevel@tonic-gate * 1. Event source association with a port:
2887c478bdstevel@tonic-gate * The first step to do for event sources is to get associated with a port
2897c478bdstevel@tonic-gate * using the port_associate_ksource() function or adding an entry to the
2907c478bdstevel@tonic-gate * port_ksource_tab[]. An event source can get dissociated from a port
2917c478bdstevel@tonic-gate * using the port_dissociate_ksource() function. An entry in the
2927c478bdstevel@tonic-gate * port_ksource_tab[] implies that the source will be associated
2937c478bdstevel@tonic-gate * automatically with every new created port.
2947c478bdstevel@tonic-gate * The event source can deliver a callback function, which is used by the
2957c478bdstevel@tonic-gate * port to notify the event source about close(2). The idea is that
2967c478bdstevel@tonic-gate * in such a case the event source should free all allocated resources
2977c478bdstevel@tonic-gate * and it must return to the port all allocated slots/structures.
2987c478bdstevel@tonic-gate * The port_close() function will wait until all allocated event
2997c478bdstevel@tonic-gate * structures/slots are returned to the port.
3007c478bdstevel@tonic-gate * The callback function is not necessary when the event source does not
3017c478bdstevel@tonic-gate * maintain local resources, a second condition is that the event source
3027c478bdstevel@tonic-gate * can guarantee that allocated event slots will be returned without
3037c478bdstevel@tonic-gate * delay to the port (it will not block and sleep somewhere).
3047c478bdstevel@tonic-gate *
3057c478bdstevel@tonic-gate * 2. Reservation of an event slot / event structure
3067c478bdstevel@tonic-gate * The event port reliability is based on the reservation of an event "slot"
3077c478bdstevel@tonic-gate * (allocation of an event structure) by the event source as part of the
3087c478bdstevel@tonic-gate * application call. If the maximal number of event slots is exhausted then
3097c478bdstevel@tonic-gate * the event source can return a corresponding error code to the application.
3107c478bdstevel@tonic-gate *
3117c478bdstevel@tonic-gate * The port_alloc_event() function has to be used by event sources to
3127c478bdstevel@tonic-gate * allocate an event slot (reserve an event structure). The port_alloc_event()
3137c478bdstevel@tonic-gate * does not block and it will return a 0 value on success or an error code
3147c478bdstevel@tonic-gate * if it fails.
3157c478bdstevel@tonic-gate * An argument of port_alloc_event() is a flag which determines the behavior
3167c478bdstevel@tonic-gate * of the event after it was delivered to the application:
3177c478bdstevel@tonic-gate * PORT_ALLOC_DEFAULT	: event slot becomes free after delivery to the
3187c478bdstevel@tonic-gate *			  application.
3197c478bdstevel@tonic-gate * PORT_ALLOC_PRIVATE	: event slot remains under the control of the event
3207c478bdstevel@tonic-gate *			  source. This kind of slots can not be used for
3217c478bdstevel@tonic-gate *			  event delivery and should only be used internally
3227c478bdstevel@tonic-gate *			  by the event source.
3237c478bdstevel@tonic-gate * PORT_KEV_CACHED	: event slot remains under the control of an event
3247c478bdstevel@tonic-gate *			  port cache. It does not become free after delivery
3257c478bdstevel@tonic-gate *			  to the application.
3267c478bdstevel@tonic-gate * PORT_ALLOC_SCACHED	: event slot remains under the control of the event
3277c478bdstevel@tonic-gate *			  source. The event source takes the control over
3287c478bdstevel@tonic-gate *			  the slot after the event is delivered to the
3297c478bdstevel@tonic-gate *			  application.
3307c478bdstevel@tonic-gate *
3317c478bdstevel@tonic-gate * 3. Delivery of events to the event port
3327c478bdstevel@tonic-gate * Earlier allocated event structure/slot has to be used to deliver
3337c478bdstevel@tonic-gate * event data to the port. Event source has to use the function
3347c478bdstevel@tonic-gate * port_send_event(). The single argument is a pointer to the previously
3357c478bdstevel@tonic-gate * reserved event structure/slot.
3367c478bdstevel@tonic-gate * The portkev_events field of the port_kevent_t structure can be updated/set
3377c478bdstevel@tonic-gate * in two ways:
3387c478bdstevel@tonic-gate * 1. using the port_set_event() function, or
3397c478bdstevel@tonic-gate * 2. updating the portkev_events field out of the callback function:
3407c478bdstevel@tonic-gate *    The event source can deliver a callback function to the port as an
3417c478bdstevel@tonic-gate *    argument of port_init_event().
3427c478bdstevel@tonic-gate *    One of the arguments of the callback function is a pointer to the
3437c478bdstevel@tonic-gate *    events field, which will be delivered to the application.
3447c478bdstevel@tonic-gate *    (see Delivery of events to the application).
3457c478bdstevel@tonic-gate * Event structures/slots can be delivered to the event port only one time,
3467c478bdstevel@tonic-gate * they remain blocked until the data is delivered to the application and the
3477c478bdstevel@tonic-gate * slot becomes free or it is delivered back to the event source
3487c478bdstevel@tonic-gate * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above
3497c478bdstevel@tonic-gate * is at the same time the indicator for the event source that the event
3507c478bdstevel@tonic-gate * structure/slot is free for reuse.
3517c478bdstevel@tonic-gate *
3527c478bdstevel@tonic-gate * 4. Delivery of events to the application
3537c478bdstevel@tonic-gate * The events structures/slots delivered by event sources remain in the
3547c478bdstevel@tonic-gate * port queue until they are retrieved by the application or the port
3557c478bdstevel@tonic-gate * is closed (exit(2) also closes all opened file descriptors).
3567c478bdstevel@tonic-gate * The application uses port_get() or port_getn() to retrieve events from
3577c478bdstevel@tonic-gate * a port. port_get() retrieves a single event structure/slot and port_getn()
3587c478bdstevel@tonic-gate * retrieves a list of event structures/slots.
3597c478bdstevel@tonic-gate * Both functions are able to poll for events and return immediately or they
3607c478bdstevel@tonic-gate * can specify a timeout value.
3617c478bdstevel@tonic-gate * Before the events are delivered to the application they are moved to a
3627c478bdstevel@tonic-gate * second temporary internal queue. The idea is to avoid lock collisions or
3637c478bdstevel@tonic-gate * contentions of the global queue lock.
3647c478bdstevel@tonic-gate * The global queue lock is used every time when an event source delivers
3657c478bdstevel@tonic-gate * new events to the port.
3667c478bdstevel@tonic-gate * The port_get() and port_getn() functions
3677c478bdstevel@tonic-gate * a) retrieve single events from the temporary queue,
3687c478bdstevel@tonic-gate * b) prepare the data to be passed to the application memory,
3697c478bdstevel@tonic-gate * c) activate the callback function of the event sources:
3707c478bdstevel@tonic-gate *    - to get the latest event data,
3717c478bdstevel@tonic-gate *    - the event source can free all allocated resources associated with the
3727c478bdstevel@tonic-gate *      current event,
3737c478bdstevel@tonic-gate *    - the event source can re-use the current event slot/structure
3747c478bdstevel@tonic-gate *    - the event source can deny the delivery of the event to the application
3757c478bdstevel@tonic-gate *      (e.g. because of the wrong process).
3767c478bdstevel@tonic-gate * d) put the event back to the temporary queue if the event delivery was denied
3777c478bdstevel@tonic-gate * e) repeat a) until d) as long as there are events in the queue and
3787c478bdstevel@tonic-gate *    there is enough user space available.
3797c478bdstevel@tonic-gate *
3807c478bdstevel@tonic-gate * The loop described above could hold the global mutex for a very long time;
3817c478bdstevel@tonic-gate * to avoid that, a second mutex was introduced to synchronize concurrent
3827c478bdstevel@tonic-gate * threads accessing the temporary queue.
3837c478bdstevel@tonic-gate */
3857c478bdstevel@tonic-gatestatic int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
3867c478bdstevel@tonic-gate    uintptr_t);	/* six-argument entry point installed in port_sysent */
3887c478bdstevel@tonic-gatestatic struct sysent port_sysent = {
3897c478bdstevel@tonic-gate	6,
3907c478bdstevel@tonic-gate	SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
391660213eToomas Soome	(int (*)())(uintptr_t)portfs,
3947c478bdstevel@tonic-gatestatic struct modlsys modlsys = {
3957c478bdstevel@tonic-gate	&mod_syscallops, "event ports", &port_sysent
3987c478bdstevel@tonic-gate#ifdef _SYSCALL32_IMPL
4007c478bdstevel@tonic-gatestatic int64_t
4017c478bdstevel@tonic-gateportfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4,
4027c478bdstevel@tonic-gate    uint32_t arg5, uint32_t arg6);
4047c478bdstevel@tonic-gatestatic struct sysent port_sysent32 = {
4057c478bdstevel@tonic-gate	6,
4067c478bdstevel@tonic-gate	SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
407660213eToomas Soome	(int (*)())(uintptr_t)portfs32,
4107c478bdstevel@tonic-gatestatic struct modlsys modlsys32 = {
4117c478bdstevel@tonic-gate	&mod_syscallops32,
4127c478bdstevel@tonic-gate	"32-bit event ports syscalls",
4137c478bdstevel@tonic-gate	&port_sysent32
4157c478bdstevel@tonic-gate#endif	/* _SYSCALL32_IMPL */
4177c478bdstevel@tonic-gatestatic struct modlinkage modlinkage = {
4187c478bdstevel@tonic-gate	MODREV_1,
4197c478bdstevel@tonic-gate	&modlsys,
4207c478bdstevel@tonic-gate#ifdef _SYSCALL32_IMPL
4217c478bdstevel@tonic-gate	&modlsys32,
4237c478bdstevel@tonic-gate	NULL
4267c478bdstevel@tonic-gateport_kstat_t port_kstat = {
4277c478bdstevel@tonic-gate	{ "ports",	KSTAT_DATA_UINT32 }
4307c478bdstevel@tonic-gatedev_t	portdev;	/* makedevice(major, 0), major obtained from getudev() */
4317c478bdstevel@tonic-gatestruct	vnodeops *port_vnodeops;	/* NOTE(review): presumably built from port_vnodeops_template - confirm */
4327c478bdstevel@tonic-gatestruct	vfs port_vfs;	/* dummy vfs; its ops are installed with vfs_setops() */
4347c478bdstevel@tonic-gateextern	rctl_hndl_t rc_process_portev;	/* defined elsewhere; presumably the per-process event limit - confirm */
4357c478bdstevel@tonic-gateextern	rctl_hndl_t rc_project_portids;	/* defined elsewhere; presumably the per-project port limit - confirm */
4367c478bdstevel@tonic-gateextern	void aio_close_port(void *, int, pid_t, int);	/* entered for PORT_SOURCE_AIO in port_ksource_tab[] */
4397c478bdstevel@tonic-gate * This table contains a list of event sources which need a static
4407c478bdstevel@tonic-gate * association with a port (every port).
4417c478bdstevel@tonic-gate * The last NULL entry in the table is required to detect "end of table".
4427c478bdstevel@tonic-gate */
4437c478bdstevel@tonic-gatestruct port_ksource port_ksource_tab[] = {
4447c478bdstevel@tonic-gate	{PORT_SOURCE_AIO, aio_close_port, NULL, NULL},
4457c478bdstevel@tonic-gate	{0, NULL, NULL, NULL}
4487c478bdstevel@tonic-gate/* local functions: prototypes of the static helpers private to this file */
4497c478bdstevel@tonic-gatestatic int port_getn(port_t *, port_event_t *, uint_t, uint_t *,
4507c478bdstevel@tonic-gate    port_gettimer_t *);
4517c478bdstevel@tonic-gatestatic int port_sendn(int [], int [], uint_t, int, void *, uint_t *);
4527c478bdstevel@tonic-gatestatic int port_alert(port_t *, int, int, void *);
4537c478bdstevel@tonic-gatestatic int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *);
4547c478bdstevel@tonic-gatestatic int port_send(port_t *, int, int, void *);
4557c478bdstevel@tonic-gatestatic int port_create(int *);
4567c478bdstevel@tonic-gatestatic int port_get_alert(port_alert_t *, port_event_t *);
4577c478bdstevel@tonic-gatestatic int port_copy_event(port_event_t *, port_kevent_t *, list_t *);
4587c478bdstevel@tonic-gatestatic int *port_errorn(int *, int, int, int);
4597c478bdstevel@tonic-gatestatic int port_noshare(void *, int *, pid_t, int, void *);
4607c478bdstevel@tonic-gatestatic int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *,
4617c478bdstevel@tonic-gate    int);
4627c478bdstevel@tonic-gatestatic void port_init(port_t *);
4637c478bdstevel@tonic-gatestatic void port_remove_alert(port_queue_t *);
4647c478bdstevel@tonic-gatestatic void port_add_ksource_local(port_t *, port_ksource_t *);
4657c478bdstevel@tonic-gatestatic void port_check_return_cond(port_queue_t *);
4667c478bdstevel@tonic-gatestatic void port_dequeue_thread(port_queue_t *, portget_t *);
4677c478bdstevel@tonic-gatestatic portget_t *port_queue_thread(port_queue_t *, uint_t);
4687c478bdstevel@tonic-gatestatic void port_kstat_init(void);
4707c478bdstevel@tonic-gate#ifdef	_SYSCALL32_IMPL
4717c478bdstevel@tonic-gatestatic int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *);
4777c478bdstevel@tonic-gate	static const fs_operation_def_t port_vfsops_template[] = {
4787c478bdstevel@tonic-gate		NULL, NULL
4797c478bdstevel@tonic-gate	};
4807c478bdstevel@tonic-gate	extern const	fs_operation_def_t port_vnodeops_template[];
4817c478bdstevel@tonic-gate	vfsops_t	*port_vfsops;
4827c478bdstevel@tonic-gate	int		error;
48343bd900Toomas Soome	major_t		major;
4857c478bdstevel@tonic-gate	if ((major = getudev()) == (major_t)-1)
4867c478bdstevel@tonic-gate		return (ENXIO);
4877c478bdstevel@tonic-gate	portdev = makedevice(major, 0);
4897c478bdstevel@tonic-gate	/* Create a dummy vfs */
4907c478bdstevel@tonic-gate	error = vfs_makefsops(port_vfsops_template, &port_vfsops);
4917c478bdstevel@tonic-gate	if (error) {
4927c478bdstevel@tonic-gate		cmn_err(CE_WARN, "port init: bad vfs ops");
4937c478bdstevel@tonic-gate		return (error);
4947c478bdstevel@tonic-gate	}
4957c478bdstevel@tonic-gate	vfs_setops(&port_vfs, port_vfsops);
4967c478bdstevel@tonic-gate	port_vfs.vfs_flag = VFS_RDONLY;
4977c478bdstevel@tonic-gate	port_vfs.vfs_dev = portdev;
4987c478bdstevel@tonic-gate	vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0);
5007c478bdstevel@tonic-gate	error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops);
5017c478bdstevel@tonic-gate	if (error) {
5027c478bdstevel@tonic-gate		vfs_freevfsops(port_vfsops);
5037c478bdstevel@tonic-gate		cmn_err(CE_WARN, "port init: bad vnode ops");
5047c478bdstevel@tonic-gate		return (error);
5057c478bdstevel@tonic-gate	}
5077c478bdstevel@tonic-gate	mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL);
5087c478bdstevel@tonic-gate	port_control.pc_nents = 0;	/* number of active ports */
5107c478bdstevel@tonic-gate	/* create kmem_cache for port event structures */
5117c478bdstevel@tonic-gate	port_control.pc_cache = kmem_cache_create("port_cache",
5127c478bdstevel@tonic-gate	    sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
5147c478bdstevel@tonic-gate	port_kstat_init();		/* init port kstats */
5157c478bdstevel@tonic-gate	return (mod_install(&modlinkage));
5197c478bdstevel@tonic-gate_info(struct modinfo *modinfop)
5217c478bdstevel@tonic-gate	return (mod_info(&modlinkage, modinfop));
/*
 * System call wrapper for all port related system calls from 32-bit
 * programs.  The 32-bit arguments are widened and passed on to portfs();
 * the opcode decides how individual arguments are widened:
 *
 *	PORT_GET	a2 and a3 are converted through int, i.e. they are
 *			sign-extended; only the plain PORT_GET code is
 *			passed on as opcode.
 *	PORT_SENDN	a0 is converted through uint32_t, i.e. it is
 *			zero-extended (portfs() uses it as an address).
 *	others		a0 keeps its signed 32-bit value.
 */
#ifdef _SYSCALL32_IMPL
static int64_t
portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3,
    uint32_t a4)
{
	switch (opcode & PORT_CODE_MASK) {
	case PORT_GET:
		return (portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4));
	case PORT_SENDN:
		return (portfs(opcode, (uint32_t)a0, a1, a2, a3, a4));
	default:
		return (portfs(opcode, a0, a1, a2, a3, a4));
	}
}
#endif	/* _SYSCALL32_IMPL */
5507c478bdstevel@tonic-gate * System entry point for port functions.
5517c478bdstevel@tonic-gate * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE).
5527c478bdstevel@tonic-gate * The libc uses PORT_SYS_NOPORT in functions which do not deliver a
5537c478bdstevel@tonic-gate * port file descriptor as first argument.
5547c478bdstevel@tonic-gate */
5557c478bdstevel@tonic-gatestatic int64_t
5567c478bdstevel@tonic-gateportfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3,
5577c478bdstevel@tonic-gate    uintptr_t a4)
5597c478bdstevel@tonic-gate	rval_t		r;
5607c478bdstevel@tonic-gate	port_t		*pp;
56143bd900Toomas Soome	int		error = 0;
5627c478bdstevel@tonic-gate	uint_t		nget;
5637c478bdstevel@tonic-gate	file_t		*fp;
5647c478bdstevel@tonic-gate	port_gettimer_t	port_timer;
5667c478bdstevel@tonic-gate	r.r_vals = 0;
5677c478bdstevel@tonic-gate	if (opcode & PORT_SYS_NOPORT) {
5687c478bdstevel@tonic-gate		opcode &= PORT_CODE_MASK;
5697c478bdstevel@tonic-gate		if (opcode == PORT_SENDN) {
5707c478bdstevel@tonic-gate			error = port_sendn((int *)a0, (int *)a1, (uint_t)a2,
5717c478bdstevel@tonic-gate			    (int)a3, (void *)a4, (uint_t *)&r.r_val1);
5727c478bdstevel@tonic-gate			if (error && (error != EIO))
5737c478bdstevel@tonic-gate				return ((int64_t)set_errno(error));
5747c478bdstevel@tonic-gate			return (r.r_vals);
5757c478bdstevel@tonic-gate		}
5777c478bdstevel@tonic-gate		if (opcode == PORT_CREATE) {
5787c478bdstevel@tonic-gate			error = port_create(&r.r_val1);
5797c478bdstevel@tonic-gate			if (error)
5807c478bdstevel@tonic-gate				return ((int64_t)set_errno(error));
5817c478bdstevel@tonic-gate			return (r.r_vals);
5827c478bdstevel@tonic-gate		}
5837c478bdstevel@tonic-gate	}
5857c478bdstevel@tonic-gate	/* opcodes using port as first argument (a0) */
5877c478bdstevel@tonic-gate	if ((fp = getf((int)a0)) == NULL)
5887c478bdstevel@tonic-gate		return ((uintptr_t)set_errno(EBADF));
5907c478bdstevel@tonic-gate	if (fp->f_vnode->v_type != VPORT) {
5917c478bdstevel@tonic-gate		releasef((int)a0);
5927c478bdstevel@tonic-gate		return ((uintptr_t)set_errno(EBADFD));
5937c478bdstevel@tonic-gate	}
5957c478bdstevel@tonic-gate	pp = VTOEP(fp->f_vnode);
5977c478bdstevel@tonic-gate	switch (opcode & PORT_CODE_MASK) {
5987c478bdstevel@tonic-gate	case	PORT_GET:
5997c478bdstevel@tonic-gate	{
6007c478bdstevel@tonic-gate		/* see PORT_GETN description */
6017c478bdstevel@tonic-gate		struct	timespec timeout;
6037c478bdstevel@tonic-gate		port_timer.pgt_flags = PORTGET_ONE;
6047c478bdstevel@tonic-gate		port_timer.pgt_loop = 0;
6057c478bdstevel@tonic-gate		port_timer.pgt_rqtp = NULL;
60643bd900Toomas Soome		if (a4 != 0) {
6077c478bdstevel@tonic-gate			port_timer.pgt_timeout = &timeout;
6087c478bdstevel@tonic-gate			timeout.tv_sec = (time_t)a2;
6097c478bdstevel@tonic-gate			timeout.tv_nsec = (long)a3;
6107c478bdstevel@tonic-gate		} else {
6117c478bdstevel@tonic-gate			port_timer.pgt_timeout = NULL;
6127c478bdstevel@tonic-gate		}
6137c478bdstevel@tonic-gate		do {
6147c478bdstevel@tonic-gate			nget = 1;
6157c478bdstevel@tonic-gate			error = port_getn(pp, (port_event_t *)a1, 1,
6167c478bdstevel@tonic-gate			    (uint_t *)&nget, &port_timer);
6177c478bdstevel@tonic-gate		} while (nget == 0 && error == 0 && port_timer.pgt_loop);
6187c478bdstevel@tonic-gate		break;
6197c478bdstevel@tonic-gate	}
6207c478bdstevel@tonic-gate	case	PORT_GETN:
6217c478bdstevel@tonic-gate	{
6227c478bdstevel@tonic-gate		/*
6237c478bdstevel@tonic-gate		 * port_getn() can only retrieve own or shareable events from
6247c478bdstevel@tonic-gate		 * other processes. The port_getn() function remains in the
6257c478bdstevel@tonic-gate		 * kernel until own or shareable events are available or the
6267c478bdstevel@tonic-gate		 * timeout elapses.
6277c478bdstevel@tonic-gate		 */
6287c478bdstevel@tonic-gate		port_timer.pgt_flags = 0;
6297c478bdstevel@tonic-gate		port_timer.pgt_loop = 0;
6307c478bdstevel@tonic-gate		port_timer.pgt_rqtp = NULL;
6317c478bdstevel@tonic-gate		port_timer.pgt_timeout = (struct timespec *)a4;
6327c478bdstevel@tonic-gate		do {
6337c478bdstevel@tonic-gate			nget = a3;
6347c478bdstevel@tonic-gate			error = port_getn(pp, (port_event_t *)a1, (uint_t)a2,
6357c478bdstevel@tonic-gate			    (uint_t *)&nget, &port_timer);
6367c478bdstevel@tonic-gate		} while (nget == 0 && error == 0 && port_timer.pgt_loop);
6377c478bdstevel@tonic-gate		r.r_val1 = nget;
6387c478bdstevel@tonic-gate		r.r_val2 = error;
6397c478bdstevel@tonic-gate		releasef((int)a0);
6407c478bdstevel@tonic-gate		if (error && error != ETIME)
6417c478bdstevel@tonic-gate			return ((int64_t)set_errno(error));
6427c478bdstevel@tonic-gate		return (r.r_vals);
6437c478bdstevel@tonic-gate	}
6447c478bdstevel@tonic-gate	case	PORT_ASSOCIATE:
6457c478bdstevel@tonic-gate	{
646df2381bpraks		switch ((int)a1) {
647df2381bpraks		case PORT_SOURCE_FD:
648df2381bpraks			error = port_associate_fd(pp, (int)a1, (uintptr_t)a2,
649df2381bpraks			    (int)a3, (void *)a4);
650df2381bpraks			break;
651df2381bpraks		case PORT_SOURCE_FILE:
652df2381bpraks			error = port_associate_fop(pp, (int)a1, (uintptr_t)a2,
653df2381bpraks			    (int)a3, (void *)a4);
654df2381bpraks			break;
655df2381bpraks		default:
6567c478bdstevel@tonic-gate			error = EINVAL;
6577c478bdstevel@tonic-gate			break;
6587c478bdstevel@tonic-gate		}
6597c478bdstevel@tonic-gate		break;
6607c478bdstevel@tonic-gate	}
6617c478bdstevel@tonic-gate	case	PORT_SEND:
6627c478bdstevel@tonic-gate	{
6637c478bdstevel@tonic-gate		/* user-defined events */
6647c478bdstevel@tonic-gate		error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2);
6657c478bdstevel@tonic-gate		break;
6667c478bdstevel@tonic-gate	}
6677c478bdstevel@tonic-gate	case	PORT_DISPATCH:
6687c478bdstevel@tonic-gate	{
6697c478bdstevel@tonic-gate		/*
6707c478bdstevel@tonic-gate		 * library events, blocking
6713470957raf		 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ
6723470957raf		 * are currently allowed.
6737c478bdstevel@tonic-gate		 */
6743470957raf		if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) {
6757c478bdstevel@tonic-gate			error = EINVAL;
6767c478bdstevel@tonic-gate			break;
6777c478bdstevel@tonic-gate		}
6787c478bdstevel@tonic-gate		error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2,
6797c478bdstevel@tonic-gate		    (uintptr_t)a3, (void *)a4);
6807c478bdstevel@tonic-gate		break;
6817c478bdstevel@tonic-gate	}
6827c478bdstevel@tonic-gate	case	PORT_DISSOCIATE:
6837c478bdstevel@tonic-gate	{
684df2381bpraks		switch ((int)a1) {
685df2381bpraks		case PORT_SOURCE_FD:
686df2381bpraks			error = port_dissociate_fd(pp, (uintptr_t)a2);
687df2381bpraks			break;
688df2381bpraks		case PORT_SOURCE_FILE:
689df2381bpraks			error = port_dissociate_fop(pp, (uintptr_t)a2);
690df2381bpraks			break;
691df2381bpraks		default:
6927c478bdstevel@tonic-gate			error = EINVAL;
6937c478bdstevel@tonic-gate			break;
6947c478bdstevel@tonic-gate		}
6957c478bdstevel@tonic-gate		break;
6967c478bdstevel@tonic-gate	}
6977c478bdstevel@tonic-gate	case	PORT_ALERT:
6987c478bdstevel@tonic-gate	{
6997c478bdstevel@tonic-gate		if ((int)a2)	/* a2 = events */
7007c478bdstevel@tonic-gate			error = port_alert(pp, (int)a1, (int)a2, (void *)a3);
7017c478bdstevel@tonic-gate		else
7027c478bdstevel@tonic-gate			port_remove_alert(&pp->port_queue);
7037c478bdstevel@tonic-gate		break;
7047c478bdstevel@tonic-gate	}
7057c478bdstevel@tonic-gate	default:
7067c478bdstevel@tonic-gate		error = EINVAL;
7077c478bdstevel@tonic-gate		break;
7087c478bdstevel@tonic-gate	}
7107c478bdstevel@tonic-gate	releasef((int)a0);
7117c478bdstevel@tonic-gate	if (error)
7127c478bdstevel@tonic-gate		return ((int64_t)set_errno(error));
7137c478bdstevel@tonic-gate	return (r.r_vals);
7177c478bdstevel@tonic-gate * System call to create a port.
7187c478bdstevel@tonic-gate *
7197c478bdstevel@tonic-gate * The port_create() function creates a vnode of type VPORT per port.
7207c478bdstevel@tonic-gate * The port control data is associated with the vnode as vnode private data.
7217c478bdstevel@tonic-gate * The port_create() function returns an event port file descriptor.
7227c478bdstevel@tonic-gate */
7237c478bdstevel@tonic-gatestatic int
7247c478bdstevel@tonic-gateport_create(int *fdp)
7267c478bdstevel@tonic-gate	port_t		*pp;
7277c478bdstevel@tonic-gate	vnode_t		*vp;
7287c478bdstevel@tonic-gate	struct file	*fp;
7297c478bdstevel@tonic-gate	proc_t		*p = curproc;
7317c478bdstevel@tonic-gate	/* initialize vnode and port private data */
7327c478bdstevel@tonic-gate	pp = kmem_zalloc(sizeof (port_t), KM_SLEEP);
7347c478bdstevel@tonic-gate	pp->port_vnode = vn_alloc(KM_SLEEP);
7357c478bdstevel@tonic-gate	vp = EPTOV(pp);
7367c478bdstevel@tonic-gate	vn_setops(vp, port_vnodeops);
7377c478bdstevel@tonic-gate	vp->v_type = VPORT;
7387c478bdstevel@tonic-gate	vp->v_vfsp = &port_vfs;
7397c478bdstevel@tonic-gate	vp->v_data = (caddr_t)pp;
7417c478bdstevel@tonic-gate	mutex_enter(&port_control.pc_mutex);
7427c478bdstevel@tonic-gate	/*
7437c478bdstevel@tonic-gate	 * Retrieve the maximal number of event ports allowed per system from
7447c478bdstevel@tonic-gate	 * the resource control: project.port-max-ids.
7457c478bdstevel@tonic-gate	 */
7467c478bdstevel@tonic-gate	mutex_enter(&p->p_lock);
7477c478bdstevel@tonic-gate	if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p,
7487c478bdstevel@tonic-gate	    port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) {
7497c478bdstevel@tonic-gate		mutex_exit(&p->p_lock);
7507c478bdstevel@tonic-gate		vn_free(vp);
7517c478bdstevel@tonic-gate		kmem_free(pp, sizeof (port_t));
7527c478bdstevel@tonic-gate		mutex_exit(&port_control.pc_mutex);
7537c478bdstevel@tonic-gate		return (EAGAIN);
7547c478bdstevel@tonic-gate	}
7567c478bdstevel@tonic-gate	/*
7577c478bdstevel@tonic-gate	 * Retrieve the maximal number of events allowed per port from
7587c478bdstevel@tonic-gate	 * the resource control: process.port-max-events.
7597c478bdstevel@tonic-gate	 */
7607c478bdstevel@tonic-gate	pp->port_max_events = rctl_enforced_value(rc_process_portev,
7617c478bdstevel@tonic-gate	    p->p_rctls, p);
7627c478bdstevel@tonic-gate	mutex_exit(&p->p_lock);
7647c478bdstevel@tonic-gate	/* allocate a new user file descriptor and a file structure */
7657c478bdstevel@tonic-gate	if (falloc(vp, 0, &fp, fdp)) {
7667c478bdstevel@tonic-gate		/*
7677c478bdstevel@tonic-gate		 * If the file table is full, free allocated resources.
7687c478bdstevel@tonic-gate		 */
7697c478bdstevel@tonic-gate		vn_free(vp);
7707c478bdstevel@tonic-gate		kmem_free(pp, sizeof (port_t));
7717c478bdstevel@tonic-gate		mutex_exit(&port_control.pc_mutex);
7727c478bdstevel@tonic-gate		return (EMFILE);
7737c478bdstevel@tonic-gate	}
7757c478bdstevel@tonic-gate	mutex_exit(&fp->f_tlock);
7777c478bdstevel@tonic-gate	pp->port_fd = *fdp;
7787c478bdstevel@tonic-gate	port_control.pc_nents++;
7797c478bdstevel@tonic-gate	p->p_portcnt++;
7807c478bdstevel@tonic-gate	port_kstat.pks_ports.value.ui32++;
7817c478bdstevel@tonic-gate	mutex_exit(&port_control.pc_mutex);
7837c478bdstevel@tonic-gate	/* initializes port private data */
7847c478bdstevel@tonic-gate	port_init(pp);
78561b4b1epraks	/* set user file pointer */
78661b4b1epraks	setf(*fdp, fp);
7877c478bdstevel@tonic-gate	return (0);
7917c478bdstevel@tonic-gate * port_init() initializes event port specific data
7927c478bdstevel@tonic-gate */
7937c478bdstevel@tonic-gatestatic void
7947c478bdstevel@tonic-gateport_init(port_t *pp)
7967c478bdstevel@tonic-gate	port_queue_t	*portq;
7977c478bdstevel@tonic-gate	port_ksource_t	*pks;
7997c478bdstevel@tonic-gate	mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL);
8007c478bdstevel@tonic-gate	portq = &pp->port_queue;
8017c478bdstevel@tonic-gate	mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL);
8027c478bdstevel@tonic-gate	pp->port_flags |= PORT_INIT;
8047c478bdstevel@tonic-gate	/*
8057c478bdstevel@tonic-gate	 * If it is not enough memory available to satisfy a user
8067c478bdstevel@tonic-gate	 * request using a single port_getn() call then port_getn()
8077c478bdstevel@tonic-gate	 * will reduce the size of the list to PORT_MAX_LIST.
8087c478bdstevel@tonic-gate	 */
8097c478bdstevel@tonic-gate	pp->port_max_list = port_max_list;
8117c478bdstevel@tonic-gate	/* Set timestamp entries required for fstat(2) requests */
8127c478bdstevel@tonic-gate	gethrestime(&pp->port_ctime);
8137c478bdstevel@tonic-gate	pp->port_uid = crgetuid(curproc->p_cred);
8147c478bdstevel@tonic-gate	pp->port_gid = crgetgid(curproc->p_cred);
8167c478bdstevel@tonic-gate	/* initialize port queue structs */
8177c478bdstevel@tonic-gate	list_create(&portq->portq_list, sizeof (port_kevent_t),
8187c478bdstevel@tonic-gate	    offsetof(port_kevent_t, portkev_node));
8197c478bdstevel@tonic-gate	list_create(&portq->portq_get_list, sizeof (port_kevent_t),
8207c478bdstevel@tonic-gate	    offsetof(port_kevent_t, portkev_node));
8217c478bdstevel@tonic-gate	portq->portq_flags = 0;
8227c478bdstevel@tonic-gate	pp->port_pid = curproc->p_pid;
8247c478bdstevel@tonic-gate	/* Allocate cache skeleton for PORT_SOURCE_FD events */
8257c478bdstevel@tonic-gate	portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP);
8267c478bdstevel@tonic-gate	mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL);
8287c478bdstevel@tonic-gate	/*
8297c478bdstevel@tonic-gate	 * Allocate cache skeleton for association of event sources.
8307c478bdstevel@tonic-gate	 */
8317c478bdstevel@tonic-gate	mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL);
8327c478bdstevel@tonic-gate	portq->portq_scache = kmem_zalloc(
8337c478bdstevel@tonic-gate	    PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP);
8357c478bdstevel@tonic-gate	/*
8367c478bdstevel@tonic-gate	 * pre-associate some kernel sources with this port.
8377c478bdstevel@tonic-gate	 * The pre-association is required to create port_source_t
8387c478bdstevel@tonic-gate	 * structures for object association.
8397c478bdstevel@tonic-gate	 * Some sources can not get associated with a port before the first
8407c478bdstevel@tonic-gate	 * object association is requested. Another reason to pre_associate
8417c478bdstevel@tonic-gate	 * a particular source with a port is because of performance.
8427c478bdstevel@tonic-gate	 */
8447c478bdstevel@tonic-gate	for (pks = port_ksource_tab; pks->pks_source != 0; pks++)
8457c478bdstevel@tonic-gate		port_add_ksource_local(pp, pks);
8497c478bdstevel@tonic-gate * The port_add_ksource_local() function is being used to associate
8507c478bdstevel@tonic-gate * event sources with every new port.
8517c478bdstevel@tonic-gate * The event sources need to be added to port_ksource_tab[].
8527c478bdstevel@tonic-gate */
8537c478bdstevel@tonic-gatestatic void
8547c478bdstevel@tonic-gateport_add_ksource_local(port_t *pp, port_ksource_t *pks)
8567c478bdstevel@tonic-gate	port_source_t	*pse;
8577c478bdstevel@tonic-gate	port_source_t	**ps;
8597c478bdstevel@tonic-gate	mutex_enter(&pp->port_queue.portq_source_mutex);
8607c478bdstevel@tonic-gate	ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)];
8617c478bdstevel@tonic-gate	for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
8627c478bdstevel@tonic-gate		if (pse->portsrc_source == pks->pks_source)
8637c478bdstevel@tonic-gate			break;
8647c478bdstevel@tonic-gate	}
8667c478bdstevel@tonic-gate	if (pse == NULL) {
8677c478bdstevel@tonic-gate		/* associate new source with the port */
8687c478bdstevel@tonic-gate		pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP);
8697c478bdstevel@tonic-gate		pse->portsrc_source = pks->pks_source;
8707c478bdstevel@tonic-gate		pse->portsrc_close = pks->pks_close;
8717c478bdstevel@tonic-gate		pse->portsrc_closearg = pks->pks_closearg;
8727c478bdstevel@tonic-gate		pse->portsrc_cnt = 1;
8747c478bdstevel@tonic-gate		pks->pks_portsrc = pse;
8757c478bdstevel@tonic-gate		if (*ps != NULL)
8767c478bdstevel@tonic-gate			pse->portsrc_next = (*ps)->portsrc_next;
8777c478bdstevel@tonic-gate		*ps = pse;
8787c478bdstevel@tonic-gate	}
8797c478bdstevel@tonic-gate	mutex_exit(&pp->port_queue.portq_source_mutex);
8837c478bdstevel@tonic-gate * The port_send() function sends an event of type "source" to a
8847c478bdstevel@tonic-gate * port. This function is non-blocking. An event can be sent to
8857c478bdstevel@tonic-gate * a port as long as the number of events per port does not achieve the
8867c478bdstevel@tonic-gate * maximal allowed number of events. The max. number of events per port is
8877c478bdstevel@tonic-gate * defined by the resource control process.max-port-events.
8887c478bdstevel@tonic-gate * This function is used by the port library function port_send()
8897c478bdstevel@tonic-gate * and port_dispatch(). The port_send(3c) function is part of the
8907c478bdstevel@tonic-gate * event ports API and submits events of type PORT_SOURCE_USER. The
8917c478bdstevel@tonic-gate * port_dispatch() function is project private and it is used by library
8927c478bdstevel@tonic-gate * functions to submit events of other types than PORT_SOURCE_USER
8937c478bdstevel@tonic-gate * (e.g. PORT_SOURCE_AIO).
8947c478bdstevel@tonic-gate */
8957c478bdstevel@tonic-gatestatic int
8967c478bdstevel@tonic-gateport_send(port_t *pp, int source, int events, void *user)
8987c478bdstevel@tonic-gate	port_kevent_t	*pev;
8997c478bdstevel@tonic-gate	int		error;
9017c478bdstevel@tonic-gate	error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev);
9027c478bdstevel@tonic-gate	if (error)
9037c478bdstevel@tonic-gate		return (error);
9057c478bdstevel@tonic-gate	pev->portkev_object = 0;
9067c478bdstevel@tonic-gate	pev->portkev_events = events;
9077c478bdstevel@tonic-gate	pev->portkev_user = user;
9087c478bdstevel@tonic-gate	pev->portkev_callback = NULL;
9097c478bdstevel@tonic-gate	pev->portkev_arg = NULL;
9107c478bdstevel@tonic-gate	pev->portkev_flags = 0;
9123470957raf	port_send_event(pev);
9137c478bdstevel@tonic-gate	return (0);
9177c478bdstevel@tonic-gate * The port_noshare() function returns 0 if the current event was generated
9187c478bdstevel@tonic-gate * by the same process. Otherwise is returns a value other than 0 and the
9197c478bdstevel@tonic-gate * event should not be delivered to the current processe.
9207c478bdstevel@tonic-gate * The port_noshare() function is normally used by the port_dispatch()
9217c478bdstevel@tonic-gate * function. The port_dispatch() function is project private and can only be
9227c478bdstevel@tonic-gate * used within the event port project.
9237c478bdstevel@tonic-gate * Currently the libaio uses the port_dispatch() function to deliver events
9247c478bdstevel@tonic-gate * of types PORT_SOURCE_AIO.
9257c478bdstevel@tonic-gate */
9267c478bdstevel@tonic-gate/* ARGSUSED */
9277c478bdstevel@tonic-gatestatic int
9287c478bdstevel@tonic-gateport_noshare(void *arg, int *events, pid_t pid, int flag, void *evp)
9307c478bdstevel@tonic-gate	if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid)
9317c478bdstevel@tonic-gate		return (1);
9327c478bdstevel@tonic-gate	return (0);
9367c478bdstevel@tonic-gate * The port_dispatch_event() function is project private and it is used by
9377c478bdstevel@tonic-gate * libraries involved in the project to deliver events to the port.
9387c478bdstevel@tonic-gate * port_dispatch will sleep and wait for enough resources to satisfy the
9397c478bdstevel@tonic-gate * request, if necessary.
9407c478bdstevel@tonic-gate * The library can specify if the delivered event is shareable with other
9417c478bdstevel@tonic-gate * processes (see PORT_SYS_NOSHARE flag).
9427c478bdstevel@tonic-gate */
9437c478bdstevel@tonic-gatestatic int
9447c478bdstevel@tonic-gateport_dispatch_event(port_t *pp, int opcode, int source, int events,
9457c478bdstevel@tonic-gate    uintptr_t object, void *user)
9477c478bdstevel@tonic-gate	port_kevent_t	*pev;
9487c478bdstevel@tonic-gate	int		error;
9507c478bdstevel@tonic-gate	error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev);
9517c478bdstevel@tonic-gate	if (error)
9527c478bdstevel@tonic-gate		return (error);
9547c478bdstevel@tonic-gate	pev->portkev_object = object;
9557c478bdstevel@tonic-gate	pev->portkev_events = events;
9567c478bdstevel@tonic-gate	pev->portkev_user = user;
9577c478bdstevel@tonic-gate	pev->portkev_arg = NULL;
9587c478bdstevel@tonic-gate	if (opcode & PORT_SYS_NOSHARE) {
9597c478bdstevel@tonic-gate		pev->portkev_flags = PORT_KEV_NOSHARE;
9607c478bdstevel@tonic-gate		pev->portkev_callback = port_noshare;
9617c478bdstevel@tonic-gate	} else {
9627c478bdstevel@tonic-gate		pev->portkev_flags = 0;
9637c478bdstevel@tonic-gate		pev->portkev_callback = NULL;
9647c478bdstevel@tonic-gate	}
9663470957raf	port_send_event(pev);
9677c478bdstevel@tonic-gate	return (0);
9727c478bdstevel@tonic-gate * The port_sendn() function is the kernel implementation of the event
9737c478bdstevel@tonic-gate * port API function port_sendn(3c).
9747c478bdstevel@tonic-gate * This function is able to send an event to a list of event ports.
9757c478bdstevel@tonic-gate */
9767c478bdstevel@tonic-gatestatic int
9777c478bdstevel@tonic-gateport_sendn(int ports[], int errors[], uint_t nent, int events, void *user,
9787c478bdstevel@tonic-gate    uint_t *nget)
9807c478bdstevel@tonic-gate	port_kevent_t	*pev;
9817c478bdstevel@tonic-gate	int		errorcnt = 0;
9827c478bdstevel@tonic-gate	int		error = 0;
9837c478bdstevel@tonic-gate	int		count;
9847c478bdstevel@tonic-gate	int		port;
9857c478bdstevel@tonic-gate	int		*plist;
9867c478bdstevel@tonic-gate	int		*elist = NULL;
9877c478bdstevel@tonic-gate	file_t		*fp;
9887c478bdstevel@tonic-gate	port_t		*pp;
9907c478bdstevel@tonic-gate	if (nent == 0 || nent > port_max_list)
9917c478bdstevel@tonic-gate		return (EINVAL);
9937c478bdstevel@tonic-gate	plist = kmem_alloc(nent * sizeof (int), KM_SLEEP);
9947c478bdstevel@tonic-gate	if (copyin((void *)ports, plist, nent * sizeof (int))) {
9957c478bdstevel@tonic-gate		kmem_free(plist, nent * sizeof (int));
9967c478bdstevel@tonic-gate		return (EFAULT);
9977c478bdstevel@tonic-gate	}
9997c478bdstevel@tonic-gate	/*
10007c478bdstevel@tonic-gate	 * Scan the list for event port file descriptors and send the
10017c478bdstevel@tonic-gate	 * attached user event data embedded in a event of type
10027c478bdstevel@tonic-gate	 * PORT_SOURCE_USER to every event port in the list.
10037c478bdstevel@tonic-gate	 * If a list entry is not a valid event port then the corresponding
10047c478bdstevel@tonic-gate	 * error code will be stored in the errors[] list with the same
10057c478bdstevel@tonic-gate	 * list offset as in the ports[] list.
10067c478bdstevel@tonic-gate	 */
10087c478bdstevel@tonic-gate	for (count = 0; count < nent; count++) {
10097c478bdstevel@tonic-gate		port = plist[count];
10107c478bdstevel@tonic-gate		if ((fp = getf(port)) == NULL) {
10117c478bdstevel@tonic-gate			elist = port_errorn(elist, nent, EBADF, count);
10127c478bdstevel@tonic-gate			errorcnt++;
10137c478bdstevel@tonic-gate			continue;
10147c478bdstevel@tonic-gate		}
10167c478bdstevel@tonic-gate		pp = VTOEP(fp->f_vnode);
10177c478bdstevel@tonic-gate		if (fp->f_vnode->v_type != VPORT) {
10187c478bdstevel@tonic-gate			releasef(port);
10197c478bdstevel@tonic-gate			elist = port_errorn(elist, nent, EBADFD, count);
10207c478bdstevel@tonic-gate			errorcnt++;
10217c478bdstevel@tonic-gate			continue;
10227c478bdstevel@tonic-gate		}
10247c478bdstevel@tonic-gate		error = port_alloc_event_local(pp, PORT_SOURCE_USER,
10257c478bdstevel@tonic-gate		    PORT_ALLOC_DEFAULT, &pev);
10267c478bdstevel@tonic-gate		if (error) {
10277c478bdstevel@tonic-gate			releasef(port);
10287c478bdstevel@tonic-gate			elist = port_errorn(elist, nent, error, count);
10297c478bdstevel@tonic-gate			errorcnt++;
10307c478bdstevel@tonic-gate			continue;
10317c478bdstevel@tonic-gate		}
10337c478bdstevel@tonic-gate		pev->portkev_object = 0;
10347c478bdstevel@tonic-gate		pev->portkev_events = events;
10357c478bdstevel@tonic-gate		pev->portkev_user = user;
10367c478bdstevel@tonic-gate		pev->portkev_callback = NULL;
10377c478bdstevel@tonic-gate		pev->portkev_arg = NULL;
10387c478bdstevel@tonic-gate		pev->portkev_flags = 0;
10403470957raf		port_send_event(pev);
10417c478bdstevel@tonic-gate		releasef(port);
10427c478bdstevel@tonic-gate	}
10437c478bdstevel@tonic-gate	if (errorcnt) {
10447c478bdstevel@tonic-gate		error = EIO;
10457c478bdstevel@tonic-gate		if (copyout(elist, (void *)errors, nent * sizeof (int)))
10467c478bdstevel@tonic-gate			error = EFAULT;
10477c478bdstevel@tonic-gate		kmem_free(elist, nent * sizeof (int));
10487c478bdstevel@tonic-gate	}
10497c478bdstevel@tonic-gate	*nget = nent - errorcnt;
10507c478bdstevel@tonic-gate	kmem_free(plist, nent * sizeof (int));
10517c478bdstevel@tonic-gate	return (error);
10547c478bdstevel@tonic-gatestatic int *
10557c478bdstevel@tonic-gateport_errorn(int *elist, int nent, int error, int index)
10577c478bdstevel@tonic-gate	if (elist == NULL)
10587c478bdstevel@tonic-gate		elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP);
10597c478bdstevel@tonic-gate	elist[index] = error;
10607c478bdstevel@tonic-gate	return (elist);
10647c478bdstevel@tonic-gate * port_alert()
10657c478bdstevel@tonic-gate * The port_alert() funcion is a high priority event and it is always set
10667c478bdstevel@tonic-gate * on top of the queue. It is also delivered as single event.
10677c478bdstevel@tonic-gate * flags:
10687c478bdstevel@tonic-gate *	- SET	:overwrite current alert data
10697c478bdstevel@tonic-gate *	- UPDATE:set alert data or return EBUSY if alert mode is already set
10707c478bdstevel@tonic-gate *
10717c478bdstevel@tonic-gate * - set the ALERT flag
10727c478bdstevel@tonic-gate * - wakeup all sleeping threads
10737c478bdstevel@tonic-gate */
10747c478bdstevel@tonic-gatestatic int
10757c478bdstevel@tonic-gateport_alert(port_t *pp, int flags, int events, void *user)
10777c478bdstevel@tonic-gate	port_queue_t	*portq;
10787c478bdstevel@tonic-gate	portget_t	*pgetp;
10797c478bdstevel@tonic-gate	port_alert_t	*pa;
10817c478bdstevel@tonic-gate	if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID)
10827c478bdstevel@tonic-gate		return (EINVAL);
10847c478bdstevel@tonic-gate	portq = &pp->port_queue;
10857c478bdstevel@tonic-gate	pa = &portq->portq_alert;
10867c478bdstevel@tonic-gate	mutex_enter(&portq->portq_mutex);
10887c478bdstevel@tonic-gate	/* check alert conditions */
10897c478bdstevel@tonic-gate	if (flags == PORT_ALERT_UPDATE) {
10907c478bdstevel@tonic-gate		if (portq->portq_flags & PORTQ_ALERT) {
10917c478bdstevel@tonic-gate			mutex_exit(&portq->portq_mutex);
10927c478bdstevel@tonic-gate			return (EBUSY);
10937c478bdstevel@tonic-gate		}
10947c478bdstevel@tonic-gate	}
10967c478bdstevel@tonic-gate	/*
10977c478bdstevel@tonic-gate	 * Store alert data in the port to be delivered to threads
10987c478bdstevel@tonic-gate	 * which are using port_get(n) to retrieve events.
10997c478bdstevel@tonic-gate	 */
11017c478bdstevel@tonic-gate	portq->portq_flags |= PORTQ_ALERT;
11027c478bdstevel@tonic-gate	pa->portal_events = events;		/* alert info */
11037c478bdstevel@tonic-gate	pa->portal_pid = curproc->p_pid;	/* process owner */
11047c478bdstevel@tonic-gate	pa->portal_object = 0;			/* no object */
11057c478bdstevel@tonic-gate	pa->portal_user = user;			/* user alert data */
11077c478bdstevel@tonic-gate	/* alert and deliver alert data to waiting threads */
11087c478bdstevel@tonic-gate	pgetp = portq->portq_thread;
11097c478bdstevel@tonic-gate	if (pgetp == NULL) {
11107c478bdstevel@tonic-gate		/* no threads waiting for events */
11117c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
11127c478bdstevel@tonic-gate		return (0);
11137c478bdstevel@tonic-gate	}
11157c478bdstevel@tonic-gate	/*
11167c478bdstevel@tonic-gate	 * Set waiting threads in alert mode (PORTGET_ALERT)..
11177c478bdstevel@tonic-gate	 * Every thread waiting for events already allocated a portget_t
11187c478bdstevel@tonic-gate	 * structure to sleep on.
11197c478bdstevel@tonic-gate	 * The port alert arguments are stored in the portget_t structure.
11207c478bdstevel@tonic-gate	 * The PORTGET_ALERT flag is set to indicate the thread to return
11217c478bdstevel@tonic-gate	 * immediately with the alert event.
11227c478bdstevel@tonic-gate	 */
11237c478bdstevel@tonic-gate	do {
11247c478bdstevel@tonic-gate		if ((pgetp->portget_state & PORTGET_ALERT) == 0) {
11257c478bdstevel@tonic-gate			pa = &pgetp->portget_alert;
11267c478bdstevel@tonic-gate			pa->portal_events = events;
11277c478bdstevel@tonic-gate			pa->portal_object = 0;
11287c478bdstevel@tonic-gate			pa->portal_user = user;
11297c478bdstevel@tonic-gate			pgetp->portget_state |= PORTGET_ALERT;
11307c478bdstevel@tonic-gate			cv_signal(&pgetp->portget_cv);
11317c478bdstevel@tonic-gate		}
11327c478bdstevel@tonic-gate	} while ((pgetp = pgetp->portget_next) != portq->portq_thread);
11337c478bdstevel@tonic-gate	mutex_exit(&portq->portq_mutex);
11347c478bdstevel@tonic-gate	return (0);
11387c478bdstevel@tonic-gate * Clear alert state of the port
11397c478bdstevel@tonic-gate */
11407c478bdstevel@tonic-gatestatic void
11417c478bdstevel@tonic-gateport_remove_alert(port_queue_t *portq)
11437c478bdstevel@tonic-gate	mutex_enter(&portq->portq_mutex);
11447c478bdstevel@tonic-gate	portq->portq_flags &= ~PORTQ_ALERT;
11457c478bdstevel@tonic-gate	mutex_exit(&portq->portq_mutex);
11497c478bdstevel@tonic-gate * The port_getn() function is used to retrieve events from a port.
11507c478bdstevel@tonic-gate *
11517c478bdstevel@tonic-gate * The port_getn() function returns immediately if there are enough events
11527c478bdstevel@tonic-gate * available in the port to satisfy the request or if the port is in alert
11537c478bdstevel@tonic-gate * mode (see port_alert(3c)).
11547c478bdstevel@tonic-gate * The timeout argument of port_getn(3c) -which is embedded in the
11557c478bdstevel@tonic-gate * port_gettimer_t structure- specifies if the system call should block or if it
11567c478bdstevel@tonic-gate * should return immediately depending on the number of events available.
11577c478bdstevel@tonic-gate * This function is internally used by port_getn(3c) as well as by
11587c478bdstevel@tonic-gate * port_get(3c).
11597c478bdstevel@tonic-gate */
11607c478bdstevel@tonic-gatestatic int
11617c478bdstevel@tonic-gateport_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
11627c478bdstevel@tonic-gate    port_gettimer_t *pgt)
11647c478bdstevel@tonic-gate	port_queue_t	*portq;
116543bd900Toomas Soome	port_kevent_t	*pev;
116643bd900Toomas Soome	port_kevent_t	*lev;
11677c478bdstevel@tonic-gate	int		error = 0;
11687c478bdstevel@tonic-gate	uint_t		nmax;
11697c478bdstevel@tonic-gate	uint_t		nevents;
11707c478bdstevel@tonic-gate	uint_t		eventsz;
11717c478bdstevel@tonic-gate	port_event_t	*kevp;
11727c478bdstevel@tonic-gate	list_t		*glist;
11737c478bdstevel@tonic-gate	uint_t		tnent;
11747c478bdstevel@tonic-gate	int		rval;
11757c478bdstevel@tonic-gate	int		blocking = -1;
11763348528dm	int		timecheck;
11777c478bdstevel@tonic-gate	int		flag;
11787c478bdstevel@tonic-gate	timespec_t	rqtime;
11797c478bdstevel@tonic-gate	timespec_t	*rqtp = NULL;
11807c478bdstevel@tonic-gate	portget_t	*pgetp;
11817c478bdstevel@tonic-gate	void		*results;
11827c478bdstevel@tonic-gate	model_t		model = get_udatamodel();
11847c478bdstevel@tonic-gate	flag = pgt->pgt_flags;
11867c478bdstevel@tonic-gate	if (*nget > max && max > 0)
11877c478bdstevel@tonic-gate		return (EINVAL);
11897c478bdstevel@tonic-gate	portq = &pp->port_queue;
11907c478bdstevel@tonic-gate	mutex_enter(&portq->portq_mutex);
11917c478bdstevel@tonic-gate	if (max == 0) {
11927c478bdstevel@tonic-gate		/*
11933470957raf		 * Return number of objects with events.
11943470957raf		 * The port_block() call is required to synchronize this
11957c478bdstevel@tonic-gate		 * thread with another possible thread, which could be
11967c478bdstevel@tonic-gate		 * retrieving events from the port queue.
11977c478bdstevel@tonic-gate		 */
11983470957raf		port_block(portq);
11997c478bdstevel@tonic-gate		/*
12007c478bdstevel@tonic-gate		 * Check if a second thread is currently retrieving events
12017c478bdstevel@tonic-gate		 * and it is using the temporary event queue.
12027c478bdstevel@tonic-gate		 */
12037c478bdstevel@tonic-gate		if (portq->portq_tnent) {
12047c478bdstevel@tonic-gate			/* put remaining events back to the port queue */
12057c478bdstevel@tonic-gate			port_push_eventq(portq);
12067c478bdstevel@tonic-gate		}
12077c478bdstevel@tonic-gate		*nget = portq->portq_nent;
12083470957raf		port_unblock(portq);
12097c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
12107c478bdstevel@tonic-gate		return (0);
12117c478bdstevel@tonic-gate	}
12137c478bdstevel@tonic-gate	if (uevp == NULL) {
12147c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
12157c478bdstevel@tonic-gate		return (EFAULT);
12167c478bdstevel@tonic-gate	}
12177c478bdstevel@tonic-gate	if (*nget == 0) {		/* no events required */
12187c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
12197c478bdstevel@tonic-gate		return (0);
12207c478bdstevel@tonic-gate	}
12227c478bdstevel@tonic-gate	/* port is being closed ... */
12237c478bdstevel@tonic-gate	if (portq->portq_flags & PORTQ_CLOSE) {
12247c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
12257c478bdstevel@tonic-gate		return (EBADFD);
12267c478bdstevel@tonic-gate	}
12287c478bdstevel@tonic-gate	/* return immediately if port in alert mode */
12297c478bdstevel@tonic-gate	if (portq->portq_flags & PORTQ_ALERT) {
12307c478bdstevel@tonic-gate		error = port_get_alert(&portq->portq_alert, uevp);
12317c478bdstevel@tonic-gate		if (error == 0)
12327c478bdstevel@tonic-gate			*nget = 1;
12337c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
12347c478bdstevel@tonic-gate		return (error);
12357c478bdstevel@tonic-gate	}
12377c478bdstevel@tonic-gate	portq->portq_thrcnt++;
12397c478bdstevel@tonic-gate	/*
12407c478bdstevel@tonic-gate	 * Now check if the completed events satisfy the
12417c478bdstevel@tonic-gate	 * "wait" requirements of the current thread:
12427c478bdstevel@tonic-gate	 */
12447c478bdstevel@tonic-gate	if (pgt->pgt_loop) {
12457c478bdstevel@tonic-gate		/*
12467c478bdstevel@tonic-gate		 * loop entry of same thread
12477c478bdstevel@tonic-gate		 * pgt_loop is set when the current thread returns
12487c478bdstevel@tonic-gate		 * prematurely from this function. That could happen
12497c478bdstevel@tonic-gate		 * when a port is being shared between processes and
12507c478bdstevel@tonic-gate		 * this thread could not find events to return.
12517c478bdstevel@tonic-gate		 * It is not allowed to a thread to retrieve non-shareable
12527c478bdstevel@tonic-gate		 * events generated in other processes.
12537c478bdstevel@tonic-gate		 * PORTQ_WAIT_EVENTS is set when a thread already
12547c478bdstevel@tonic-gate		 * checked the current event queue and no new events
12557c478bdstevel@tonic-gate		 * are added to the queue.
12567c478bdstevel@tonic-gate		 */
12577c478bdstevel@tonic-gate		if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
12587c478bdstevel@tonic-gate		    (portq->portq_nent >= *nget)) {
12597c478bdstevel@tonic-gate			/* some new events arrived ...check them */
12607c478bdstevel@tonic-gate			goto portnowait;
12617c478bdstevel@tonic-gate		}
12627c478bdstevel@tonic-gate		rqtp = pgt->pgt_rqtp;
12633348528dm		timecheck = pgt->pgt_timecheck;
12647c478bdstevel@tonic-gate		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
12657c478bdstevel@tonic-gate	} else {
12667c478bdstevel@tonic-gate		/* check if enough events are available ... */
12677c478bdstevel@tonic-gate		if (portq->portq_nent >= *nget)
12687c478bdstevel@tonic-gate			goto portnowait;
12697c478bdstevel@tonic-gate		/*
12707c478bdstevel@tonic-gate		 * There are not enough events available to satisfy
12717c478bdstevel@tonic-gate		 * the request, check timeout value and wait for
12727c478bdstevel@tonic-gate		 * incoming events.
12737c478bdstevel@tonic-gate		 */
12747c478bdstevel@tonic-gate		error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
12757c478bdstevel@tonic-gate		    &blocking, flag);
12767c478bdstevel@tonic-gate		if (error) {
12777c478bdstevel@tonic-gate			port_check_return_cond(portq);
12787c478bdstevel@tonic-gate			mutex_exit(&portq->portq_mutex);
12797c478bdstevel@tonic-gate			return (error);
12807c478bdstevel@tonic-gate		}
12827c478bdstevel@tonic-gate		if (blocking == 0) /* don't block, check fired events */
12837c478bdstevel@tonic-gate			goto portnowait;
12857c478bdstevel@tonic-gate		if (rqtp != NULL) {
12867c478bdstevel@tonic-gate			timespec_t	now;
12873348528dm			timecheck = timechanged;
12887c478bdstevel@tonic-gate			gethrestime(&now);
12897c478bdstevel@tonic-gate			timespecadd(rqtp, &now);
12907c478bdstevel@tonic-gate		}
12917c478bdstevel@tonic-gate	}
12937c478bdstevel@tonic-gate	/* enqueue thread in the list of waiting threads */
12947c478bdstevel@tonic-gate	pgetp = port_queue_thread(portq, *nget);
12977c478bdstevel@tonic-gate	/* Wait here until return conditions met */
12987c478bdstevel@tonic-gate	for (;;) {
12997c478bdstevel@tonic-gate		if (pgetp->portget_state & PORTGET_ALERT) {
13007c478bdstevel@tonic-gate			/* reap alert event and return */
13017c478bdstevel@tonic-gate			error = port_get_alert(&pgetp->portget_alert, uevp);
13027c478bdstevel@tonic-gate			if (error)
13037c478bdstevel@tonic-gate				*nget = 0;
13047c478bdstevel@tonic-gate			else
13057c478bdstevel@tonic-gate				*nget = 1;
13067c478bdstevel@tonic-gate			port_dequeue_thread(&pp->port_queue, pgetp);
13077c478bdstevel@tonic-gate			portq->portq_thrcnt--;
13087c478bdstevel@tonic-gate			mutex_exit(&portq->portq_mutex);
13097c478bdstevel@tonic-gate			return (error);
13107c478bdstevel@tonic-gate		}
13127c478bdstevel@tonic-gate		/*
13137c478bdstevel@tonic-gate		 * Check if some other thread is already retrieving
13147c478bdstevel@tonic-gate		 * events (portq_getn > 0).
13157c478bdstevel@tonic-gate		 */
13177c478bdstevel@tonic-gate		if ((portq->portq_getn  == 0) &&
13187c478bdstevel@tonic-gate		    ((portq)->portq_nent >= *nget) &&
13197c478bdstevel@tonic-gate		    (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
13207c478bdstevel@tonic-gate		    !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
13217c478bdstevel@tonic-gate			break;
13237c478bdstevel@tonic-gate		if (portq->portq_flags & PORTQ_CLOSE) {
13247c478bdstevel@tonic-gate			error = EBADFD;
13257c478bdstevel@tonic-gate			break;
13267c478bdstevel@tonic-gate		}
13287c478bdstevel@tonic-gate		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
13293348528dm		    rqtp, timecheck);
13317c478bdstevel@tonic-gate		if (rval <= 0) {
13327c478bdstevel@tonic-gate			error = (rval == 0) ? EINTR : ETIME;
13337c478bdstevel@tonic-gate			break;
13347c478bdstevel@tonic-gate		}
13357c478bdstevel@tonic-gate	}
13377c478bdstevel@tonic-gate	/* take thread out of the wait queue */
13387c478bdstevel@tonic-gate	port_dequeue_thread(portq, pgetp);
13407c478bdstevel@tonic-gate	if (error != 0 && (error == EINTR || error == EBADFD ||
13417c478bdstevel@tonic-gate	    (error == ETIME && flag))) {
13427c478bdstevel@tonic-gate		/* return without events */
13437c478bdstevel@tonic-gate		port_check_return_cond(portq);
13447c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
13457c478bdstevel@tonic-gate		return (error);
13467c478bdstevel@tonic-gate	}
13497c478bdstevel@tonic-gate	/*
13507c478bdstevel@tonic-gate	 * Move port event queue to a temporary event queue .
13517c478bdstevel@tonic-gate	 * New incoming events will be continue be posted to the event queue
13527c478bdstevel@tonic-gate	 * and they will not be considered by the current thread.
13537c478bdstevel@tonic-gate	 * The idea is to avoid lock contentions or an often locking/unlocking
13547c478bdstevel@tonic-gate	 * of the port queue mutex. The contention and performance degradation
13557c478bdstevel@tonic-gate	 * could happen because:
13567c478bdstevel@tonic-gate	 * a) incoming events use the port queue mutex to enqueue new events and
13577c478bdstevel@tonic-gate	 * b) before the event can be delivered to the application it is
13587c478bdstevel@tonic-gate	 *    necessary to notify the event sources about the event delivery.
13597c478bdstevel@tonic-gate	 *    Sometimes the event sources can require a long time to return and
13607c478bdstevel@tonic-gate	 *    the queue mutex would block incoming events.
13617c478bdstevel@tonic-gate	 * During this time incoming events (port_send_event()) do not need
13627c478bdstevel@tonic-gate	 * to awake threads waiting for events. Before the current thread
13637c478bdstevel@tonic-gate	 * returns it will check the conditions to awake other waiting threads.
13647c478bdstevel@tonic-gate	 */
13657c478bdstevel@tonic-gate	portq->portq_getn++;	/* number of threads retrieving events */
13663470957raf	port_block(portq);	/* block other threads here */
13673470957raf	nmax = max < portq->portq_nent ? max : portq->portq_nent;
13697c478bdstevel@tonic-gate	if (portq->portq_tnent) {
13707c478bdstevel@tonic-gate		/*
13717c478bdstevel@tonic-gate		 * Move remaining events from previous thread back to the
13727c478bdstevel@tonic-gate		 * port event queue.
13737c478bdstevel@tonic-gate		 */
13747c478bdstevel@tonic-gate		port_push_eventq(portq);
13757c478bdstevel@tonic-gate	}
13767c478bdstevel@tonic-gate	/* move port event queue to a temporary queue */
13777c478bdstevel@tonic-gate	list_move_tail(&portq->portq_get_list, &portq->portq_list);
13787c478bdstevel@tonic-gate	glist = &portq->portq_get_list;	/* use temporary event queue */
13797c478bdstevel@tonic-gate	tnent = portq->portq_nent;	/* get current number of events */
13807c478bdstevel@tonic-gate	portq->portq_nent = 0;		/* no events in the port event queue */
13817c478bdstevel@tonic-gate	portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
13827c478bdstevel@tonic-gate	mutex_exit(&portq->portq_mutex);    /* event queue can be reused now */
13847c478bdstevel@tonic-gate	if (model == DATAMODEL_NATIVE) {
13857c478bdstevel@tonic-gate		eventsz = sizeof (port_event_t);
1386d158018Bryan Cantrill
1387d158018Bryan Cantrill		if (nmax == 0) {
1388d158018Bryan Cantrill			kevp = NULL;
1389d158018Bryan Cantrill		} else {
1390d158018Bryan Cantrill			kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
1391d158018Bryan Cantrill			if (kevp == NULL) {
1392d158018Bryan Cantrill				if (nmax > pp->port_max_list)
1393d158018Bryan Cantrill					nmax = pp->port_max_list;
1394d158018Bryan Cantrill				kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
1395d158018Bryan Cantrill			}
13967c478bdstevel@tonic-gate		}
1397d158018Bryan Cantrill
13987c478bdstevel@tonic-gate		results = kevp;
13997c478bdstevel@tonic-gate		lev = NULL;	/* start with first event in the queue */
14007c478bdstevel@tonic-gate		for (nevents = 0; nevents < nmax; ) {
14017c478bdstevel@tonic-gate			pev = port_get_kevent(glist, lev);
14027c478bdstevel@tonic-gate			if (pev == NULL)	/* no more events available */
14037c478bdstevel@tonic-gate				break;
14047c478bdstevel@tonic-gate			if (pev->portkev_flags & PORT_KEV_FREE) {
14057c478bdstevel@tonic-gate				/* Just discard event */
14067c478bdstevel@tonic-gate				list_remove(glist, pev);
14077c478bdstevel@tonic-gate				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
14087c478bdstevel@tonic-gate				if (PORT_FREE_EVENT(pev))
14097c478bdstevel@tonic-gate					port_free_event_local(pev, 0);
14107c478bdstevel@tonic-gate				tnent--;
14117c478bdstevel@tonic-gate				continue;
14127c478bdstevel@tonic-gate			}
14147c478bdstevel@tonic-gate			/* move event data to copyout list */
14157c478bdstevel@tonic-gate			if (port_copy_event(&kevp[nevents], pev, glist)) {
14167c478bdstevel@tonic-gate				/*
14177c478bdstevel@tonic-gate				 * Event can not be delivered to the
14187c478bdstevel@tonic-gate				 * current process.
14197c478bdstevel@tonic-gate				 */
14207c478bdstevel@tonic-gate				if (lev != NULL)
14217c478bdstevel@tonic-gate					list_insert_after(glist, lev, pev);
14227c478bdstevel@tonic-gate				else
14237c478bdstevel@tonic-gate					list_insert_head(glist, pev);
14247c478bdstevel@tonic-gate				lev = pev;  /* last checked event */
14257c478bdstevel@tonic-gate			} else {
14267c478bdstevel@tonic-gate				nevents++;	/* # of events ready */
14277c478bdstevel@tonic-gate			}
14287c478bdstevel@tonic-gate		}
14297c478bdstevel@tonic-gate#ifdef	_SYSCALL32_IMPL
14307c478bdstevel@tonic-gate	} else {
14317c478bdstevel@tonic-gate		port_event32_t	*kevp32;
14337c478bdstevel@tonic-gate		eventsz = sizeof (port_event32_t);
1434d158018Bryan Cantrill
1435d158018Bryan Cantrill		if (nmax == 0) {
1436d158018Bryan Cantrill			kevp32 = NULL;
1437d158018Bryan Cantrill		} else {
1438d158018Bryan Cantrill			kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
1439d158018Bryan Cantrill			if (kevp32 == NULL) {
1440d158018Bryan Cantrill				if (nmax > pp->port_max_list)
1441d158018Bryan Cantrill					nmax = pp->port_max_list;
1442d158018Bryan Cantrill				kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
1443d158018Bryan Cantrill			}
14447c478bdstevel@tonic-gate		}
1445d158018Bryan Cantrill
14467c478bdstevel@tonic-gate		results = kevp32;
14477c478bdstevel@tonic-gate		lev = NULL;	/* start with first event in the queue */
14487c478bdstevel@tonic-gate		for (nevents = 0; nevents < nmax; ) {
14497c478bdstevel@tonic-gate			pev = port_get_kevent(glist, lev);
14507c478bdstevel@tonic-gate			if (pev == NULL)	/* no more events available */
14517c478bdstevel@tonic-gate				break;
14527c478bdstevel@tonic-gate			if (pev->portkev_flags & PORT_KEV_FREE) {
14537c478bdstevel@tonic-gate				/* Just discard event */
14547c478bdstevel@tonic-gate				list_remove(glist, pev);
14557c478bdstevel@tonic-gate				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
14567c478bdstevel@tonic-gate				if (PORT_FREE_EVENT(pev))
14577c478bdstevel@tonic-gate					port_free_event_local(pev, 0);
14587c478bdstevel@tonic-gate				tnent--;
14597c478bdstevel@tonic-gate				continue;
14607c478bdstevel@tonic-gate			}
14627c478bdstevel@tonic-gate			/* move event data to copyout list */
14637c478bdstevel@tonic-gate			if (port_copy_event32(&kevp32[nevents], pev, glist)) {
14647c478bdstevel@tonic-gate				/*
14657c478bdstevel@tonic-gate				 * Event can not be delivered to the
14667c478bdstevel@tonic-gate				 * current process.
14677c478bdstevel@tonic-gate				 */
14687c478bdstevel@tonic-gate				if (lev != NULL)
14697c478bdstevel@tonic-gate					list_insert_after(glist, lev, pev);
14707c478bdstevel@tonic-gate				else
14717c478bdstevel@tonic-gate					list_insert_head(glist, pev);
14727c478bdstevel@tonic-gate				lev = pev;  /* last checked event */
14737c478bdstevel@tonic-gate			} else {
14747c478bdstevel@tonic-gate				nevents++;	/* # of events ready */
14757c478bdstevel@tonic-gate			}
14767c478bdstevel@tonic-gate		}
14777c478bdstevel@tonic-gate#endif	/* _SYSCALL32_IMPL */
14787c478bdstevel@tonic-gate	}
14807c478bdstevel@tonic-gate	/*
14817c478bdstevel@tonic-gate	 *  Remember number of remaining events in the temporary event queue.
14827c478bdstevel@tonic-gate	 */
14837c478bdstevel@tonic-gate	portq->portq_tnent = tnent - nevents;
14857c478bdstevel@tonic-gate	/*
14867c478bdstevel@tonic-gate	 * Work to do before return :
14877c478bdstevel@tonic-gate	 * - push list of remaining events back to the top of the standard
14887c478bdstevel@tonic-gate	 *   port queue.
14897c478bdstevel@tonic-gate	 * - if this is the last thread calling port_get(n) then wakeup the
14907c478bdstevel@tonic-gate	 *   thread waiting on close(2).
14917c478bdstevel@tonic-gate	 * - check for a deferred cv_signal from port_send_event() and wakeup
14927c478bdstevel@tonic-gate	 *   the sleeping thread.
14937c478bdstevel@tonic-gate	 */
14957c478bdstevel@tonic-gate	mutex_enter(&portq->portq_mutex);
14963470957raf	port_unblock(portq);
14977c478bdstevel@tonic-gate	if (portq->portq_tnent) {
14987c478bdstevel@tonic-gate		/*
14997c478bdstevel@tonic-gate		 * move remaining events in the temporary event queue back
15007c478bdstevel@tonic-gate		 * to the port event queue
15017c478bdstevel@tonic-gate		 */
15027c478bdstevel@tonic-gate		port_push_eventq(portq);
15037c478bdstevel@tonic-gate	}
15047c478bdstevel@tonic-gate	portq->portq_getn--;	/* update # of threads retrieving events */
15057c478bdstevel@tonic-gate	if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
15067c478bdstevel@tonic-gate		/* Last thread => check close(2) conditions ... */
15077c478bdstevel@tonic-gate		if (portq->portq_flags & PORTQ_CLOSE) {
15087c478bdstevel@tonic-gate			cv_signal(&portq->portq_closecv);
15097c478bdstevel@tonic-gate			mutex_exit(&portq->portq_mutex);
15107c478bdstevel@tonic-gate			kmem_free(results, eventsz * nmax);
15117c478bdstevel@tonic-gate			/* do not copyout events */
15127c478bdstevel@tonic-gate			*nget = 0;
15137c478bdstevel@tonic-gate			return (EBADFD);
15147c478bdstevel@tonic-gate		}
15157c478bdstevel@tonic-gate	} else if (portq->portq_getn == 0) {
15167c478bdstevel@tonic-gate		/*
15177c478bdstevel@tonic-gate		 * no other threads retrieving events ...
15187c478bdstevel@tonic-gate		 * check wakeup conditions of sleeping threads
15197c478bdstevel@tonic-gate		 */
15207c478bdstevel@tonic-gate		if ((portq->portq_thread != NULL) &&
15217c478bdstevel@tonic-gate		    (portq->portq_nent >= portq->portq_nget))
15227c478bdstevel@tonic-gate			cv_signal(&portq->portq_thread->portget_cv);
15237c478bdstevel@tonic-gate	}
15257c478bdstevel@tonic-gate	/*
15267c478bdstevel@tonic-gate	 * Check PORTQ_POLLIN here because the current thread set temporarily
15277c478bdstevel@tonic-gate	 * the number of events in the queue to zero.
15287c478bdstevel@tonic-gate	 */
15297c478bdstevel@tonic-gate	if (portq->portq_flags & PORTQ_POLLIN) {
15307c478bdstevel@tonic-gate		portq->portq_flags &= ~PORTQ_POLLIN;
15317c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
15327c478bdstevel@tonic-gate		pollwakeup(&pp->port_pollhd, POLLIN);
15337c478bdstevel@tonic-gate	} else {
15347c478bdstevel@tonic-gate		mutex_exit(&portq->portq_mutex);
15357c478bdstevel@tonic-gate	}
15377c478bdstevel@tonic-gate	/* now copyout list of user event structures to user space */
15387c478bdstevel@tonic-gate	if (nevents) {
15397c478bdstevel@tonic-gate		if (copyout(results, uevp, nevents * eventsz))
15407c478bdstevel@tonic-gate			error = EFAULT;
15417c478bdstevel@tonic-gate	}
15427c478bdstevel@tonic-gate	kmem_free(results, eventsz * nmax);
15447c478bdstevel@tonic-gate	if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
15457c478bdstevel@tonic-gate		/* no events retrieved: check loop conditions */
15467c478bdstevel@tonic-gate		if (blocking == -1) {
15477c478bdstevel@tonic-gate			/* no timeout checked */
15487c478bdstevel@tonic-gate			error = port_get_timeout(pgt->pgt_timeout,
15497c478bdstevel@tonic-gate			    &pgt->pgt_rqtime, &rqtp, &blocking, flag);
15507c478bdstevel@tonic-gate			if (error) {
15517c478bdstevel@tonic-gate				*nget = nevents;
15527c478bdstevel@tonic-gate				return (error);
15537c478bdstevel@tonic-gate			}
15547c478bdstevel@tonic-gate			if (rqtp != NULL) {
15557c478bdstevel@tonic-gate				timespec_t	now;
15563348528dm				pgt->pgt_timecheck = timechanged;
15577c478bdstevel@tonic-gate				gethrestime(&now);
15587c478bdstevel@tonic-gate				timespecadd(&pgt->pgt_rqtime, &now);
15597c478bdstevel@tonic-gate			}
15607c478bdstevel@tonic-gate			pgt->pgt_rqtp = rqtp;
15617c478bdstevel@tonic-gate		} else {
15627c478bdstevel@tonic-gate			/* timeout already checked -> remember values */
15637c478bdstevel@tonic-gate			pgt->pgt_rqtp = rqtp;
1564f7ccf9bpraks			if (rqtp != NULL) {
15653348528dm				pgt->pgt_timecheck = timecheck;
1566f7ccf9bpraks				pgt->pgt_rqtime = *rqtp;
1567f7ccf9bpraks			}
15687c478bdstevel@tonic-gate		}
15697c478bdstevel@tonic-gate		if (blocking)
15707c478bdstevel@tonic-gate			/* timeout remaining */
15717c478bdstevel@tonic-gate			pgt->pgt_loop = 1;
15727c478bdstevel@tonic-gate	}
15747c478bdstevel@tonic-gate	/* set number of user event structures completed */
15757c478bdstevel@tonic-gate	*nget = nevents;
15767c478bdstevel@tonic-gate	return (error);
15807c478bdstevel@tonic-gate * 1. copy kernel event structure to user event structure.
15817c478bdstevel@tonic-gate * 2. PORT_KEV_WIRED event structures will be reused by the "source"
15827c478bdstevel@tonic-gate * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
15837c478bdstevel@tonic-gate * 4. Other types of event structures can be delivered back to the port cache
15847c478bdstevel@tonic-gate *    (port_free_event_local()).
15857c478bdstevel@tonic-gate * 5. The event source callback function is the last opportunity for the
15867c478bdstevel@tonic-gate *    event source to update events, to free local resources associated with
15877c478bdstevel@tonic-gate *    the event or to deny the delivery of the event.
15887c478bdstevel@tonic-gate */
15897c478bdstevel@tonic-gatestatic int
15907c478bdstevel@tonic-gateport_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list)
15927c478bdstevel@tonic-gate	int	free_event = 0;
15937c478bdstevel@tonic-gate	int	flags;
15947c478bdstevel@tonic-gate	int	error;
15967c478bdstevel@tonic-gate	puevp->portev_source = pkevp->portkev_source;
15977c478bdstevel@tonic-gate	puevp->portev_object = pkevp->portkev_object;
15987c478bdstevel@tonic-gate	puevp->portev_user = pkevp->portkev_user;
15997c478bdstevel@tonic-gate	puevp->portev_events = pkevp->portkev_events;
16017c478bdstevel@tonic-gate	/* remove event from the queue */
16027c478bdstevel@tonic-gate	list_remove(list, pkevp);
16047c478bdstevel@tonic-gate	/*
16057c478bdstevel@tonic-gate	 * Events of type PORT_KEV_WIRED remain allocated by the
16067c478bdstevel@tonic-gate	 * event source.
16077c478bdstevel@tonic-gate	 */
16087c478bdstevel@tonic-gate	flags = pkevp->portkev_flags;
16097c478bdstevel@tonic-gate	if (pkevp->portkev_flags & PORT_KEV_WIRED)
16107c478bdstevel@tonic-gate		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
16117c478bdstevel@tonic-gate	else
16127c478bdstevel@tonic-gate		free_event = 1;
16147c478bdstevel@tonic-gate	if (pkevp->portkev_callback) {
16157c478bdstevel@tonic-gate		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
16167c478bdstevel@tonic-gate		    &puevp->portev_events, pkevp->portkev_pid,
16177c478bdstevel@tonic-gate		    PORT_CALLBACK_DEFAULT, pkevp);
16197c478bdstevel@tonic-gate		if (error) {
16207c478bdstevel@tonic-gate			/*
16217c478bdstevel@tonic-gate			 * Event can not be delivered.
16227c478bdstevel@tonic-gate			 * Caller must reinsert the event into the queue.
16237c478bdstevel@tonic-gate			 */
16247c478bdstevel@tonic-gate			pkevp->portkev_flags = flags;
16257c478bdstevel@tonic-gate			return (error);
16267c478bdstevel@tonic-gate		}
16277c478bdstevel@tonic-gate	}
16287c478bdstevel@tonic-gate	if (free_event)
16297c478bdstevel@tonic-gate		port_free_event_local(pkevp, 0);
16307c478bdstevel@tonic-gate	return (0);
16337c478bdstevel@tonic-gate#ifdef	_SYSCALL32_IMPL
16357c478bdstevel@tonic-gate * 1. copy kernel event structure to user event structure.
16367c478bdstevel@tonic-gate * 2. PORT_KEV_WIRED event structures will be reused by the "source"
16377c478bdstevel@tonic-gate * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
16387c478bdstevel@tonic-gate * 4. Other types of event structures can be delivered back to the port cache
16397c478bdstevel@tonic-gate *    (port_free_event_local()).
16407c478bdstevel@tonic-gate * 5. The event source callback function is the last opportunity for the
16417c478bdstevel@tonic-gate *    event source to update events, to free local resources associated with
16427c478bdstevel@tonic-gate *    the event or to deny the delivery of the event.
16437c478bdstevel@tonic-gate */
16447c478bdstevel@tonic-gatestatic int
16457c478bdstevel@tonic-gateport_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list)
16477c478bdstevel@tonic-gate	int	free_event = 0;
16487c478bdstevel@tonic-gate	int	error;
16497c478bdstevel@tonic-gate	int	flags;
16517c478bdstevel@tonic-gate	puevp->portev_source = pkevp->portkev_source;
16527c478bdstevel@tonic-gate	puevp->portev_object = (daddr32_t)pkevp->portkev_object;
16537c478bdstevel@tonic-gate	puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user;
16547c478bdstevel@tonic-gate	puevp->portev_events = pkevp->portkev_events;
16567c478bdstevel@tonic-gate	/* remove event from the queue */
16577c478bdstevel@tonic-gate	list_remove(list, pkevp);
16597c478bdstevel@tonic-gate	/*
16607c478bdstevel@tonic-gate	 * Events if type PORT_KEV_WIRED remain allocated by the
16617c478bdstevel@tonic-gate	 * sub-system (source).
16627c478bdstevel@tonic-gate	 */
16647c478bdstevel@tonic-gate	flags = pkevp->portkev_flags;
16657c478bdstevel@tonic-gate	if (pkevp->portkev_flags & PORT_KEV_WIRED)
16667c478bdstevel@tonic-gate		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
16677c478bdstevel@tonic-gate	else
16687c478bdstevel@tonic-gate		free_event = 1;
16707c478bdstevel@tonic-gate	if (pkevp->portkev_callback != NULL) {
16717c478bdstevel@tonic-gate		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
16727c478bdstevel@tonic-gate		    &puevp->portev_events, pkevp->portkev_pid,
16737c478bdstevel@tonic-gate		    PORT_CALLBACK_DEFAULT, pkevp);
16747c478bdstevel@tonic-gate		if (error) {
16757c478bdstevel@tonic-gate			/*
16767c478bdstevel@tonic-gate			 * Event can not be delivered.
16777c478bdstevel@tonic-gate			 * Caller must reinsert the event into the queue.
16787c478bdstevel@tonic-gate			 */
16797c478bdstevel@tonic-gate			pkevp->portkev_flags = flags;
16807c478bdstevel@tonic-gate			return (error);
16817c478bdstevel@tonic-gate		}
16827c478bdstevel@tonic-gate	}
16837c478bdstevel@tonic-gate	if (free_event)
16847c478bdstevel@tonic-gate		port_free_event_local(pkevp, 0);
16857c478bdstevel@tonic-gate	return (0);
16877c478bdstevel@tonic-gate#endif	/* _SYSCALL32_IMPL */
16907c478bdstevel@tonic-gate * copyout alert event.
16917c478bdstevel@tonic-gate */
16927c478bdstevel@tonic-gatestatic int
16937c478bdstevel@tonic-gateport_get_alert(port_alert_t *pa, port_event_t *uevp)
16957c478bdstevel@tonic-gate	model_t	model = get_udatamodel();
16977c478bdstevel@tonic-gate	/* copyout alert event structures to user space */
16987c478bdstevel@tonic-gate	if (model == DATAMODEL_NATIVE) {
16997c478bdstevel@tonic-gate		port_event_t	uev;
17007c478bdstevel@tonic-gate		uev.portev_source = PORT_SOURCE_ALERT;
17017c478bdstevel@tonic-gate		uev.portev_object = pa->portal_object;
17027c478bdstevel@tonic-gate		uev.portev_events = pa->portal_events;
17037c478bdstevel@tonic-gate		uev.portev_user = pa->portal_user;
17047c478bdstevel@tonic-gate		if (copyout(&uev, uevp, sizeof (port_event_t)))
17057c478bdstevel@tonic-gate			return (EFAULT);
17067c478bdstevel@tonic-gate#ifdef	_SYSCALL32_IMPL
17077c478bdstevel@tonic-gate	} else {
17087c478bdstevel@tonic-gate		port_event32_t	uev32;
17097c478bdstevel@tonic-gate		uev32.portev_source = PORT_SOURCE_ALERT;
17107c478bdstevel@tonic-gate		uev32.portev_object = (daddr32_t)pa->portal_object;
17117c478bdstevel@tonic-gate		uev32.portev_events = pa->portal_events;
17127c478bdstevel@tonic-gate		uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user;
17137c478bdstevel@tonic-gate		if (copyout(&uev32, uevp, sizeof (port_event32_t)))
17147c478bdstevel@tonic-gate			return (EFAULT);
17157c478bdstevel@tonic-gate#endif	/* _SYSCALL32_IMPL */
17167c478bdstevel@tonic-gate	}
17177c478bdstevel@tonic-gate	return (0);
17217c478bdstevel@tonic-gate * Check return conditions :
17227c478bdstevel@tonic-gate * - pending port close(2)
17237c478bdstevel@tonic-gate * - threads waiting for events
17247c478bdstevel@tonic-gate */
17257c478bdstevel@tonic-gatestatic void
17267c478bdstevel@tonic-gateport_check_return_cond(port_queue_t *portq)
17287c478bdstevel@tonic-gate	ASSERT(MUTEX_HELD(&portq->portq_mutex));
17297c478bdstevel@tonic-gate	portq->portq_thrcnt--;
17307c478bdstevel@tonic-gate	if (portq->portq_flags & PORTQ_CLOSE) {
17317c478bdstevel@tonic-gate		if (portq->portq_thrcnt == 0)
17327c478bdstevel@tonic-gate			cv_signal(&portq->portq_closecv);
17337c478bdstevel@tonic-gate		else
17347c478bdstevel@tonic-gate			cv_signal(&portq->portq_thread->portget_cv);
17357c478bdstevel@tonic-gate	}
17397c478bdstevel@tonic-gate * The port_get_kevent() function returns
17407c478bdstevel@tonic-gate * - the event located at the head of the queue if 'last' pointer is NULL
17417c478bdstevel@tonic-gate * - the next event after the event pointed by 'last'
17427c478bdstevel@tonic-gate * The caller of this function is responsible for the integrity of the queue
17437c478bdstevel@tonic-gate * in use:
17443470957raf * - port_getn() is using a temporary queue protected with port_block().
17453470957raf * - port_close_events() is working on the global event queue and protects
17463470957raf *   the queue with portq->portq_mutex.
17477c478bdstevel@tonic-gate */
17487c478bdstevel@tonic-gateport_kevent_t *
17497c478bdstevel@tonic-gateport_get_kevent(list_t *list, port_kevent_t *last)
17517c478bdstevel@tonic-gate	if (last == NULL)
17527c478bdstevel@tonic-gate		return (list_head(list));
17537c478bdstevel@tonic-gate	else
17547c478bdstevel@tonic-gate		return (list_next(list, last));
17587c478bdstevel@tonic-gate * The port_get_timeout() function gets the timeout data from user space
17597c478bdstevel@tonic-gate * and converts that info into a corresponding internal representation.
17607c478bdstevel@tonic-gate * The kerneldata flag means that the timeout data is already loaded.
17617c478bdstevel@tonic-gate */
17627c478bdstevel@tonic-gatestatic int
17637c478bdstevel@tonic-gateport_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp,
17647c478bdstevel@tonic-gate    int *blocking, int kerneldata)
17667c478bdstevel@tonic-gate	model_t	model = get_udatamodel();
17687c478bdstevel@tonic-gate	*rqtp = NULL;
17697c478bdstevel@tonic-gate	if (timeout == NULL) {
17707c478bdstevel@tonic-gate		*blocking = 1;
17717c478bdstevel@tonic-gate		return (0);
17727c478bdstevel@tonic-gate	}
17747c478bdstevel@tonic-gate	if (kerneldata) {
17757c478bdstevel@tonic-gate		*rqtime = *timeout;
17767c478bdstevel@tonic-gate	} else {
17777c478bdstevel@tonic-gate		if (model == DATAMODEL_NATIVE) {
17787c478bdstevel@tonic-gate			if (copyin(timeout, rqtime, sizeof (*rqtime)))
17797c478bdstevel@tonic-gate				return (EFAULT);
17807c478bdstevel@tonic-gate#ifdef	_SYSCALL32_IMPL
17817c478bdstevel@tonic-gate		} else {
178243bd900Toomas Soome			timespec32_t	wait_time_32;
17837c478bdstevel@tonic-gate			if (copyin(timeout, &wait_time_32,
17847c478bdstevel@tonic-gate			    sizeof (wait_time_32)))
17857c478bdstevel@tonic-gate				return (EFAULT);
17867c478bdstevel@tonic-gate			TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
17877c478bdstevel@tonic-gate#endif  /* _SYSCALL32_IMPL */
17887c478bdstevel@tonic-gate		}
17897c478bdstevel@tonic-gate	}
17917c478bdstevel@tonic-gate	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
17927c478bdstevel@tonic-gate		*blocking = 0;
17937c478bdstevel@tonic-gate		return (0);
17947c478bdstevel@tonic-gate	}
17967c478bdstevel@tonic-gate	if (rqtime->tv_sec < 0 ||
17977c478bdstevel@tonic-gate	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
17987c478bdstevel@tonic-gate		return (EINVAL);
18007c478bdstevel@tonic-gate	*rqtp = rqtime;
18017c478bdstevel@tonic-gate	*blocking = 1;
18027c478bdstevel@tonic-gate	return (0);
18067c478bdstevel@tonic-gate * port_queue_thread()
18077c478bdstevel@tonic-gate * Threads requiring more events than available will be put in a wait queue.
18087c478bdstevel@tonic-gate * There is a "thread wait queue" per port.
18097c478bdstevel@tonic-gate * Threads requiring less events get a higher priority than others and they
18107c478bdstevel@tonic-gate * will be awoken first.
18117c478bdstevel@tonic-gate */
18127c478bdstevel@tonic-gatestatic portget_t *
18137c478bdstevel@tonic-gateport_queue_thread(port_queue_t *portq, uint_t nget)
18157c478bdstevel@tonic-gate	portget_t	*pgetp;
18167c478bdstevel@tonic-gate	portget_t	*ttp;
18177c478bdstevel@tonic-gate	portget_t	*htp;
18197c478bdstevel@tonic-gate	pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP);
18207c478bdstevel@tonic-gate	pgetp->portget_nget = nget;
18217c478bdstevel@tonic-gate	pgetp->portget_pid = curproc->p_pid;
18227c478bdstevel@tonic-gate	if (portq->portq_thread == NULL) {
18237c478bdstevel@tonic-gate		/* first waiting thread */
18247c478bdstevel@tonic-gate		portq->portq_thread = pgetp;
18257c478bdstevel@tonic-gate		portq->portq_nget = nget;
18267c478bdstevel@tonic-gate		pgetp->portget_prev = pgetp;
18277c478bdstevel@tonic-gate		pgetp->portget_next = pgetp;
18287c478bdstevel@tonic-gate		return (pgetp);
18297c478bdstevel@tonic-gate	}
18317c478bdstevel@tonic-gate	/*
18327c478bdstevel@tonic-gate	 * thread waiting for less events will be set on top of the queue.
18337c478bdstevel@tonic-gate	 */
18347c478bdstevel@tonic-gate	ttp = portq->portq_thread;
18357c478bdstevel@tonic-gate	htp = ttp;
18367c478bdstevel@tonic-gate	for (;;) {
18377c478bdstevel@tonic-gate		if (nget <= ttp->portget_nget)
18387c478bdstevel@tonic-gate			break;
18397c478bdstevel@tonic-gate		if (htp == ttp->portget_next)
18407c478bdstevel@tonic-gate			break;	/* last event */
18417c478bdstevel@tonic-gate		ttp = ttp->portget_next;
18427c478bdstevel@tonic-gate	}
18447c478bdstevel@tonic-gate	/* add thread to the queue */
18457c478bdstevel@tonic-gate	pgetp->portget_next = ttp;
18467c478bdstevel@tonic-gate	pgetp->portget_prev = ttp->portget_prev;
18477c478bdstevel@tonic-gate	ttp->portget_prev->portget_next = pgetp;
18487c478bdstevel@tonic-gate	ttp->portget_prev = pgetp;
18497c478bdstevel@tonic-gate	if (portq->portq_thread == ttp)
18507c478bdstevel@tonic-gate		portq->portq_thread = pgetp;
18517c478bdstevel@tonic-gate	portq->portq_nget = portq->portq_thread->portget_nget;
18527c478bdstevel@tonic-gate	return (pgetp);
18567c478bdstevel@tonic-gate * Take thread out of the queue.
18577c478bdstevel@tonic-gate */
18587c478bdstevel@tonic-gatestatic void
18597c478bdstevel@tonic-gateport_dequeue_thread(port_queue_t *portq, portget_t *pgetp)
18617c478bdstevel@tonic-gate	if (pgetp->portget_next == pgetp) {
18627c478bdstevel@tonic-gate		/* last (single) waiting thread */
18637c478bdstevel@tonic-gate		portq->portq_thread = NULL;
18643470957raf		portq->portq_nget = 0;
18657c478bdstevel@tonic-gate	} else {
18667c478bdstevel@tonic-gate		pgetp->portget_prev->portget_next = pgetp->portget_next;
18677c478bdstevel@tonic-gate		pgetp->portget_next->portget_prev = pgetp->portget_prev;
18687c478bdstevel@tonic-gate		if (portq->portq_thread == pgetp)
18697c478bdstevel@tonic-gate			portq->portq_thread = pgetp->portget_next;
18707c478bdstevel@tonic-gate		portq->portq_nget = portq->portq_thread->portget_nget;
18717c478bdstevel@tonic-gate	}
18727c478bdstevel@tonic-gate	kmem_free(pgetp, sizeof (portget_t));
18767c478bdstevel@tonic-gate * Set up event port kstats.
18777c478bdstevel@tonic-gate */
18787c478bdstevel@tonic-gatestatic void
18817c478bdstevel@tonic-gate	kstat_t	*ksp;
18827c478bdstevel@tonic-gate	uint_t	ndata;
18847c478bdstevel@tonic-gate	ndata = sizeof (port_kstat) / sizeof (kstat_named_t);
18857c478bdstevel@tonic-gate	ksp = kstat_create("portfs", 0, "Event Ports", "misc",
18867c478bdstevel@tonic-gate	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL);
18877c478bdstevel@tonic-gate	if (ksp) {
18887c478bdstevel@tonic-gate		ksp->ks_data = &port_kstat;
18897c478bdstevel@tonic-gate		kstat_install(ksp);
18907c478bdstevel@tonic-gate	}