/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */

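/*
 * Illustrative caller-side sketch of the request/status/reap cycle
 * implemented below (not part of this file; fd and buf are assumed
 * to be set up by the caller):
 *
 *	aiocb_t cb;
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) == 0) {
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;	(busy-poll only for brevity)
 *		ssize_t n = aio_return(&cb);
 *	}
 */
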
#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

extern int __fdsync(int, int);

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

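/*
 * How a caller-supplied timeout maps onto these modes (a summary of
 * the checks performed in __aio_suspend() and _aio_check_timeout()
 * below):
 *	timeout == NULL		wait indefinitely (AIO_TIMEOUT_INDEF)
 *	timeout == {0, 0}	poll, do not block (AIO_TIMEOUT_POLL)
 *	timeout > {0, 0}	bounded wait (AIO_TIMEOUT_WAIT)
 * AIO_TIMEOUT_UNDEF only means "not yet classified" in __aio_waitn().
 */
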
/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int		aio_ufs = 0;
	int		oerrno = 0;
	aio_lio_t	*head = NULL;
	aiocb_t		*aiocbp;
	int		state = 0;
	int		EIOflg = 0;
	int		rw;
	int		do_kaio = 0;
	int		error;
	int		i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * Find UFS requests, errno == ENOTSUP/EBADFD.
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * Submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw().
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * Call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests.
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int		cv_err;	/* error code from cond_xxx() */
	int		kerr;	/* error code from _kaio(AIOSUSPEND) */
	int		i;
	timespec_t	twait;	/* copy of timo for internal calculations */
	timespec_t	*wait = NULL;
	int		timedwait;
	int		req_outstanding;
	aiocb_t		**listp;
	aiocb_t		*aiocbp;
#if !defined(_LP64)
	aiocb64_t	**listp64;
	aiocb64_t	*aiocbp64;
#endif
	hrtime_t	hrtstart;
	hrtime_t	hrtend;
	hrtime_t	hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next if-clause is required to accelerate access to
	 * completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 * 0:  everything ok, completed request found
		 * -1: error
		 * 1:  no error: _aiodone woke up the _kaio(AIOSUSPEND, ...)
		 *     system call using _kaio(AIONOTIFY), which means that
		 *     some non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int	error;
		int	inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there are no outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * have also completed.
		 * If the kernel was notified to return, then we have to
		 * check for possibly pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	} else if (aiocbp->aio_state == USERAIO) {
		sig_mutex_lock(&__aio_mutex);
		if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
			sig_mutex_unlock(&__aio_mutex);
			((aiocb_t *)aiocbp)->aio_state = CHECKED;
		} else {
			((aiocb_t *)aiocbp)->aio_state = NOCHECK;
			ASSERT(reqp->req_head == NULL);
			(void) _aio_req_remove(reqp);
			sig_mutex_unlock(&__aio_mutex);
			_aio_req_free(reqp);
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate that either aio_return()
	 * has been called before or it should not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

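/*
 * Caller-side sketch of the fsync barrier above (illustrative only;
 * fd is an assumed descriptor with outstanding asynchronous writes):
 *
 *	aiocb_t cb;
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	if (aio_fsync(O_DSYNC, &cb) == 0) {
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;	(busy-poll only for brevity)
 *		(void) aio_return(&cb);
 *	}
 */
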
int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

/*
 * __aio_waitn() cancellation handler.
 */
static void
_aio_waitn_cleanup(void *arg __unused)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn() can be used to reap the results of several I/O operations
 * that were submitted asynchronously.  The submission of I/Os can be done
 * using existing POSIX interfaces: lio_listio(), aio_write() or aio_read().
 * aio_waitn() waits until "nwait" I/Os (supplied as a parameter) have
 * completed and returns the aiocbs of these I/Os in "list".  The maximum
 * size of this list is given by "nent" and the actual number of I/Os
 * completed is returned in "nwait".  aio_waitn() may also return early
 * when the timeout expires.  It returns 0 on success or -1 if an error
 * occurred.
 */
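/*
 * Minimal caller sketch (illustrative; assumes several I/Os have
 * already been submitted with aio_read()/aio_write()/lio_listio()):
 *
 *	aiocb_t *done[8];
 *	uint_t nwait = 4;	(block until at least 4 have completed)
 *	if (aio_waitn(done, 8, &nwait, NULL) == 0) {
 *		uint_t i;
 *		for (i = 0; i < nwait; i++)
 *			(void) aio_return(done[i]);
 *	}
 */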
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process is allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t	sum_reqs;

		/*
		 * Calculate the sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0; /* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* minimum requested number of completed I/Os done */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored in "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values :
 * AIO_TIMEOUT_POLL  : polling
 * AIO_TIMEOUT_WAIT  : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int		aio_ufs = 0;
	int		oerrno = 0;
	aio_lio_t	*head = NULL;
	aiocb64_t	*aiocbp;
	int		state = 0;
	int		EIOflg = 0;
	int		rw;
	int		do_kaio = 0;
	int		error;
	int		i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * Find UFS requests, errno == ENOTSUP/EBADFD.
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * Submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw().
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * Call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests.
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate that either aio_return()
	 * has been called before or it should not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat64 buf;

	if (fstat64(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */