xref: /illumos-gate/usr/src/uts/common/fs/ufs/lufs_top.c (revision 31d4cf52)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2015 by Delphix. All rights reserved.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/taskq.h>
#include <sys/cmn_err.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_bio.h>

/*
 * FILE SYSTEM INTERFACE TO TRANSACTION OPERATIONS (TOP; like VOP)
 */

uint_t topkey; /* tsd transaction key */

/*
 * declare a delta
 */
void
top_delta(
	ufsvfs_t *ufsvfsp,
	offset_t mof,
	off_t nb,
	delta_t dtyp,
	int (*func)(),
	ulong_t arg)
{
	ml_unit_t		*ul	= ufsvfsp->vfs_log;
	threadtrans_t		*tp	= tsd_get(topkey);

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(nb);
	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
	    top_delta_debug(ul, mof, nb, dtyp));

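	/*
	 * record the delta in the deltamap; func/arg are the callback
	 * invoked when the delta is later pushed to the log
	 */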
	deltamap_add(ul->un_deltamap, mof, nb, dtyp, func, arg, tp);

	ul->un_logmap->mtm_ref = 1; /* for roll thread's heuristic */
	if (tp) {
		tp->any_deltas = 1;
	}
}

/*
 * cancel a delta
 */
void
top_cancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb, int flags)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	int		metadata = flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA);

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(nb);
	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
	    (!(flags & metadata) ||
	    top_delta_debug(ul, mof, nb, DT_CANCEL)));

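	/*
	 * for metadata, drop any pending delta from the deltamap
	 * before recording the cancel in the logmap
	 */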
	if (metadata)
		deltamap_del(ul->un_deltamap, mof, nb);

	logmap_cancel(ul, mof, nb, metadata);

	/*
	 * needed for the roll thread's heuristic
	 */
	ul->un_logmap->mtm_ref = 1;
}

/*
 * check if this delta has been canceled (metadata -> userdata)
 */
int
top_iscancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(nb);
	if (logmap_iscancel(ul->un_logmap, mof, nb))
		return (1);
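	/* a log in the error state reports every delta as canceled */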
	if (ul->un_flags & LDL_ERROR)
		return (1);
	return (0);
}

/*
 * put device into error state
 */
void
top_seterror(ufsvfs_t *ufsvfsp)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ldl_seterror(ul, "ufs is forcing a ufs log error");
}

/*
 * issue an empty sync op to help empty the delta/log map or the log
 */
static void
top_issue_sync(ufsvfs_t *ufsvfsp)
{
	int error = 0;

	if ((curthread->t_flag & T_DONTBLOCK) == 0)
		curthread->t_flag |= T_DONTBLOCK;
	top_begin_sync(ufsvfsp, TOP_COMMIT_ASYNC, 0, &error);
	if (!error) {
		top_end_sync(ufsvfsp, &error, TOP_COMMIT_ASYNC, 0);
	}
}

static void
top_issue_from_taskq(void *arg)
{
	ufsvfs_t *ufsvfsp = arg;
	ml_unit_t *ul = ufsvfsp->vfs_log;
	mt_map_t *mtm = ul->un_logmap;

	top_issue_sync(ufsvfsp);

	/*
	 * We were called from the taskq_dispatch() in top_begin_async(), so
	 * decrement mtm_taskq_sync_count and wake up the thread waiting
	 * on the mtm_cv if the mtm_taskq_sync_count hits zero.
	 */
	ASSERT(taskq_member(system_taskq, curthread));

	mutex_enter(&mtm->mtm_lock);
	mtm->mtm_taskq_sync_count--;
	if (mtm->mtm_taskq_sync_count == 0) {
		cv_signal(&mtm->mtm_cv);
	}
	mutex_exit(&mtm->mtm_lock);
}

/*
 * MOBY TRANSACTION ROUTINES
 * begin a moby transaction
 *	sync ops enter until first sync op finishes
 *	async ops enter until last sync op finishes
 * end a moby transaction
 *		outstanding deltas are pushed thru log
 *		log buffer is committed (incore only)
 *		next trans is open to async ops
 *		log buffer is committed on the log
 *		next trans is open to sync ops
 */

/*ARGSUSED*/
void
top_begin_sync(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int *error)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm = ul->un_logmap;
	threadtrans_t	*tp;
	ushort_t	seq;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(error != NULL);
	ASSERT(*error == 0);

	mutex_enter(&mtm->mtm_lock);
	if (topid == TOP_FSYNC) {
		/*
		 * Error the fsync immediately if this is an nfs thread
		 * and its last transaction has already been committed.
		 * The only transactions outstanding are those
		 * where no commit has even started
		 * (last_async_tid == mtm->mtm_tid)
		 * or those where a commit is in progress
		 * (last_async_tid == mtm->mtm_committid)
		 */
		if (curthread->t_flag & T_DONTPEND) {
			tp = tsd_get(topkey);
			if (tp && (tp->last_async_tid != mtm->mtm_tid) &&
			    (tp->last_async_tid != mtm->mtm_committid)) {
				mutex_exit(&mtm->mtm_lock);
				*error = 1;
				return;
			}
		}

		/*
		 * If there are already other synchronous transactions
		 * and we haven't allowed async ones to start yet,
		 * then just wait for the commit to complete.
		 */
		if (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
		    (TOP_SYNC | TOP_ASYNC)) || mtm->mtm_activesync) {
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);
			mutex_exit(&mtm->mtm_lock);
			*error = 1;
			return;
		}
		if (mtm->mtm_closed & TOP_SYNC) {
			/*
			 * We know we're in the window where a thread is
			 * committing a transaction in top_end_sync() and
			 * has allowed async threads to start but hasn't
			 * got the completion on the commit write to
			 * allow sync threads to start.
			 * So wait for that commit completion then retest
			 * for the quick nfs check and if that fails
			 * go on to start a transaction
			 */
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);

			/* tp is set above if T_DONTPEND */
			if ((curthread->t_flag & T_DONTPEND) && tp &&
			    (tp->last_async_tid != mtm->mtm_tid) &&
			    (tp->last_async_tid != mtm->mtm_committid)) {
				mutex_exit(&mtm->mtm_lock);
				*error = 1;
				return;
			}
		}
	}
retry:
	mtm->mtm_ref = 1;
	/*
	 * current transaction closed to sync ops; try for next transaction
	 */
	if ((mtm->mtm_closed & TOP_SYNC) && !panicstr) {
		ulong_t		resv;

		/*
		 * We know a commit is in progress. If we are trying to
		 * commit and we haven't allowed async ones to start yet,
		 * then just wait for the commit completion.
		 */
		if ((size == TOP_COMMIT_SIZE) &&
		    (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
		    (TOP_SYNC | TOP_ASYNC)) || (mtm->mtm_activesync))) {
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);
			mutex_exit(&mtm->mtm_lock);
			*error = 1;
			return;
		}

		/*
		 * next transaction is full; try for next transaction
		 */
		resv = size + ul->un_resv_wantin + ul->un_resv;
		if (resv > ul->un_maxresv) {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			goto retry;
		}
		/*
		 * we are in the next transaction; wait for it to start
		 */
		mtm->mtm_wantin++;
		ul->un_resv_wantin += size;
		/*
		 * The corresponding cv_broadcast wakes up
		 * all threads that have been validated to go into
		 * the next transaction. However, because spurious
		 * cv_wait wakeups are possible we use a sequence
		 * number to check that the commit and cv_broadcast
		 * have really occurred. We couldn't use mtm_tid
		 * because on error that doesn't get incremented.
		 */
		seq = mtm->mtm_seq;
		do {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
		} while (seq == mtm->mtm_seq);
	} else {
		/*
		 * if the current transaction is full; try the next one
		 */
		if (size && (ul->un_resv && ((size + ul->un_resv) >
		    ul->un_maxresv)) && !panicstr) {
			/*
			 * log is over reserved and no one will unresv the space
			 *	so generate empty sync op to unresv the space
			 */
			if (mtm->mtm_activesync == 0) {
				mutex_exit(&mtm->mtm_lock);
				top_issue_sync(ufsvfsp);
				mutex_enter(&mtm->mtm_lock);
				goto retry;
			}
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			goto retry;
		}
		/*
		 * we are in the current transaction
		 */
		mtm->mtm_active++;
		mtm->mtm_activesync++;
		ul->un_resv += size;
	}

	ASSERT(mtm->mtm_active > 0);
	ASSERT(mtm->mtm_activesync > 0);
	mutex_exit(&mtm->mtm_lock);

	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_begin_debug(ul, topid, size));
}

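/* non-blocking (tryasync) begins that failed with EWOULDBLOCK */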
int tryfail_cnt;

int
top_begin_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int tryasync)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm	= ul->un_logmap;
	threadtrans_t   *tp;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);

	tp = tsd_get(topkey);
	if (tp == NULL) {
		tp = kmem_zalloc(sizeof (threadtrans_t), KM_SLEEP);
		(void) tsd_set(topkey, tp);
	}
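	/* reset this thread's per-transaction delta accounting */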
	tp->deltas_size = 0;
	tp->any_deltas = 0;

	mutex_enter(&mtm->mtm_lock);
retry:
	mtm->mtm_ref = 1;
	/*
	 * current transaction closed to async ops; try for next transaction
	 */
	if ((mtm->mtm_closed & TOP_ASYNC) && !panicstr) {
		if (tryasync) {
			mutex_exit(&mtm->mtm_lock);
			tryfail_cnt++;
			return (EWOULDBLOCK);
		}
		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
		goto retry;
	}

	/*
	 * if the current transaction is full; try the next one
	 */
	if (((size + ul->un_resv + ul->un_resv_wantin) > ul->un_maxresv) &&
	    !panicstr) {
		/*
		 * log is overreserved and no one will unresv the space
		 *	so generate empty sync op to unresv the space
		 * We need TOP_SYNC_FORCED because we want to know when
		 * a top_end_sync is completed.
		 * mtm_taskq_sync_count is needed because we want to keep track
		 * of the pending top_issue_sync dispatches so that during
		 * forced umount we can wait for these to complete.
		 * mtm_taskq_sync_count is decremented in top_issue_from_taskq
		 * and can remain set even after top_end_sync completes.
		 * We have a window between the clearing of TOP_SYNC_FORCED
		 * flag and the decrementing of mtm_taskq_sync_count.
		 * If in this window new async transactions start consuming
		 * log space, the log can get overreserved.
		 * Subsequently a new async transaction would fail to generate
		 * an empty sync transaction via the taskq, since it finds
		 * the mtm_taskq_sync_count set. This can cause a hang.
		 * Hence we do not test for mtm_taskq_sync_count being zero.
		 * Instead, the TOP_SYNC_FORCED flag is tested here.
		 */
		if ((mtm->mtm_activesync == 0) &&
		    (!(mtm->mtm_closed & TOP_SYNC_FORCED))) {
			/*
			 * Set flag to stop multiple forced empty
			 * sync transactions. Increment mtm_taskq_sync_count.
			 */
			mtm->mtm_closed |= TOP_SYNC_FORCED;
			mtm->mtm_taskq_sync_count++;
			mutex_exit(&mtm->mtm_lock);
			(void) taskq_dispatch(system_taskq,
			    top_issue_from_taskq, ufsvfsp, TQ_SLEEP);
			if (tryasync) {
				tryfail_cnt++;
				return (EWOULDBLOCK);
			}
			mutex_enter(&mtm->mtm_lock);
			goto retry;
		}
		if (tryasync) {
			mutex_exit(&mtm->mtm_lock);
			tryfail_cnt++;
			return (EWOULDBLOCK);
		}
		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
		goto retry;
	}
	/*
	 * we are in the current transaction
	 */
	mtm->mtm_active++;
	ul->un_resv += size;

	ASSERT(mtm->mtm_active > 0);
	mutex_exit(&mtm->mtm_lock);

	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_begin_debug(ul, topid, size));
	return (0);
}

/*ARGSUSED*/
void
top_end_sync(ufsvfs_t *ufsvfsp, int *ep, top_t topid, ulong_t size)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm	= ul->un_logmap;
	mapentry_t	*cancellist;
	uint32_t	tid;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_end_debug(ul, mtm, topid, size));

	mutex_enter(&mtm->mtm_lock);
	tid = mtm->mtm_tid;

	mtm->mtm_activesync--;
	mtm->mtm_active--;

	mtm->mtm_ref = 1;

	/*
	 * wait for last syncop to complete
	 */
	if (mtm->mtm_activesync || panicstr) {
		ushort_t seq = mtm->mtm_seq;

		mtm->mtm_closed = TOP_SYNC;

		do {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
		} while (seq == mtm->mtm_seq);
		mutex_exit(&mtm->mtm_lock);
		goto out;
	}
	/*
	 * last syncop; close current transaction to all ops
	 */
	mtm->mtm_closed = TOP_SYNC|TOP_ASYNC;

	/*
	 * wait for last asyncop to finish
	 */
	while (mtm->mtm_active) {
		cv_wait(&mtm->mtm_cv_eot, &mtm->mtm_lock);
	}

	/*
	 * push dirty metadata thru the log
	 */
	deltamap_push(ul);

	ASSERT(((ul->un_debug & MT_FORCEROLL) == 0) ||
	    top_roll_debug(ul));

	mtm->mtm_tid = tid + 1;	/* can overflow to 0 */

	/*
	 * Empty the cancellist, but save it for logmap_free_cancel
	 */
	mutex_enter(&mtm->mtm_mutex);
	cancellist = mtm->mtm_cancel;
	mtm->mtm_cancel = NULL;
	mutex_exit(&mtm->mtm_mutex);

	/*
	 * allow async ops
	 */
	ASSERT(mtm->mtm_active == 0);
	ul->un_resv = 0; /* unreserve the log space */
	mtm->mtm_closed = TOP_SYNC;
	/*
	 * Hold the un_log_mutex here until we are done writing
	 * the commit record to prevent any more deltas from being
	 * written to the log after we allow async operations.
	 */
	mutex_enter(&ul->un_log_mutex);
	mutex_exit(&mtm->mtm_lock);
	cv_broadcast(&mtm->mtm_cv_next);

	/*
	 * asynchronously write the commit record,
	 */
	logmap_commit(ul, tid);

	/*
	 * wait for outstanding log writes (e.g., commits) to finish
	 */
	ldl_waito(ul);

	/*
	 * Now that we are sure the commit has been written to the log
	 * we can free any canceled deltas.  If we free them before
	 * guaranteeing that the commit was written, we could panic before
	 * the commit, but after an async thread has allocated and written
	 * to a canceled freed block.
	 */

	logmap_free_cancel(mtm, &cancellist);
	mutex_exit(&ul->un_log_mutex);

	/*
	 * now, allow all ops
	 */
	mutex_enter(&mtm->mtm_lock);
	mtm->mtm_active += mtm->mtm_wantin;
	ul->un_resv += ul->un_resv_wantin;
	mtm->mtm_activesync = mtm->mtm_wantin;
	mtm->mtm_wantin = 0;
	mtm->mtm_closed = 0;
	ul->un_resv_wantin = 0;
	mtm->mtm_committid = mtm->mtm_tid;
	mtm->mtm_seq++;
	mutex_exit(&mtm->mtm_lock);

	/*
	 * Finish any other synchronous transactions and
	 * start any waiting new synchronous transactions
	 */
	cv_broadcast(&mtm->mtm_cv_commit);

	/*
	 * if the logmap is getting full; roll something
	 */
	if (logmap_need_roll_sync(mtm)) {
		logmap_forceroll_nowait(mtm);
	}

out:
	if (ul->un_flags & LDL_ERROR)
		*ep = EIO;
}

/*ARGSUSED*/
void
top_end_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm	= ul->un_logmap;
	threadtrans_t	*tp	= tsd_get(topkey);
	int		wakeup_needed = 0;

	ASSERT(tp);
	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_end_debug(ul, mtm, topid, size));

	mutex_enter(&mtm->mtm_lock);

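	/* return any log space this thread reserved but did not use */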
	if (size > tp->deltas_size) {
		ul->un_resv -= (size - tp->deltas_size);
	}
	if (tp->any_deltas) {
		tp->last_async_tid = mtm->mtm_tid;
	}
	mtm->mtm_ref = 1;

	mtm->mtm_active--;
	if ((mtm->mtm_active == 0) &&
	    (mtm->mtm_closed == (TOP_SYNC|TOP_ASYNC))) {
		wakeup_needed = 1;
	}
	mutex_exit(&mtm->mtm_lock);
	if (wakeup_needed)
		cv_signal(&mtm->mtm_cv_eot);

	/*
	 * Generate a sync op if the log, logmap, or deltamap are heavily used.
	 * Unless we are possibly holding any VM locks, since if we are holding
	 * any VM locks and we issue a top_end_sync(), we could deadlock.
	 */
	if ((mtm->mtm_activesync == 0) &&
	    !(mtm->mtm_closed & TOP_SYNC) &&
	    (deltamap_need_commit(ul->un_deltamap) ||
	    logmap_need_commit(mtm) ||
	    ldl_need_commit(ul)) &&
	    (topid != TOP_GETPAGE)) {
		top_issue_sync(ufsvfsp);
	}
	/*
	 * roll something from the log if the logmap is too full
	 */
	if (logmap_need_roll_async(mtm))
		logmap_forceroll_nowait(mtm);
}

/*
 * Called from roll thread;
 *	buffer set for reading master
 * Returns
 *	0 - success, can continue with next buffer
 *	1 - failure due to logmap deltas being in use
 */
int
top_read_roll(rollbuf_t *rbp, ml_unit_t *ul)
{
	buf_t		*bp	= &rbp->rb_bh;
	offset_t	mof	= ldbtob(bp->b_blkno);

	/*
	 * get a list of deltas
	 */
	if (logmap_list_get_roll(ul->un_logmap, mof, rbp)) {
		/* logmap deltas are in use */
		return (1);
	}

	/*
	 * no deltas were found, nothing to roll
	 */
	if (rbp->rb_age == NULL) {
		bp->b_flags |= B_INVAL;
		return (0);
	}

	/*
	 * If there is one cached roll buffer that covers all the deltas then
	 * we can use that instead of copying to a separate roll buffer.
	 */
	if (rbp->rb_crb) {
		rbp->rb_bh.b_blkno = lbtodb(rbp->rb_crb->c_mof);
		return (0);
	}

	/*
	 * Set up the read.
	 * If no read is needed logmap_setup_read() returns 0.
	 */
	if (logmap_setup_read(rbp->rb_age, rbp)) {
		/*
		 * async read the data from master
		 */
		logstats.ls_rreads.value.ui64++;
		bp->b_bcount = MAPBLOCKSIZE;
		(void) bdev_strategy(bp);
		lwp_stat_update(LWP_STAT_INBLK, 1);
	} else {
		sema_v(&bp->b_io); /* mark read as complete */
	}
	return (0);
}

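/* tunable: non-zero lets top_log() use cached roll buffers */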
int ufs_crb_enable = 1;

/*
 * move deltas from deltamap into the log
 */
void
top_log(ufsvfs_t *ufsvfsp, char *va, offset_t vamof, off_t nb,
    caddr_t buf, uint32_t bufsz)
{
	ml_unit_t	*ul = ufsvfsp->vfs_log;
	mapentry_t	*me;
	offset_t	hmof;
	uint32_t	hnb, nb1;

	/*
	 * needed for the roll thread's heuristic
	 */
	ul->un_logmap->mtm_ref = 1;

	if (buf && ufs_crb_enable) {
		ASSERT((bufsz & DEV_BMASK) == 0);
		/*
		 * Move any deltas to the logmap. Split requests that
		 * straddle MAPBLOCKSIZE hash boundaries (i.e. summary info).
		 */
		for (hmof = vamof - (va - buf), nb1 = nb; bufsz;
		    bufsz -= hnb, hmof += hnb, buf += hnb, nb1 -= hnb) {
			hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF);
			if (hnb > bufsz)
				hnb = bufsz;
			me = deltamap_remove(ul->un_deltamap,
			    MAX(hmof, vamof), MIN(hnb, nb1));
			if (me) {
				logmap_add_buf(ul, va, hmof, me, buf, hnb);
			}
		}
	} else {
		/*
		 * if there are deltas
		 */
		me = deltamap_remove(ul->un_deltamap, vamof, nb);
		if (me) {
			/*
			 * move to logmap
			 */
			logmap_add(ul, va, vamof, me);
		}
	}

	ASSERT((ul->un_matamap == NULL) ||
	    matamap_within(ul->un_matamap, vamof, nb));
}

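/*
 * tsd destructor for topkey; frees the threadtrans_t allocated
 * in top_begin_async()
 */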
static void
top_threadtrans_destroy(void *tp)
{
	kmem_free(tp, sizeof (threadtrans_t));
}

void
_init_top(void)
{
	ASSERT(top_init_debug());

	/*
	 * set up the delta layer
	 */
	_init_map();

	/*
	 * Initialise the thread specific data transaction key
	 */
	tsd_create(&topkey, top_threadtrans_destroy);
}