xref: /illumos-gate/usr/src/uts/common/fs/ufs/lufs_map.c (revision b97d6ca7)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
520a1ae8aSjkennedy  * Common Development and Distribution License (the "License").
620a1ae8aSjkennedy  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2280d34432Sfrankho  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
24*b97d6ca7SMilan Jurik  * Copyright 2012 Milan Jurik. All rights reserved.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #include <sys/systm.h>
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
307c478bd9Sstevel@tonic-gate #include <sys/errno.h>
317c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
327c478bd9Sstevel@tonic-gate #include <sys/debug.h>
337c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
347c478bd9Sstevel@tonic-gate #include <sys/conf.h>
357c478bd9Sstevel@tonic-gate #include <sys/proc.h>
367c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
377c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
387c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_filio.h>
397c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
407c478bd9Sstevel@tonic-gate #include <sys/inttypes.h>
417c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
427c478bd9Sstevel@tonic-gate #include <sys/tuneable.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate /*
457c478bd9Sstevel@tonic-gate  * externs
467c478bd9Sstevel@tonic-gate  */
477c478bd9Sstevel@tonic-gate extern pri_t minclsyspri;
487c478bd9Sstevel@tonic-gate extern struct kmem_cache *lufs_bp;
49*b97d6ca7SMilan Jurik extern int ufs_trans_push_quota(ufsvfs_t *, delta_t, struct dquot *);
507c478bd9Sstevel@tonic-gate 
517c478bd9Sstevel@tonic-gate /*
527c478bd9Sstevel@tonic-gate  * globals
537c478bd9Sstevel@tonic-gate  */
547c478bd9Sstevel@tonic-gate kmem_cache_t *mapentry_cache;
557c478bd9Sstevel@tonic-gate 
/*
 * logmap tuning constants
 *
 * All of these are plain globals so they can be patched.
 */

/* logmap_need_commit() fires once mtm_nmet exceeds this */
long	logmap_maxnme_commit	= 2048;

/* logmap_need_roll_async() fires once mtm_nme exceeds this */
long	logmap_maxnme_async	= 4096;

/* logmap_need_roll_sync() fires once mtm_nme exceeds this */
long	logmap_maxnme_sync	= 6144;

/*
 * Max canceled fragments per moby; map_get() copies this into
 * mtm_cfragmax for every new map.
 */
long	logmap_maxcfrag_commit	= 4;

/*
 * Cached roll buffer (crb) accounting.
 */

/* current size of all crb buffers; CRB_FREE subtracts c_nb from it */
uint64_t ufs_crb_size = 0;

/* highest crb buffer use so far */
uint64_t ufs_crb_max_size = 0;

/* max allowable size for crbs */
size_t ufs_crb_limit;

/* crb allocation failures stat */
uint64_t ufs_crb_alloc_fails = 0;

/* default divisor: max 1/10 kmem_maxavail() */
#define	UFS_MAX_CRB_DEFAULT_DIVISOR 10

/* tunable divisor, initialised from the default above */
int ufs_max_crb_divisor = UFS_MAX_CRB_DEFAULT_DIVISOR;
717c478bd9Sstevel@tonic-gate void handle_dquot(mapentry_t *);
727c478bd9Sstevel@tonic-gate 
737c478bd9Sstevel@tonic-gate /*
747c478bd9Sstevel@tonic-gate  * GENERIC MAP ROUTINES
757c478bd9Sstevel@tonic-gate  */
767c478bd9Sstevel@tonic-gate 
/*
 * Free a cached roll buffer and detach it from its mapentry:
 *	- release the data buffer (c_buf, c_nb bytes),
 *	- subtract c_nb from the global ufs_crb_size accounting,
 *	- release the crb_t itself,
 *	- clear (me)->me_crb.
 *
 * The expansion is a single brace-enclosed compound statement so that
 * all four steps stay together when the macro is the body of an
 * unbraced if/else/loop.  It may be invoked with or without a trailing
 * semicolon (CRB_RELE invokes it without one).  Both arguments are
 * parenthesized; note that `crb' is still evaluated more than once, so
 * it must not have side effects.
 */
#define	CRB_FREE(crb, me) { \
	kmem_free((crb)->c_buf, (crb)->c_nb); \
	atomic_add_64(&ufs_crb_size, -(uint64_t)(crb)->c_nb); \
	kmem_free((crb), sizeof (crb_t)); \
	(me)->me_crb = NULL; \
}
827c478bd9Sstevel@tonic-gate 
/*
 * Drop one reference on the crb attached to mapentry `me', if any.
 * When the reference count reaches zero the crb is torn down with
 * CRB_FREE, which also clears (me)->me_crb.  A mapentry without a crb
 * is left untouched.
 */
#define	CRB_RELE(me) { \
	crb_t *crb = (me)->me_crb; \
	if (crb != NULL) { \
		crb->c_refcnt -= 1; \
		if (crb->c_refcnt == 0) { \
			CRB_FREE(crb, me) \
		} \
	} \
}
897c478bd9Sstevel@tonic-gate 
/*
 * Check that the old delta has an argument and a push function of
 * ufs_trans_push_quota(), then check that the old and new deltas differ
 * (in delta type, argument or push function).
 * If so we clean up with handle_dquot() before replacing the old delta.
 *
 *	me	- the old mapentry about to be replaced
 *	melist	- the new mapentry it is compared against
 *
 * Both arguments are parenthesized in the expansion so that arbitrary
 * pointer expressions (e.g. `base + 1') may be passed safely.
 */
#define	HANDLE_DQUOT(me, melist) { \
	if (((me)->me_arg) && \
	    ((me)->me_func == ufs_trans_push_quota)) { \
		if (!(((me)->me_dt == (melist)->me_dt) && \
		    ((me)->me_arg == (melist)->me_arg) && \
		    ((me)->me_func == (melist)->me_func))) { \
			handle_dquot(me); \
		} \
	} \
}
1057c478bd9Sstevel@tonic-gate 
1067c478bd9Sstevel@tonic-gate /*
1077c478bd9Sstevel@tonic-gate  * free up all the mapentries for a map
1087c478bd9Sstevel@tonic-gate  */
1097c478bd9Sstevel@tonic-gate void
map_free_entries(mt_map_t * mtm)1107c478bd9Sstevel@tonic-gate map_free_entries(mt_map_t *mtm)
1117c478bd9Sstevel@tonic-gate {
1127c478bd9Sstevel@tonic-gate 	int		i;
1137c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate 	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
1167c478bd9Sstevel@tonic-gate 		me->me_next->me_prev = me->me_prev;
1177c478bd9Sstevel@tonic-gate 		me->me_prev->me_next = me->me_next;
1187c478bd9Sstevel@tonic-gate 		CRB_RELE(me);
1197c478bd9Sstevel@tonic-gate 		kmem_cache_free(mapentry_cache, me);
1207c478bd9Sstevel@tonic-gate 	}
1217c478bd9Sstevel@tonic-gate 	for (i = 0; i < mtm->mtm_nhash; i++)
1227c478bd9Sstevel@tonic-gate 		mtm->mtm_hash[i] = NULL;
1237c478bd9Sstevel@tonic-gate 	mtm->mtm_nme = 0;
1247c478bd9Sstevel@tonic-gate 	mtm->mtm_nmet = 0;
1257c478bd9Sstevel@tonic-gate }
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate /*
1287c478bd9Sstevel@tonic-gate  * done with map; free if necessary
1297c478bd9Sstevel@tonic-gate  */
1307c478bd9Sstevel@tonic-gate mt_map_t *
map_put(mt_map_t * mtm)1317c478bd9Sstevel@tonic-gate map_put(mt_map_t *mtm)
1327c478bd9Sstevel@tonic-gate {
1337c478bd9Sstevel@tonic-gate 	/*
1347c478bd9Sstevel@tonic-gate 	 * free up the map's memory
1357c478bd9Sstevel@tonic-gate 	 */
1367c478bd9Sstevel@tonic-gate 	map_free_entries(mtm);
1377c478bd9Sstevel@tonic-gate 	ASSERT(map_put_debug(mtm));
1387c478bd9Sstevel@tonic-gate 	kmem_free(mtm->mtm_hash,
13980d34432Sfrankho 	    (size_t) (sizeof (mapentry_t *) * mtm->mtm_nhash));
1407c478bd9Sstevel@tonic-gate 	mutex_destroy(&mtm->mtm_mutex);
1417c478bd9Sstevel@tonic-gate 	mutex_destroy(&mtm->mtm_scan_mutex);
1427c478bd9Sstevel@tonic-gate 	cv_destroy(&mtm->mtm_to_roll_cv);
1437c478bd9Sstevel@tonic-gate 	cv_destroy(&mtm->mtm_from_roll_cv);
1447c478bd9Sstevel@tonic-gate 	rw_destroy(&mtm->mtm_rwlock);
1457c478bd9Sstevel@tonic-gate 	mutex_destroy(&mtm->mtm_lock);
1467c478bd9Sstevel@tonic-gate 	cv_destroy(&mtm->mtm_cv_commit);
1477c478bd9Sstevel@tonic-gate 	cv_destroy(&mtm->mtm_cv_next);
1487c478bd9Sstevel@tonic-gate 	cv_destroy(&mtm->mtm_cv_eot);
1497c478bd9Sstevel@tonic-gate 	cv_destroy(&mtm->mtm_cv);
1507c478bd9Sstevel@tonic-gate 	kmem_free(mtm, sizeof (mt_map_t));
1517c478bd9Sstevel@tonic-gate 	return (NULL);
1527c478bd9Sstevel@tonic-gate }
1537c478bd9Sstevel@tonic-gate /*
1547c478bd9Sstevel@tonic-gate  * Allocate a map;
1557c478bd9Sstevel@tonic-gate  */
1567c478bd9Sstevel@tonic-gate mt_map_t *
map_get(ml_unit_t * ul,enum maptypes maptype,int nh)1577c478bd9Sstevel@tonic-gate map_get(ml_unit_t *ul, enum maptypes maptype, int nh)
1587c478bd9Sstevel@tonic-gate {
1597c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm;
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate 	/*
1627c478bd9Sstevel@tonic-gate 	 * assume the map is not here and allocate the necessary structs
1637c478bd9Sstevel@tonic-gate 	 */
1647c478bd9Sstevel@tonic-gate 	mtm = kmem_zalloc(sizeof (mt_map_t), KM_SLEEP);
1657c478bd9Sstevel@tonic-gate 	mutex_init(&mtm->mtm_mutex, NULL, MUTEX_DEFAULT, NULL);
1667c478bd9Sstevel@tonic-gate 	mutex_init(&mtm->mtm_scan_mutex, NULL, MUTEX_DEFAULT, NULL);
1677c478bd9Sstevel@tonic-gate 	cv_init(&mtm->mtm_to_roll_cv, NULL, CV_DEFAULT, NULL);
1687c478bd9Sstevel@tonic-gate 	cv_init(&mtm->mtm_from_roll_cv, NULL, CV_DEFAULT, NULL);
1697c478bd9Sstevel@tonic-gate 	rw_init(&mtm->mtm_rwlock, NULL, RW_DEFAULT, NULL);
1707c478bd9Sstevel@tonic-gate 	mtm->mtm_next = (mapentry_t *)mtm;
1717c478bd9Sstevel@tonic-gate 	mtm->mtm_prev = (mapentry_t *)mtm;
1727c478bd9Sstevel@tonic-gate 	mtm->mtm_hash = kmem_zalloc((size_t) (sizeof (mapentry_t *) * nh),
1737c478bd9Sstevel@tonic-gate 	    KM_SLEEP);
1747c478bd9Sstevel@tonic-gate 	mtm->mtm_nhash = nh;
1757c478bd9Sstevel@tonic-gate 	mtm->mtm_debug = ul->un_debug;
1767c478bd9Sstevel@tonic-gate 	mtm->mtm_type = maptype;
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate 	mtm->mtm_cfrags = 0;
1797c478bd9Sstevel@tonic-gate 	mtm->mtm_cfragmax = logmap_maxcfrag_commit;
1807c478bd9Sstevel@tonic-gate 
1817c478bd9Sstevel@tonic-gate 	/*
1827c478bd9Sstevel@tonic-gate 	 * for scan test
1837c478bd9Sstevel@tonic-gate 	 */
1847c478bd9Sstevel@tonic-gate 	mtm->mtm_ul = ul;
1857c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate 	/*
1877c478bd9Sstevel@tonic-gate 	 * Initialize locks
1887c478bd9Sstevel@tonic-gate 	 */
1897c478bd9Sstevel@tonic-gate 	mutex_init(&mtm->mtm_lock, NULL, MUTEX_DEFAULT, NULL);
1907c478bd9Sstevel@tonic-gate 	cv_init(&mtm->mtm_cv_commit, NULL, CV_DEFAULT, NULL);
1917c478bd9Sstevel@tonic-gate 	cv_init(&mtm->mtm_cv_next, NULL, CV_DEFAULT, NULL);
1927c478bd9Sstevel@tonic-gate 	cv_init(&mtm->mtm_cv_eot, NULL, CV_DEFAULT, NULL);
1937c478bd9Sstevel@tonic-gate 	cv_init(&mtm->mtm_cv, NULL, CV_DEFAULT, NULL);
1947c478bd9Sstevel@tonic-gate 	ASSERT(map_get_debug(ul, mtm));
1957c478bd9Sstevel@tonic-gate 
1967c478bd9Sstevel@tonic-gate 	return (mtm);
1977c478bd9Sstevel@tonic-gate }
1987c478bd9Sstevel@tonic-gate 
1997c478bd9Sstevel@tonic-gate /*
2007c478bd9Sstevel@tonic-gate  * DELTAMAP ROUTINES
2017c478bd9Sstevel@tonic-gate  */
2027c478bd9Sstevel@tonic-gate /*
2037c478bd9Sstevel@tonic-gate  * deltamap tuning constants
2047c478bd9Sstevel@tonic-gate  */
2057c478bd9Sstevel@tonic-gate long	deltamap_maxnme	= 1024;	/* global so it can be set */
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate int
deltamap_need_commit(mt_map_t * mtm)2087c478bd9Sstevel@tonic-gate deltamap_need_commit(mt_map_t *mtm)
2097c478bd9Sstevel@tonic-gate {
2107c478bd9Sstevel@tonic-gate 	return (mtm->mtm_nme > deltamap_maxnme);
2117c478bd9Sstevel@tonic-gate }
2127c478bd9Sstevel@tonic-gate 
2137c478bd9Sstevel@tonic-gate /*
2147c478bd9Sstevel@tonic-gate  * put a delta into a deltamap; may sleep on memory
2157c478bd9Sstevel@tonic-gate  */
2167c478bd9Sstevel@tonic-gate void
deltamap_add(mt_map_t * mtm,offset_t mof,off_t nb,delta_t dtyp,int (* func)(),ulong_t arg,threadtrans_t * tp)2177c478bd9Sstevel@tonic-gate deltamap_add(
2187c478bd9Sstevel@tonic-gate 	mt_map_t *mtm,
2197c478bd9Sstevel@tonic-gate 	offset_t mof,
2207c478bd9Sstevel@tonic-gate 	off_t nb,
2217c478bd9Sstevel@tonic-gate 	delta_t dtyp,
2227c478bd9Sstevel@tonic-gate 	int (*func)(),
2237c478bd9Sstevel@tonic-gate 	ulong_t arg,
2247c478bd9Sstevel@tonic-gate 	threadtrans_t *tp)
2257c478bd9Sstevel@tonic-gate {
2267c478bd9Sstevel@tonic-gate 	int32_t		hnb;
2277c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
2287c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
23180d34432Sfrankho 	    map_check_linkage(mtm));
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
2367c478bd9Sstevel@tonic-gate 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
2377c478bd9Sstevel@tonic-gate 		if (hnb > nb)
2387c478bd9Sstevel@tonic-gate 			hnb = nb;
2397c478bd9Sstevel@tonic-gate 		/*
2407c478bd9Sstevel@tonic-gate 		 * Search for dup entry. We need to ensure that we don't
2417c478bd9Sstevel@tonic-gate 		 * replace a map entry which carries quota information
2427c478bd9Sstevel@tonic-gate 		 * with a map entry which doesn't. In that case we lose
2437c478bd9Sstevel@tonic-gate 		 * reference the the dquot structure which will not be
2447c478bd9Sstevel@tonic-gate 		 * cleaned up by the push function me->me_func as this will
2457c478bd9Sstevel@tonic-gate 		 * never be called.
2467c478bd9Sstevel@tonic-gate 		 * The stray dquot would be found later by invalidatedq()
2477c478bd9Sstevel@tonic-gate 		 * causing a panic when the filesystem is unmounted.
2487c478bd9Sstevel@tonic-gate 		 */
2497c478bd9Sstevel@tonic-gate 		mep = MAP_HASH(mof, mtm);
2507c478bd9Sstevel@tonic-gate 		for (me = *mep; me; me = me->me_hash) {
2517c478bd9Sstevel@tonic-gate 			if (DATAwithinME(mof, hnb, me)) {
2527c478bd9Sstevel@tonic-gate 				/*
2537c478bd9Sstevel@tonic-gate 				 * Don't remove quota entries which have
2547c478bd9Sstevel@tonic-gate 				 * incremented the ref count (those with a
2557c478bd9Sstevel@tonic-gate 				 * ufs_trans_push_quota push function).
2567c478bd9Sstevel@tonic-gate 				 * Let logmap_add[_buf] clean them up.
2577c478bd9Sstevel@tonic-gate 				 */
25880d34432Sfrankho 				if (me->me_func == ufs_trans_push_quota) {
25980d34432Sfrankho 					continue;
26080d34432Sfrankho 				}
26180d34432Sfrankho 				break;
2627c478bd9Sstevel@tonic-gate 			}
2637c478bd9Sstevel@tonic-gate 			ASSERT((dtyp == DT_CANCEL) ||
26480d34432Sfrankho 			    (!DATAoverlapME(mof, hnb, me)) ||
26580d34432Sfrankho 			    MEwithinDATA(me, mof, hnb));
2667c478bd9Sstevel@tonic-gate 		}
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate 		if (me) {
2697c478bd9Sstevel@tonic-gate 			/* already in map */
2707c478bd9Sstevel@tonic-gate 			continue;
2717c478bd9Sstevel@tonic-gate 		}
2727c478bd9Sstevel@tonic-gate 
2737c478bd9Sstevel@tonic-gate 		/*
2747c478bd9Sstevel@tonic-gate 		 * Add up all the delta map deltas so we can compute
2757c478bd9Sstevel@tonic-gate 		 * an upper bound on the log size used.
2767c478bd9Sstevel@tonic-gate 		 * Note, some deltas get removed from the deltamap
2777c478bd9Sstevel@tonic-gate 		 * before the deltamap_push by lufs_write_strategy
2787c478bd9Sstevel@tonic-gate 		 * and so multiple deltas to the same mof offset
2797c478bd9Sstevel@tonic-gate 		 * don't get cancelled here but in the logmap.
2807c478bd9Sstevel@tonic-gate 		 * Thus we can't easily get a accurate count of
2817c478bd9Sstevel@tonic-gate 		 * the log space used - only an upper bound.
2827c478bd9Sstevel@tonic-gate 		 */
2837c478bd9Sstevel@tonic-gate 		if (tp && (mtm->mtm_ul->un_deltamap == mtm)) {
2847c478bd9Sstevel@tonic-gate 			ASSERT(dtyp != DT_CANCEL);
2857c478bd9Sstevel@tonic-gate 			if (dtyp == DT_ABZERO) {
2867c478bd9Sstevel@tonic-gate 				tp->deltas_size += sizeof (struct delta);
2877c478bd9Sstevel@tonic-gate 			} else {
2887c478bd9Sstevel@tonic-gate 				tp->deltas_size +=
2897c478bd9Sstevel@tonic-gate 				    (hnb + sizeof (struct delta));
2907c478bd9Sstevel@tonic-gate 			}
2917c478bd9Sstevel@tonic-gate 		}
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate 		delta_stats[dtyp]++;
2947c478bd9Sstevel@tonic-gate 
2957c478bd9Sstevel@tonic-gate 		/*
2967c478bd9Sstevel@tonic-gate 		 * get a mapentry
2977c478bd9Sstevel@tonic-gate 		 * May need to drop & re-grab the mtm_mutex
2987c478bd9Sstevel@tonic-gate 		 * and then recheck for a duplicate
2997c478bd9Sstevel@tonic-gate 		 */
3007c478bd9Sstevel@tonic-gate 		me = kmem_cache_alloc(mapentry_cache, KM_NOSLEEP);
3017c478bd9Sstevel@tonic-gate 		if (me == NULL) {
3027c478bd9Sstevel@tonic-gate 			mutex_exit(&mtm->mtm_mutex);
3037c478bd9Sstevel@tonic-gate 			me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
3047c478bd9Sstevel@tonic-gate 			mutex_enter(&mtm->mtm_mutex);
3057c478bd9Sstevel@tonic-gate 		}
3067c478bd9Sstevel@tonic-gate 		bzero(me, sizeof (mapentry_t));
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 		/*
3097c478bd9Sstevel@tonic-gate 		 * initialize and put in deltamap
3107c478bd9Sstevel@tonic-gate 		 */
3117c478bd9Sstevel@tonic-gate 		me->me_mof = mof;
3127c478bd9Sstevel@tonic-gate 		me->me_nb = hnb;
3137c478bd9Sstevel@tonic-gate 		me->me_func = func;
3147c478bd9Sstevel@tonic-gate 		me->me_arg = arg;
3157c478bd9Sstevel@tonic-gate 		me->me_dt = dtyp;
3167c478bd9Sstevel@tonic-gate 		me->me_flags = ME_HASH;
3177c478bd9Sstevel@tonic-gate 		me->me_tid = mtm->mtm_tid;
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate 		me->me_hash = *mep;
3207c478bd9Sstevel@tonic-gate 		*mep = me;
3217c478bd9Sstevel@tonic-gate 		me->me_next = (mapentry_t *)mtm;
3227c478bd9Sstevel@tonic-gate 		me->me_prev = mtm->mtm_prev;
3237c478bd9Sstevel@tonic-gate 		mtm->mtm_prev->me_next = me;
3247c478bd9Sstevel@tonic-gate 		mtm->mtm_prev = me;
3257c478bd9Sstevel@tonic-gate 		mtm->mtm_nme++;
3267c478bd9Sstevel@tonic-gate 	}
3277c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
33080d34432Sfrankho 	    map_check_linkage(mtm));
3317c478bd9Sstevel@tonic-gate }
3327c478bd9Sstevel@tonic-gate 
3337c478bd9Sstevel@tonic-gate /*
3347c478bd9Sstevel@tonic-gate  * remove deltas within (mof, nb) and return as linked list
3357c478bd9Sstevel@tonic-gate  */
3367c478bd9Sstevel@tonic-gate mapentry_t *
deltamap_remove(mt_map_t * mtm,offset_t mof,off_t nb)3377c478bd9Sstevel@tonic-gate deltamap_remove(mt_map_t *mtm, offset_t mof, off_t nb)
3387c478bd9Sstevel@tonic-gate {
3397c478bd9Sstevel@tonic-gate 	off_t		hnb;
3407c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
3417c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
3427c478bd9Sstevel@tonic-gate 	mapentry_t	*mer;
3437c478bd9Sstevel@tonic-gate 
3447c478bd9Sstevel@tonic-gate 	if (mtm == NULL)
3457c478bd9Sstevel@tonic-gate 		return (NULL);
3467c478bd9Sstevel@tonic-gate 
3477c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
34880d34432Sfrankho 	    map_check_linkage(mtm));
3497c478bd9Sstevel@tonic-gate 
3507c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
3517c478bd9Sstevel@tonic-gate 	for (mer = NULL, hnb = 0; nb; nb -= hnb, mof += hnb) {
3527c478bd9Sstevel@tonic-gate 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
3537c478bd9Sstevel@tonic-gate 		if (hnb > nb)
3547c478bd9Sstevel@tonic-gate 			hnb = nb;
3557c478bd9Sstevel@tonic-gate 		/*
3567c478bd9Sstevel@tonic-gate 		 * remove entries from hash and return as a aged linked list
3577c478bd9Sstevel@tonic-gate 		 */
3587c478bd9Sstevel@tonic-gate 		mep = MAP_HASH(mof, mtm);
3597c478bd9Sstevel@tonic-gate 		while ((me = *mep) != 0) {
3607c478bd9Sstevel@tonic-gate 			if (MEwithinDATA(me, mof, hnb)) {
3617c478bd9Sstevel@tonic-gate 				*mep = me->me_hash;
3627c478bd9Sstevel@tonic-gate 				me->me_next->me_prev = me->me_prev;
3637c478bd9Sstevel@tonic-gate 				me->me_prev->me_next = me->me_next;
3647c478bd9Sstevel@tonic-gate 				me->me_hash = mer;
3657c478bd9Sstevel@tonic-gate 				mer = me;
3667c478bd9Sstevel@tonic-gate 				me->me_flags |= ME_LIST;
3677c478bd9Sstevel@tonic-gate 				me->me_flags &= ~ME_HASH;
3687c478bd9Sstevel@tonic-gate 				mtm->mtm_nme--;
3697c478bd9Sstevel@tonic-gate 			} else
3707c478bd9Sstevel@tonic-gate 				mep = &me->me_hash;
3717c478bd9Sstevel@tonic-gate 		}
3727c478bd9Sstevel@tonic-gate 	}
3737c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
37680d34432Sfrankho 	    map_check_linkage(mtm));
3777c478bd9Sstevel@tonic-gate 
3787c478bd9Sstevel@tonic-gate 	return (mer);
3797c478bd9Sstevel@tonic-gate }
3807c478bd9Sstevel@tonic-gate 
3817c478bd9Sstevel@tonic-gate /*
3827c478bd9Sstevel@tonic-gate  * delete entries within (mof, nb)
3837c478bd9Sstevel@tonic-gate  */
3847c478bd9Sstevel@tonic-gate void
deltamap_del(mt_map_t * mtm,offset_t mof,off_t nb)3857c478bd9Sstevel@tonic-gate deltamap_del(mt_map_t *mtm, offset_t mof, off_t nb)
3867c478bd9Sstevel@tonic-gate {
3877c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
3887c478bd9Sstevel@tonic-gate 	mapentry_t	*menext;
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	menext = deltamap_remove(mtm, mof, nb);
3917c478bd9Sstevel@tonic-gate 	while ((me = menext) != 0) {
3927c478bd9Sstevel@tonic-gate 		menext = me->me_hash;
3937c478bd9Sstevel@tonic-gate 		kmem_cache_free(mapentry_cache, me);
3947c478bd9Sstevel@tonic-gate 	}
3957c478bd9Sstevel@tonic-gate }
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate /*
3987c478bd9Sstevel@tonic-gate  * Call the indicated function to cause deltas to move to the logmap.
3997c478bd9Sstevel@tonic-gate  * top_end_sync() is the only caller of this function and
4007c478bd9Sstevel@tonic-gate  * it has waited for the completion of all threads, so there can
4017c478bd9Sstevel@tonic-gate  * be no other activity in the deltamap. Therefore we don't need to
4027c478bd9Sstevel@tonic-gate  * hold the deltamap lock.
4037c478bd9Sstevel@tonic-gate  */
4047c478bd9Sstevel@tonic-gate void
deltamap_push(ml_unit_t * ul)4057c478bd9Sstevel@tonic-gate deltamap_push(ml_unit_t *ul)
4067c478bd9Sstevel@tonic-gate {
4077c478bd9Sstevel@tonic-gate 	delta_t		dtyp;
4087c478bd9Sstevel@tonic-gate 	int		(*func)();
4097c478bd9Sstevel@tonic-gate 	ulong_t		arg;
4107c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
4117c478bd9Sstevel@tonic-gate 	offset_t	mof;
4127c478bd9Sstevel@tonic-gate 	off_t		nb;
4137c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_deltamap;
4147c478bd9Sstevel@tonic-gate 
4157c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
41680d34432Sfrankho 	    map_check_linkage(mtm));
4177c478bd9Sstevel@tonic-gate 
4187c478bd9Sstevel@tonic-gate 	/*
4197c478bd9Sstevel@tonic-gate 	 * for every entry in the deltamap
4207c478bd9Sstevel@tonic-gate 	 */
4217c478bd9Sstevel@tonic-gate 	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
4227c478bd9Sstevel@tonic-gate 		ASSERT(me->me_func);
4237c478bd9Sstevel@tonic-gate 		func = me->me_func;
4247c478bd9Sstevel@tonic-gate 		dtyp = me->me_dt;
4257c478bd9Sstevel@tonic-gate 		arg = me->me_arg;
4267c478bd9Sstevel@tonic-gate 		mof = me->me_mof;
4277c478bd9Sstevel@tonic-gate 		nb = me->me_nb;
4287c478bd9Sstevel@tonic-gate 		if ((ul->un_flags & LDL_ERROR) ||
4297c478bd9Sstevel@tonic-gate 		    (*func)(ul->un_ufsvfs, dtyp, arg))
4307c478bd9Sstevel@tonic-gate 			deltamap_del(mtm, mof, nb);
4317c478bd9Sstevel@tonic-gate 	}
4327c478bd9Sstevel@tonic-gate 
4337c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
43480d34432Sfrankho 	    map_check_linkage(mtm));
4357c478bd9Sstevel@tonic-gate }
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate /*
4387c478bd9Sstevel@tonic-gate  * LOGMAP ROUTINES
4397c478bd9Sstevel@tonic-gate  */
4407c478bd9Sstevel@tonic-gate 
4417c478bd9Sstevel@tonic-gate int
logmap_need_commit(mt_map_t * mtm)4427c478bd9Sstevel@tonic-gate logmap_need_commit(mt_map_t *mtm)
4437c478bd9Sstevel@tonic-gate {
4447c478bd9Sstevel@tonic-gate 	return ((mtm->mtm_nmet > logmap_maxnme_commit) ||
44580d34432Sfrankho 	    (mtm->mtm_cfrags >= mtm->mtm_cfragmax));
4467c478bd9Sstevel@tonic-gate }
4477c478bd9Sstevel@tonic-gate 
4487c478bd9Sstevel@tonic-gate int
logmap_need_roll_async(mt_map_t * mtm)4497c478bd9Sstevel@tonic-gate logmap_need_roll_async(mt_map_t *mtm)
4507c478bd9Sstevel@tonic-gate {
4517c478bd9Sstevel@tonic-gate 	return (mtm->mtm_nme > logmap_maxnme_async);
4527c478bd9Sstevel@tonic-gate }
4537c478bd9Sstevel@tonic-gate 
4547c478bd9Sstevel@tonic-gate int
logmap_need_roll_sync(mt_map_t * mtm)4557c478bd9Sstevel@tonic-gate logmap_need_roll_sync(mt_map_t *mtm)
4567c478bd9Sstevel@tonic-gate {
4577c478bd9Sstevel@tonic-gate 	return (mtm->mtm_nme > logmap_maxnme_sync);
4587c478bd9Sstevel@tonic-gate }
4597c478bd9Sstevel@tonic-gate 
4607c478bd9Sstevel@tonic-gate void
logmap_start_roll(ml_unit_t * ul)4617c478bd9Sstevel@tonic-gate logmap_start_roll(ml_unit_t *ul)
4627c478bd9Sstevel@tonic-gate {
4637c478bd9Sstevel@tonic-gate 	mt_map_t	*logmap	= ul->un_logmap;
4647c478bd9Sstevel@tonic-gate 
4657c478bd9Sstevel@tonic-gate 	logmap_settail(logmap, ul);
4667c478bd9Sstevel@tonic-gate 	ASSERT(!(ul->un_flags & LDL_NOROLL));
4677c478bd9Sstevel@tonic-gate 	mutex_enter(&logmap->mtm_mutex);
4687c478bd9Sstevel@tonic-gate 	if ((logmap->mtm_flags & MTM_ROLL_RUNNING) == 0) {
4697c478bd9Sstevel@tonic-gate 		logmap->mtm_flags |= MTM_ROLL_RUNNING;
4707c478bd9Sstevel@tonic-gate 		logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_EXIT);
4717c478bd9Sstevel@tonic-gate 		(void) thread_create(NULL, 0, trans_roll, ul, 0, &p0,
4727c478bd9Sstevel@tonic-gate 		    TS_RUN, minclsyspri);
4737c478bd9Sstevel@tonic-gate 	}
4747c478bd9Sstevel@tonic-gate 	mutex_exit(&logmap->mtm_mutex);
4757c478bd9Sstevel@tonic-gate }
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate void
logmap_kill_roll(ml_unit_t * ul)4787c478bd9Sstevel@tonic-gate logmap_kill_roll(ml_unit_t *ul)
4797c478bd9Sstevel@tonic-gate {
4807c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
4817c478bd9Sstevel@tonic-gate 
4827c478bd9Sstevel@tonic-gate 	if (mtm == NULL)
4837c478bd9Sstevel@tonic-gate 		return;
4847c478bd9Sstevel@tonic-gate 
4857c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
4867c478bd9Sstevel@tonic-gate 
4877c478bd9Sstevel@tonic-gate 	while (mtm->mtm_flags & MTM_ROLL_RUNNING) {
4887c478bd9Sstevel@tonic-gate 		mtm->mtm_flags |= MTM_ROLL_EXIT;
4897c478bd9Sstevel@tonic-gate 		cv_signal(&mtm->mtm_to_roll_cv);
4907c478bd9Sstevel@tonic-gate 		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
4917c478bd9Sstevel@tonic-gate 	}
4927c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
4937c478bd9Sstevel@tonic-gate }
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate /*
4967c478bd9Sstevel@tonic-gate  * kick the roll thread if it's not doing anything
4977c478bd9Sstevel@tonic-gate  */
4987c478bd9Sstevel@tonic-gate void
logmap_forceroll_nowait(mt_map_t * logmap)4997c478bd9Sstevel@tonic-gate logmap_forceroll_nowait(mt_map_t *logmap)
5007c478bd9Sstevel@tonic-gate {
5017c478bd9Sstevel@tonic-gate 	/*
5027c478bd9Sstevel@tonic-gate 	 * Don't need to lock mtm_mutex to read mtm_flags here as we
5037c478bd9Sstevel@tonic-gate 	 * don't care in the rare case when we get a transitional value
5047c478bd9Sstevel@tonic-gate 	 * of mtm_flags. Just by signalling the thread it will wakeup
5057c478bd9Sstevel@tonic-gate 	 * and notice it has too many logmap entries.
5067c478bd9Sstevel@tonic-gate 	 */
5077c478bd9Sstevel@tonic-gate 	ASSERT(!(logmap->mtm_ul->un_flags & LDL_NOROLL));
5087c478bd9Sstevel@tonic-gate 	if ((logmap->mtm_flags & MTM_ROLLING) == 0) {
5097c478bd9Sstevel@tonic-gate 		cv_signal(&logmap->mtm_to_roll_cv);
5107c478bd9Sstevel@tonic-gate 	}
5117c478bd9Sstevel@tonic-gate }
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate /*
5147c478bd9Sstevel@tonic-gate  * kick the roll thread and wait for it to finish a cycle
5157c478bd9Sstevel@tonic-gate  */
5167c478bd9Sstevel@tonic-gate void
logmap_forceroll(mt_map_t * mtm)5177c478bd9Sstevel@tonic-gate logmap_forceroll(mt_map_t *mtm)
5187c478bd9Sstevel@tonic-gate {
5197c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
5207c478bd9Sstevel@tonic-gate 	if ((mtm->mtm_flags & MTM_FORCE_ROLL) == 0) {
5217c478bd9Sstevel@tonic-gate 		mtm->mtm_flags |= MTM_FORCE_ROLL;
5227c478bd9Sstevel@tonic-gate 		cv_signal(&mtm->mtm_to_roll_cv);
5237c478bd9Sstevel@tonic-gate 	}
5247c478bd9Sstevel@tonic-gate 	do {
5257c478bd9Sstevel@tonic-gate 		if ((mtm->mtm_flags & MTM_ROLL_RUNNING) == 0) {
5267c478bd9Sstevel@tonic-gate 			mtm->mtm_flags &= ~MTM_FORCE_ROLL;
5277c478bd9Sstevel@tonic-gate 			goto out;
5287c478bd9Sstevel@tonic-gate 		}
5297c478bd9Sstevel@tonic-gate 		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
5307c478bd9Sstevel@tonic-gate 	} while (mtm->mtm_flags & MTM_FORCE_ROLL);
5317c478bd9Sstevel@tonic-gate out:
5327c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
5337c478bd9Sstevel@tonic-gate }
5347c478bd9Sstevel@tonic-gate 
5357c478bd9Sstevel@tonic-gate /*
5367c478bd9Sstevel@tonic-gate  * remove rolled deltas within (mof, nb) and free them
5377c478bd9Sstevel@tonic-gate  */
5387c478bd9Sstevel@tonic-gate void
logmap_remove_roll(mt_map_t * mtm,offset_t mof,off_t nb)5397c478bd9Sstevel@tonic-gate logmap_remove_roll(mt_map_t *mtm, offset_t mof, off_t nb)
5407c478bd9Sstevel@tonic-gate {
5417c478bd9Sstevel@tonic-gate 	int		dolock = 0;
5427c478bd9Sstevel@tonic-gate 	off_t		hnb;
5437c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
5447c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
5457c478bd9Sstevel@tonic-gate 	offset_t	savmof	= mof;
5467c478bd9Sstevel@tonic-gate 	off_t		savnb	= nb;
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
54980d34432Sfrankho 	    map_check_linkage(mtm));
5507c478bd9Sstevel@tonic-gate 
5517c478bd9Sstevel@tonic-gate again:
5527c478bd9Sstevel@tonic-gate 	if (dolock)
5537c478bd9Sstevel@tonic-gate 		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
5547c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
5557c478bd9Sstevel@tonic-gate 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
5567c478bd9Sstevel@tonic-gate 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
5577c478bd9Sstevel@tonic-gate 		if (hnb > nb)
5587c478bd9Sstevel@tonic-gate 			hnb = nb;
5597c478bd9Sstevel@tonic-gate 		/*
5607c478bd9Sstevel@tonic-gate 		 * remove and free the rolled entries
5617c478bd9Sstevel@tonic-gate 		 */
5627c478bd9Sstevel@tonic-gate 		mep = MAP_HASH(mof, mtm);
5637c478bd9Sstevel@tonic-gate 		while ((me = *mep) != 0) {
5647c478bd9Sstevel@tonic-gate 			if ((me->me_flags & ME_ROLL) &&
5657c478bd9Sstevel@tonic-gate 			    (MEwithinDATA(me, mof, hnb))) {
5667c478bd9Sstevel@tonic-gate 				if (me->me_flags & ME_AGE) {
5677c478bd9Sstevel@tonic-gate 					ASSERT(dolock == 0);
5687c478bd9Sstevel@tonic-gate 					dolock = 1;
5697c478bd9Sstevel@tonic-gate 					mutex_exit(&mtm->mtm_mutex);
5707c478bd9Sstevel@tonic-gate 					mof = savmof;
5717c478bd9Sstevel@tonic-gate 					nb = savnb;
5727c478bd9Sstevel@tonic-gate 					goto again;
5737c478bd9Sstevel@tonic-gate 				}
5747c478bd9Sstevel@tonic-gate 				*mep = me->me_hash;
5757c478bd9Sstevel@tonic-gate 				me->me_next->me_prev = me->me_prev;
5767c478bd9Sstevel@tonic-gate 				me->me_prev->me_next = me->me_next;
5777c478bd9Sstevel@tonic-gate 				me->me_flags &= ~(ME_HASH|ME_ROLL);
5787c478bd9Sstevel@tonic-gate 				ASSERT(!(me->me_flags & ME_USER));
5797c478bd9Sstevel@tonic-gate 				mtm->mtm_nme--;
5807c478bd9Sstevel@tonic-gate 				/*
5817c478bd9Sstevel@tonic-gate 				 * cancelled entries are handled by someone else
5827c478bd9Sstevel@tonic-gate 				 */
5837c478bd9Sstevel@tonic-gate 				if ((me->me_flags & ME_CANCEL) == 0) {
5847c478bd9Sstevel@tonic-gate 					roll_stats[me->me_dt]++;
5857c478bd9Sstevel@tonic-gate 					CRB_RELE(me);
5867c478bd9Sstevel@tonic-gate 					kmem_cache_free(mapentry_cache, me);
5877c478bd9Sstevel@tonic-gate 				}
5887c478bd9Sstevel@tonic-gate 			} else
5897c478bd9Sstevel@tonic-gate 				mep = &me->me_hash;
5907c478bd9Sstevel@tonic-gate 		}
5917c478bd9Sstevel@tonic-gate 	}
5927c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
59580d34432Sfrankho 	    map_check_linkage(mtm));
5967c478bd9Sstevel@tonic-gate 
5977c478bd9Sstevel@tonic-gate 	if (dolock)
5987c478bd9Sstevel@tonic-gate 		rw_exit(&mtm->mtm_rwlock);
5997c478bd9Sstevel@tonic-gate }
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate /*
6027c478bd9Sstevel@tonic-gate  * Find the disk offset of the next delta to roll.
6037c478bd9Sstevel@tonic-gate  * Returns 0: no more deltas to roll or a transaction is being committed
6047c478bd9Sstevel@tonic-gate  *	   1: a delta to roll has been found and *mofp points
6057c478bd9Sstevel@tonic-gate  *	      to the master file disk offset
6067c478bd9Sstevel@tonic-gate  */
6077c478bd9Sstevel@tonic-gate int
logmap_next_roll(mt_map_t * logmap,offset_t * mofp)6087c478bd9Sstevel@tonic-gate logmap_next_roll(mt_map_t *logmap, offset_t *mofp)
6097c478bd9Sstevel@tonic-gate {
6107c478bd9Sstevel@tonic-gate 	mapentry_t *me;
6117c478bd9Sstevel@tonic-gate 
6127c478bd9Sstevel@tonic-gate 	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
61380d34432Sfrankho 	    map_check_linkage(logmap));
6147c478bd9Sstevel@tonic-gate 
6157c478bd9Sstevel@tonic-gate 	mutex_enter(&logmap->mtm_mutex);
6167c478bd9Sstevel@tonic-gate 	for (me = logmap->mtm_next; me != (mapentry_t *)logmap;
6177c478bd9Sstevel@tonic-gate 	    me = me->me_next) {
6187c478bd9Sstevel@tonic-gate 		/* already rolled */
6197c478bd9Sstevel@tonic-gate 		if (me->me_flags & ME_ROLL) {
6207c478bd9Sstevel@tonic-gate 			continue;
6217c478bd9Sstevel@tonic-gate 		}
6227c478bd9Sstevel@tonic-gate 
6237c478bd9Sstevel@tonic-gate 		/* part of currently busy transaction; stop */
6247c478bd9Sstevel@tonic-gate 		if (me->me_tid == logmap->mtm_tid) {
6257c478bd9Sstevel@tonic-gate 			break;
6267c478bd9Sstevel@tonic-gate 		}
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 		/* part of commit-in-progress transaction; stop */
6297c478bd9Sstevel@tonic-gate 		if (me->me_tid == logmap->mtm_committid) {
6307c478bd9Sstevel@tonic-gate 			break;
6317c478bd9Sstevel@tonic-gate 		}
6327c478bd9Sstevel@tonic-gate 
6337c478bd9Sstevel@tonic-gate 		/*
6347c478bd9Sstevel@tonic-gate 		 * We shouldn't see a DT_CANCEL mapentry whose
6357c478bd9Sstevel@tonic-gate 		 * tid != mtm_committid, or != mtm_tid since
6367c478bd9Sstevel@tonic-gate 		 * these are removed at the end of each committed
6377c478bd9Sstevel@tonic-gate 		 * transaction.
6387c478bd9Sstevel@tonic-gate 		 */
6397c478bd9Sstevel@tonic-gate 		ASSERT(!(me->me_dt == DT_CANCEL));
6407c478bd9Sstevel@tonic-gate 
6417c478bd9Sstevel@tonic-gate 		*mofp = me->me_mof;
6427c478bd9Sstevel@tonic-gate 		mutex_exit(&logmap->mtm_mutex);
6437c478bd9Sstevel@tonic-gate 		return (1);
6447c478bd9Sstevel@tonic-gate 	}
6457c478bd9Sstevel@tonic-gate 	mutex_exit(&logmap->mtm_mutex);
6467c478bd9Sstevel@tonic-gate 	return (0);
6477c478bd9Sstevel@tonic-gate }
6487c478bd9Sstevel@tonic-gate 
6497c478bd9Sstevel@tonic-gate /*
6507c478bd9Sstevel@tonic-gate  * put mapentry on sorted age list
6517c478bd9Sstevel@tonic-gate  */
6527c478bd9Sstevel@tonic-gate static void
logmap_list_age(mapentry_t ** age,mapentry_t * meadd)6537c478bd9Sstevel@tonic-gate logmap_list_age(mapentry_t **age, mapentry_t *meadd)
6547c478bd9Sstevel@tonic-gate {
6557c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
6567c478bd9Sstevel@tonic-gate 
6577c478bd9Sstevel@tonic-gate 	ASSERT(!(meadd->me_flags & (ME_AGE|ME_LIST)));
6587c478bd9Sstevel@tonic-gate 
6597c478bd9Sstevel@tonic-gate 	for (me = *age; me; age = &me->me_agenext, me = *age) {
6607c478bd9Sstevel@tonic-gate 		if (me->me_age > meadd->me_age)
6617c478bd9Sstevel@tonic-gate 			break;
6627c478bd9Sstevel@tonic-gate 	}
6637c478bd9Sstevel@tonic-gate 	meadd->me_agenext = me;
6647c478bd9Sstevel@tonic-gate 	meadd->me_flags |= ME_AGE;
6657c478bd9Sstevel@tonic-gate 	*age = meadd;
6667c478bd9Sstevel@tonic-gate }
6677c478bd9Sstevel@tonic-gate 
6687c478bd9Sstevel@tonic-gate /*
6697c478bd9Sstevel@tonic-gate  * get a list of deltas within <mof, mof+nb>
6707c478bd9Sstevel@tonic-gate  *	returns with mtm_rwlock held
6717c478bd9Sstevel@tonic-gate  *	return value says whether the entire mof range is covered by deltas
6727c478bd9Sstevel@tonic-gate  */
6737c478bd9Sstevel@tonic-gate int
logmap_list_get(mt_map_t * mtm,offset_t mof,off_t nb,mapentry_t ** age)6747c478bd9Sstevel@tonic-gate logmap_list_get(
6757c478bd9Sstevel@tonic-gate 	mt_map_t *mtm,
6767c478bd9Sstevel@tonic-gate 	offset_t mof,
6777c478bd9Sstevel@tonic-gate 	off_t nb,
6787c478bd9Sstevel@tonic-gate 	mapentry_t **age)
6797c478bd9Sstevel@tonic-gate {
6807c478bd9Sstevel@tonic-gate 	off_t		hnb;
6817c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
6827c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
6837c478bd9Sstevel@tonic-gate 	int		rwtype	= RW_READER;
6847c478bd9Sstevel@tonic-gate 	offset_t	savmof	= mof;
6857c478bd9Sstevel@tonic-gate 	off_t		savnb	= nb;
6867c478bd9Sstevel@tonic-gate 	int		entire	= 0;
6877c478bd9Sstevel@tonic-gate 	crb_t		*crb;
6887c478bd9Sstevel@tonic-gate 
6897c478bd9Sstevel@tonic-gate 	mtm->mtm_ref = 1;
6907c478bd9Sstevel@tonic-gate again:
6917c478bd9Sstevel@tonic-gate 
6927c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
69380d34432Sfrankho 	    map_check_linkage(mtm));
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate 	rw_enter(&mtm->mtm_rwlock, rwtype);
6967c478bd9Sstevel@tonic-gate 	*age = NULL;
6977c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
6987c478bd9Sstevel@tonic-gate 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
6997c478bd9Sstevel@tonic-gate 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
7007c478bd9Sstevel@tonic-gate 		if (hnb > nb)
7017c478bd9Sstevel@tonic-gate 			hnb = nb;
7027c478bd9Sstevel@tonic-gate 		/*
7037c478bd9Sstevel@tonic-gate 		 * find overlapping entries
7047c478bd9Sstevel@tonic-gate 		 */
7057c478bd9Sstevel@tonic-gate 		mep = MAP_HASH(mof, mtm);
7067c478bd9Sstevel@tonic-gate 		for (me = *mep; me; me = me->me_hash) {
7077c478bd9Sstevel@tonic-gate 			if (me->me_dt == DT_CANCEL)
7087c478bd9Sstevel@tonic-gate 				continue;
7097c478bd9Sstevel@tonic-gate 			if (!DATAoverlapME(mof, hnb, me))
7107c478bd9Sstevel@tonic-gate 				continue;
7117c478bd9Sstevel@tonic-gate 			/*
7127c478bd9Sstevel@tonic-gate 			 * check if map entry is in use
7137c478bd9Sstevel@tonic-gate 			 * (about to be rolled).
7147c478bd9Sstevel@tonic-gate 			 */
7157c478bd9Sstevel@tonic-gate 			if (me->me_flags & ME_AGE) {
7167c478bd9Sstevel@tonic-gate 				/*
7177c478bd9Sstevel@tonic-gate 				 * reset the age bit in the list,
7187c478bd9Sstevel@tonic-gate 				 * upgrade the lock, and try again
7197c478bd9Sstevel@tonic-gate 				 */
7207c478bd9Sstevel@tonic-gate 				for (me = *age; me; me = *age) {
7217c478bd9Sstevel@tonic-gate 					*age = me->me_agenext;
7227c478bd9Sstevel@tonic-gate 					me->me_flags &= ~ME_AGE;
7237c478bd9Sstevel@tonic-gate 				}
7247c478bd9Sstevel@tonic-gate 				mutex_exit(&mtm->mtm_mutex);
7257c478bd9Sstevel@tonic-gate 				rw_exit(&mtm->mtm_rwlock);
7267c478bd9Sstevel@tonic-gate 				rwtype = RW_WRITER;
7277c478bd9Sstevel@tonic-gate 				mof = savmof;
7287c478bd9Sstevel@tonic-gate 				nb = savnb;
7297c478bd9Sstevel@tonic-gate 				entire = 0;
7307c478bd9Sstevel@tonic-gate 				goto again;
7317c478bd9Sstevel@tonic-gate 			} else {
7327c478bd9Sstevel@tonic-gate 				/* add mapentry to age ordered list */
7337c478bd9Sstevel@tonic-gate 				logmap_list_age(age, me);
7347c478bd9Sstevel@tonic-gate 				crb = me->me_crb;
7357c478bd9Sstevel@tonic-gate 				if (crb) {
7367c478bd9Sstevel@tonic-gate 					if (DATAwithinCRB(savmof, savnb, crb)) {
7377c478bd9Sstevel@tonic-gate 						entire = 1;
7387c478bd9Sstevel@tonic-gate 					}
7397c478bd9Sstevel@tonic-gate 				} else {
7407c478bd9Sstevel@tonic-gate 					if (DATAwithinME(savmof, savnb, me)) {
7417c478bd9Sstevel@tonic-gate 						entire = 1;
7427c478bd9Sstevel@tonic-gate 					}
7437c478bd9Sstevel@tonic-gate 				}
7447c478bd9Sstevel@tonic-gate 			}
7457c478bd9Sstevel@tonic-gate 		}
7467c478bd9Sstevel@tonic-gate 	}
7477c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
7487c478bd9Sstevel@tonic-gate 
7497c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
7507c478bd9Sstevel@tonic-gate 	return (entire);
7517c478bd9Sstevel@tonic-gate }
7527c478bd9Sstevel@tonic-gate 
7537c478bd9Sstevel@tonic-gate /*
7547c478bd9Sstevel@tonic-gate  * Get a list of deltas for rolling - returns sucess or failure.
7557c478bd9Sstevel@tonic-gate  * Also return the cached roll buffer if all deltas point to it.
7567c478bd9Sstevel@tonic-gate  */
7577c478bd9Sstevel@tonic-gate int
logmap_list_get_roll(mt_map_t * logmap,offset_t mof,rollbuf_t * rbp)7587c478bd9Sstevel@tonic-gate logmap_list_get_roll(mt_map_t *logmap, offset_t mof, rollbuf_t *rbp)
7597c478bd9Sstevel@tonic-gate {
7607c478bd9Sstevel@tonic-gate 	mapentry_t	*me, **mep, *age = NULL;
7617c478bd9Sstevel@tonic-gate 	crb_t		*crb = NULL;
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
7647c478bd9Sstevel@tonic-gate 	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
76580d34432Sfrankho 	    map_check_linkage(logmap));
7667c478bd9Sstevel@tonic-gate 	ASSERT((mof & MAPBLOCKOFF) == 0);
7677c478bd9Sstevel@tonic-gate 
7687c478bd9Sstevel@tonic-gate 	rbp->rb_crb = NULL;
7697c478bd9Sstevel@tonic-gate 
7707c478bd9Sstevel@tonic-gate 	/*
7717c478bd9Sstevel@tonic-gate 	 * find overlapping entries
7727c478bd9Sstevel@tonic-gate 	 */
7737c478bd9Sstevel@tonic-gate 	mutex_enter(&logmap->mtm_mutex);
7747c478bd9Sstevel@tonic-gate 	mep = MAP_HASH(mof, logmap);
7757c478bd9Sstevel@tonic-gate 	for (me = *mep; me; me = me->me_hash) {
7767c478bd9Sstevel@tonic-gate 		if (!DATAoverlapME(mof, MAPBLOCKSIZE, me))
7777c478bd9Sstevel@tonic-gate 			continue;
7787c478bd9Sstevel@tonic-gate 		if (me->me_tid == logmap->mtm_tid)
7797c478bd9Sstevel@tonic-gate 			continue;
7807c478bd9Sstevel@tonic-gate 		if (me->me_tid == logmap->mtm_committid)
7817c478bd9Sstevel@tonic-gate 			continue;
7827c478bd9Sstevel@tonic-gate 		if (me->me_dt == DT_CANCEL)
7837c478bd9Sstevel@tonic-gate 			continue;
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 		/*
7867c478bd9Sstevel@tonic-gate 		 * Check if map entry is in use (by lufs_read_strategy())
7877c478bd9Sstevel@tonic-gate 		 * and if so reset the age bit in the list,
7887c478bd9Sstevel@tonic-gate 		 * upgrade the lock, and try again
7897c478bd9Sstevel@tonic-gate 		 */
7907c478bd9Sstevel@tonic-gate 		if (me->me_flags & ME_AGE) {
7917c478bd9Sstevel@tonic-gate 			for (me = age; me; me = age) {
7927c478bd9Sstevel@tonic-gate 				age = me->me_agenext;
7937c478bd9Sstevel@tonic-gate 				me->me_flags &= ~ME_AGE;
7947c478bd9Sstevel@tonic-gate 			}
7957c478bd9Sstevel@tonic-gate 			mutex_exit(&logmap->mtm_mutex);
7967c478bd9Sstevel@tonic-gate 			return (1); /* failure */
7977c478bd9Sstevel@tonic-gate 		} else {
7987c478bd9Sstevel@tonic-gate 			/* add mapentry to age ordered list */
7997c478bd9Sstevel@tonic-gate 			logmap_list_age(&age, me);
8007c478bd9Sstevel@tonic-gate 		}
8017c478bd9Sstevel@tonic-gate 	}
8027c478bd9Sstevel@tonic-gate 	if (!age) {
8037c478bd9Sstevel@tonic-gate 		goto out;
8047c478bd9Sstevel@tonic-gate 	}
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate 	/*
8077c478bd9Sstevel@tonic-gate 	 * Mark the deltas as being rolled.
8087c478bd9Sstevel@tonic-gate 	 */
8097c478bd9Sstevel@tonic-gate 	for (me = age; me; me = me->me_agenext) {
8107c478bd9Sstevel@tonic-gate 		me->me_flags |= ME_ROLL;
8117c478bd9Sstevel@tonic-gate 	}
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate 	/*
8147c478bd9Sstevel@tonic-gate 	 * Test if all deltas are covered by one valid roll buffer
8157c478bd9Sstevel@tonic-gate 	 */
8167c478bd9Sstevel@tonic-gate 	crb = age->me_crb;
8177c478bd9Sstevel@tonic-gate 	if (crb && !(crb->c_invalid)) {
8187c478bd9Sstevel@tonic-gate 		for (me = age; me; me = me->me_agenext) {
8197c478bd9Sstevel@tonic-gate 			if (me->me_crb != crb) {
8207c478bd9Sstevel@tonic-gate 				crb = NULL;
8217c478bd9Sstevel@tonic-gate 				break;
8227c478bd9Sstevel@tonic-gate 			}
8237c478bd9Sstevel@tonic-gate 		}
8247c478bd9Sstevel@tonic-gate 		rbp->rb_crb = crb;
8257c478bd9Sstevel@tonic-gate 	}
8267c478bd9Sstevel@tonic-gate out:
8277c478bd9Sstevel@tonic-gate 	rbp->rb_age = age;
8287c478bd9Sstevel@tonic-gate 
8297c478bd9Sstevel@tonic-gate 	mutex_exit(&logmap->mtm_mutex);
8307c478bd9Sstevel@tonic-gate 
8317c478bd9Sstevel@tonic-gate 	ASSERT(((logmap->mtm_debug & MT_SCAN) == 0) ||
83280d34432Sfrankho 	    logmap_logscan_debug(logmap, age));
8337c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
8347c478bd9Sstevel@tonic-gate 	return (0); /* success */
8357c478bd9Sstevel@tonic-gate }
8367c478bd9Sstevel@tonic-gate 
8377c478bd9Sstevel@tonic-gate void
logmap_list_put_roll(mt_map_t * mtm,mapentry_t * age)8387c478bd9Sstevel@tonic-gate logmap_list_put_roll(mt_map_t *mtm, mapentry_t *age)
8397c478bd9Sstevel@tonic-gate {
8407c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
8417c478bd9Sstevel@tonic-gate 
8427c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
8437c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
8447c478bd9Sstevel@tonic-gate 	for (me = age; me; me = age) {
8457c478bd9Sstevel@tonic-gate 		age = me->me_agenext;
8467c478bd9Sstevel@tonic-gate 		me->me_flags &= ~ME_AGE;
8477c478bd9Sstevel@tonic-gate 	}
8487c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
8497c478bd9Sstevel@tonic-gate }
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate void
logmap_list_put(mt_map_t * mtm,mapentry_t * age)8527c478bd9Sstevel@tonic-gate logmap_list_put(mt_map_t *mtm, mapentry_t *age)
8537c478bd9Sstevel@tonic-gate {
8547c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
8557c478bd9Sstevel@tonic-gate 
8567c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
8577c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
8587c478bd9Sstevel@tonic-gate 	for (me = age; me; me = age) {
8597c478bd9Sstevel@tonic-gate 		age = me->me_agenext;
8607c478bd9Sstevel@tonic-gate 		me->me_flags &= ~ME_AGE;
8617c478bd9Sstevel@tonic-gate 	}
8627c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
8637c478bd9Sstevel@tonic-gate 	rw_exit(&mtm->mtm_rwlock);
8647c478bd9Sstevel@tonic-gate }
8657c478bd9Sstevel@tonic-gate 
8667c478bd9Sstevel@tonic-gate #define	UFS_RW_BALANCE 2
8677c478bd9Sstevel@tonic-gate int ufs_rw_balance = UFS_RW_BALANCE;
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate /*
8707c478bd9Sstevel@tonic-gate  * Check if we need to read the master.
8717c478bd9Sstevel@tonic-gate  * The master does not need to be read if the log deltas to the
8727c478bd9Sstevel@tonic-gate  * block are for one contiguous set of full disk sectors.
8737c478bd9Sstevel@tonic-gate  * Both cylinder group bit maps DT_CG (8K); directory entries (512B);
8747c478bd9Sstevel@tonic-gate  * and possibly others should not require master disk reads.
8757c478bd9Sstevel@tonic-gate  * Calculate the sector map for writing later.
8767c478bd9Sstevel@tonic-gate  */
8777c478bd9Sstevel@tonic-gate int
logmap_setup_read(mapentry_t * age,rollbuf_t * rbp)8787c478bd9Sstevel@tonic-gate logmap_setup_read(mapentry_t *age, rollbuf_t *rbp)
8797c478bd9Sstevel@tonic-gate {
8807c478bd9Sstevel@tonic-gate 	offset_t mof;
8817c478bd9Sstevel@tonic-gate 	crb_t *crb;
8827c478bd9Sstevel@tonic-gate 	mapentry_t *me;
8837c478bd9Sstevel@tonic-gate 	int32_t nb;
8847c478bd9Sstevel@tonic-gate 	int i;
8857c478bd9Sstevel@tonic-gate 	int start_sec, end_sec;
8867c478bd9Sstevel@tonic-gate 	int read_needed = 0;
8877c478bd9Sstevel@tonic-gate 	int all_inodes = 1;
8887c478bd9Sstevel@tonic-gate 	int first_sec = INT_MAX;
8897c478bd9Sstevel@tonic-gate 	int last_sec = -1;
8907c478bd9Sstevel@tonic-gate 	rbsecmap_t secmap = 0;
8917c478bd9Sstevel@tonic-gate 
8927c478bd9Sstevel@tonic-gate 	/* LINTED: warning: logical expression always true: op "||" */
8937c478bd9Sstevel@tonic-gate 	ASSERT((MAPBLOCKSIZE / DEV_BSIZE) == (sizeof (secmap) * NBBY));
8947c478bd9Sstevel@tonic-gate 
8957c478bd9Sstevel@tonic-gate 	for (me = age; me; me = me->me_agenext) {
8967c478bd9Sstevel@tonic-gate 		crb = me->me_crb;
8977c478bd9Sstevel@tonic-gate 		if (crb) {
8987c478bd9Sstevel@tonic-gate 			nb = crb->c_nb;
8997c478bd9Sstevel@tonic-gate 			mof = crb->c_mof;
9007c478bd9Sstevel@tonic-gate 		} else {
9017c478bd9Sstevel@tonic-gate 			nb = me->me_nb;
9027c478bd9Sstevel@tonic-gate 			mof = me->me_mof;
9037c478bd9Sstevel@tonic-gate 		}
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 		/*
9067c478bd9Sstevel@tonic-gate 		 * If the delta is not sector aligned then
9077c478bd9Sstevel@tonic-gate 		 * read the whole block.
9087c478bd9Sstevel@tonic-gate 		 */
9097c478bd9Sstevel@tonic-gate 		if ((nb & DEV_BMASK) || (mof & DEV_BMASK)) {
9107c478bd9Sstevel@tonic-gate 			read_needed = 1;
9117c478bd9Sstevel@tonic-gate 		}
9127c478bd9Sstevel@tonic-gate 
9137c478bd9Sstevel@tonic-gate 		/* Set sector map used in the MAPBLOCKSIZE block.  */
9147c478bd9Sstevel@tonic-gate 		start_sec = (mof & MAPBLOCKOFF) >> DEV_BSHIFT;
9157c478bd9Sstevel@tonic-gate 		end_sec = start_sec + ((nb - 1) >> DEV_BSHIFT);
9167c478bd9Sstevel@tonic-gate 		for (i = start_sec; i <= end_sec; i++) {
9177c478bd9Sstevel@tonic-gate 			secmap |= UINT16_C(1) << i;
9187c478bd9Sstevel@tonic-gate 		}
9197c478bd9Sstevel@tonic-gate 
9207c478bd9Sstevel@tonic-gate 		if (me->me_dt != DT_INODE) {
9217c478bd9Sstevel@tonic-gate 			all_inodes = 0;
9227c478bd9Sstevel@tonic-gate 		}
9237c478bd9Sstevel@tonic-gate 		if (start_sec < first_sec) {
9247c478bd9Sstevel@tonic-gate 			first_sec = start_sec;
9257c478bd9Sstevel@tonic-gate 		}
9267c478bd9Sstevel@tonic-gate 		if (end_sec > last_sec) {
9277c478bd9Sstevel@tonic-gate 			last_sec = end_sec;
9287c478bd9Sstevel@tonic-gate 		}
9297c478bd9Sstevel@tonic-gate 	}
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate 	ASSERT(secmap);
9327c478bd9Sstevel@tonic-gate 	ASSERT(first_sec != INT_MAX);
9337c478bd9Sstevel@tonic-gate 	ASSERT(last_sec != -1);
9347c478bd9Sstevel@tonic-gate 
9357c478bd9Sstevel@tonic-gate 	if (all_inodes) {
9367c478bd9Sstevel@tonic-gate 		/*
9377c478bd9Sstevel@tonic-gate 		 * Here we have a tradeoff choice. It must be better to
9387c478bd9Sstevel@tonic-gate 		 * do 2 writes * in the same MAPBLOCKSIZE chunk, than a
9397c478bd9Sstevel@tonic-gate 		 * read and a write. But what about 3 or more writes, versus
9407c478bd9Sstevel@tonic-gate 		 * a read+write? * Where is the cut over? It will depend on
9417c478bd9Sstevel@tonic-gate 		 * the track caching, scsi driver and other activity.
9427c478bd9Sstevel@tonic-gate 		 * A unpublished tunable is defined (ufs_rw_balance) that
9437c478bd9Sstevel@tonic-gate 		 * currently defaults to 2.
9447c478bd9Sstevel@tonic-gate 		 */
9457c478bd9Sstevel@tonic-gate 		if (!read_needed) {
9467c478bd9Sstevel@tonic-gate 			int count = 0, gap = 0;
9477c478bd9Sstevel@tonic-gate 			int sector_set; /* write needed to this sector */
9487c478bd9Sstevel@tonic-gate 
9497c478bd9Sstevel@tonic-gate 			/* Count the gaps (every 1 to 0 transation) */
9507c478bd9Sstevel@tonic-gate 			for (i = first_sec + 1; i < last_sec; i++) {
9517c478bd9Sstevel@tonic-gate 				sector_set = secmap & (UINT16_C(1) << i);
9527c478bd9Sstevel@tonic-gate 				if (!gap && !sector_set) {
9537c478bd9Sstevel@tonic-gate 					gap = 1;
9547c478bd9Sstevel@tonic-gate 					count++;
9557c478bd9Sstevel@tonic-gate 					if (count > ufs_rw_balance) {
9567c478bd9Sstevel@tonic-gate 						read_needed = 1;
9577c478bd9Sstevel@tonic-gate 						break;
9587c478bd9Sstevel@tonic-gate 					}
9597c478bd9Sstevel@tonic-gate 				} else if (gap && sector_set) {
9607c478bd9Sstevel@tonic-gate 					gap = 0;
9617c478bd9Sstevel@tonic-gate 				}
9627c478bd9Sstevel@tonic-gate 			}
9637c478bd9Sstevel@tonic-gate 		}
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 		/*
9667c478bd9Sstevel@tonic-gate 		 * Inodes commonly make up the majority (~85%) of deltas.
9677c478bd9Sstevel@tonic-gate 		 * They cannot contain embedded user data, so its safe to
9687c478bd9Sstevel@tonic-gate 		 * read and write them all in one IO.
9697c478bd9Sstevel@tonic-gate 		 * But for directory entries, shadow inode data, and
9707c478bd9Sstevel@tonic-gate 		 * quota record data the user data fragments can be embedded
9717c478bd9Sstevel@tonic-gate 		 * betwen those metadata, and so its not safe to read, modify
9727c478bd9Sstevel@tonic-gate 		 * then write the entire range as user asynchronous user data
9737c478bd9Sstevel@tonic-gate 		 * writes could get overwritten with old data.
9747c478bd9Sstevel@tonic-gate 		 * Thus we have to create a segment map of meta data that
9757c478bd9Sstevel@tonic-gate 		 * needs to get written.
9767c478bd9Sstevel@tonic-gate 		 *
9777c478bd9Sstevel@tonic-gate 		 * If user data was logged then this issue would go away.
9787c478bd9Sstevel@tonic-gate 		 */
9797c478bd9Sstevel@tonic-gate 		if (read_needed) {
9807c478bd9Sstevel@tonic-gate 			for (i = first_sec + 1; i < last_sec; i++) {
9817c478bd9Sstevel@tonic-gate 				secmap |= (UINT16_C(1) << i);
9827c478bd9Sstevel@tonic-gate 			}
9837c478bd9Sstevel@tonic-gate 		}
9847c478bd9Sstevel@tonic-gate 	}
9857c478bd9Sstevel@tonic-gate 	rbp->rb_secmap = secmap;
9867c478bd9Sstevel@tonic-gate 	return (read_needed);
9877c478bd9Sstevel@tonic-gate }
9887c478bd9Sstevel@tonic-gate 
9897c478bd9Sstevel@tonic-gate /*
9907c478bd9Sstevel@tonic-gate  * Abort the load of a set of log map delta's.
9917c478bd9Sstevel@tonic-gate  * ie,
9927c478bd9Sstevel@tonic-gate  * Clear out all mapentries on this unit's log map
9937c478bd9Sstevel@tonic-gate  * which have a tid (transaction id) equal to the
9947c478bd9Sstevel@tonic-gate  * parameter tid.   Walk the cancel list, taking everything
9957c478bd9Sstevel@tonic-gate  * off it, too.
9967c478bd9Sstevel@tonic-gate  */
9977c478bd9Sstevel@tonic-gate static void
logmap_abort(ml_unit_t * ul,uint32_t tid)9987c478bd9Sstevel@tonic-gate logmap_abort(ml_unit_t *ul, uint32_t tid)
9997c478bd9Sstevel@tonic-gate {
10007c478bd9Sstevel@tonic-gate 	struct mt_map	*mtm = ul->un_logmap;	/* Log map */
100180d34432Sfrankho 	mapentry_t	*me, **mep;
10027c478bd9Sstevel@tonic-gate 	int		i;
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
100580d34432Sfrankho 	    map_check_linkage(mtm));
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate 	/*
10087c478bd9Sstevel@tonic-gate 	 * wait for any outstanding reads to finish; lock out future reads
10097c478bd9Sstevel@tonic-gate 	 */
10107c478bd9Sstevel@tonic-gate 	rw_enter(&mtm->mtm_rwlock, RW_WRITER);
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
10137c478bd9Sstevel@tonic-gate 	/* Take everything off cancel list */
10147c478bd9Sstevel@tonic-gate 	while ((me = mtm->mtm_cancel) != NULL) {
10157c478bd9Sstevel@tonic-gate 		mtm->mtm_cancel = me->me_cancel;
10167c478bd9Sstevel@tonic-gate 		me->me_flags &= ~ME_CANCEL;
10177c478bd9Sstevel@tonic-gate 		me->me_cancel = NULL;
10187c478bd9Sstevel@tonic-gate 	}
10197c478bd9Sstevel@tonic-gate 
10207c478bd9Sstevel@tonic-gate 	/*
10217c478bd9Sstevel@tonic-gate 	 * Now take out all mapentries with current tid, and committid
10227c478bd9Sstevel@tonic-gate 	 * as this function is called from logmap_logscan and logmap_commit
10237c478bd9Sstevel@tonic-gate 	 * When it is called from logmap_logscan mtm_tid == mtm_committid
10247c478bd9Sstevel@tonic-gate 	 * But when logmap_abort is called from logmap_commit it is
10257c478bd9Sstevel@tonic-gate 	 * because the log errored when trying to write the commit record,
10267c478bd9Sstevel@tonic-gate 	 * after the async ops have been allowed to start in top_end_sync.
10277c478bd9Sstevel@tonic-gate 	 * So we also need to remove all mapentries from the transaction whose
10287c478bd9Sstevel@tonic-gate 	 * commit failed.
10297c478bd9Sstevel@tonic-gate 	 */
10307c478bd9Sstevel@tonic-gate 	for (i = 0; i < mtm->mtm_nhash; i++) {
10317c478bd9Sstevel@tonic-gate 		mep = &mtm->mtm_hash[i];
10327c478bd9Sstevel@tonic-gate 		while ((me = *mep) != NULL) {
10337c478bd9Sstevel@tonic-gate 			if (me->me_tid == tid ||
103480d34432Sfrankho 			    me->me_tid == mtm->mtm_committid) {
10357c478bd9Sstevel@tonic-gate 				*mep = me->me_hash;
10367c478bd9Sstevel@tonic-gate 				me->me_next->me_prev = me->me_prev;
10377c478bd9Sstevel@tonic-gate 				me->me_prev->me_next = me->me_next;
10387c478bd9Sstevel@tonic-gate 				if (!(me->me_flags & ME_USER)) {
10397c478bd9Sstevel@tonic-gate 					mtm->mtm_nme--;
10407c478bd9Sstevel@tonic-gate 				}
10417c478bd9Sstevel@tonic-gate 				CRB_RELE(me);
10427c478bd9Sstevel@tonic-gate 				kmem_cache_free(mapentry_cache, me);
10437c478bd9Sstevel@tonic-gate 				continue;
10447c478bd9Sstevel@tonic-gate 			}
10457c478bd9Sstevel@tonic-gate 			mep = &me->me_hash;
10467c478bd9Sstevel@tonic-gate 		}
10477c478bd9Sstevel@tonic-gate 	}
10487c478bd9Sstevel@tonic-gate 
10497c478bd9Sstevel@tonic-gate 	if (!(ul->un_flags & LDL_SCAN))
10507c478bd9Sstevel@tonic-gate 		mtm->mtm_flags |= MTM_CANCELED;
10517c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
10527c478bd9Sstevel@tonic-gate 	mtm->mtm_dirty = 0;
10537c478bd9Sstevel@tonic-gate 	mtm->mtm_nmet = 0;
10547c478bd9Sstevel@tonic-gate 	rw_exit(&mtm->mtm_rwlock);
10557c478bd9Sstevel@tonic-gate 
10567c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
105780d34432Sfrankho 	    map_check_linkage(mtm));
10587c478bd9Sstevel@tonic-gate }
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate static void
logmap_wait_space(mt_map_t * mtm,ml_unit_t * ul,mapentry_t * me)10617c478bd9Sstevel@tonic-gate logmap_wait_space(mt_map_t *mtm, ml_unit_t *ul, mapentry_t *me)
10627c478bd9Sstevel@tonic-gate {
10637c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
10647c478bd9Sstevel@tonic-gate 
10657c478bd9Sstevel@tonic-gate 	while (!ldl_has_space(ul, me)) {
10667c478bd9Sstevel@tonic-gate 		ASSERT(!(ul->un_flags & LDL_NOROLL));
10677c478bd9Sstevel@tonic-gate 		mutex_exit(&ul->un_log_mutex);
10687c478bd9Sstevel@tonic-gate 		logmap_forceroll(mtm);
10697c478bd9Sstevel@tonic-gate 		mutex_enter(&ul->un_log_mutex);
10707c478bd9Sstevel@tonic-gate 		if (ul->un_flags & LDL_ERROR)
10717c478bd9Sstevel@tonic-gate 			break;
10727c478bd9Sstevel@tonic-gate 	}
10737c478bd9Sstevel@tonic-gate 
10747c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
10757c478bd9Sstevel@tonic-gate }
10767c478bd9Sstevel@tonic-gate 
10777c478bd9Sstevel@tonic-gate /*
10787c478bd9Sstevel@tonic-gate  * put a list of deltas into a logmap
10797c478bd9Sstevel@tonic-gate  * If va == NULL, don't write to the log.
10807c478bd9Sstevel@tonic-gate  */
10817c478bd9Sstevel@tonic-gate void
logmap_add(ml_unit_t * ul,char * va,offset_t vamof,mapentry_t * melist)10827c478bd9Sstevel@tonic-gate logmap_add(
10837c478bd9Sstevel@tonic-gate 	ml_unit_t *ul,
10847c478bd9Sstevel@tonic-gate 	char *va,			/* Ptr to buf w/deltas & data */
10857c478bd9Sstevel@tonic-gate 	offset_t vamof,			/* Offset on master of buf start */
10867c478bd9Sstevel@tonic-gate 	mapentry_t *melist)		/* Entries to add */
10877c478bd9Sstevel@tonic-gate {
10887c478bd9Sstevel@tonic-gate 	offset_t	mof;
10897c478bd9Sstevel@tonic-gate 	off_t		nb;
10907c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
10917c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
10927c478bd9Sstevel@tonic-gate 	mapentry_t	**savmep;
10937c478bd9Sstevel@tonic-gate 	uint32_t	tid;
10947c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
10957c478bd9Sstevel@tonic-gate 
10967c478bd9Sstevel@tonic-gate 	mutex_enter(&ul->un_log_mutex);
10977c478bd9Sstevel@tonic-gate 	if (va)
10987c478bd9Sstevel@tonic-gate 		logmap_wait_space(mtm, ul, melist);
10997c478bd9Sstevel@tonic-gate 
11007c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
110180d34432Sfrankho 	    map_check_linkage(mtm));
11027c478bd9Sstevel@tonic-gate 
11037c478bd9Sstevel@tonic-gate 	mtm->mtm_ref = 1;
11047c478bd9Sstevel@tonic-gate 	mtm->mtm_dirty++;
11057c478bd9Sstevel@tonic-gate 	tid = mtm->mtm_tid;
11067c478bd9Sstevel@tonic-gate 	while (melist) {
11077c478bd9Sstevel@tonic-gate 		mof = melist->me_mof;
11087c478bd9Sstevel@tonic-gate 		nb  = melist->me_nb;
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 		/*
11117c478bd9Sstevel@tonic-gate 		 * search for overlaping entries
11127c478bd9Sstevel@tonic-gate 		 */
11137c478bd9Sstevel@tonic-gate 		savmep = mep = MAP_HASH(mof, mtm);
11147c478bd9Sstevel@tonic-gate 		mutex_enter(&mtm->mtm_mutex);
11157c478bd9Sstevel@tonic-gate 		while ((me = *mep) != 0) {
11167c478bd9Sstevel@tonic-gate 			/*
11177c478bd9Sstevel@tonic-gate 			 * Data consumes old map entry; cancel map entry.
11187c478bd9Sstevel@tonic-gate 			 * Take care when we replace an old map entry
11197c478bd9Sstevel@tonic-gate 			 * which carries quota information with a newer entry
11207c478bd9Sstevel@tonic-gate 			 * which does not. In that case the push function
11217c478bd9Sstevel@tonic-gate 			 * would not be called to clean up the dquot structure.
11227c478bd9Sstevel@tonic-gate 			 * This would be found later by invalidatedq() causing
11237c478bd9Sstevel@tonic-gate 			 * a panic when the filesystem in unmounted.
11247c478bd9Sstevel@tonic-gate 			 * We clean up the dquot manually and then replace
11257c478bd9Sstevel@tonic-gate 			 * the map entry.
11267c478bd9Sstevel@tonic-gate 			 */
11277c478bd9Sstevel@tonic-gate 			if (MEwithinDATA(me, mof, nb) &&
11287c478bd9Sstevel@tonic-gate 			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
11297c478bd9Sstevel@tonic-gate 				if (tid == me->me_tid &&
11307c478bd9Sstevel@tonic-gate 				    ((me->me_flags & ME_AGE) == 0)) {
11317c478bd9Sstevel@tonic-gate 					*mep = me->me_hash;
11327c478bd9Sstevel@tonic-gate 					me->me_next->me_prev = me->me_prev;
11337c478bd9Sstevel@tonic-gate 					me->me_prev->me_next = me->me_next;
11347c478bd9Sstevel@tonic-gate 					ASSERT(!(me->me_flags & ME_USER));
11357c478bd9Sstevel@tonic-gate 					mtm->mtm_nme--;
11367c478bd9Sstevel@tonic-gate 					/*
11377c478bd9Sstevel@tonic-gate 					 * Special case if the mapentry
11387c478bd9Sstevel@tonic-gate 					 * carries a dquot and a push function.
11397c478bd9Sstevel@tonic-gate 					 * We have to clean up the quota info
11407c478bd9Sstevel@tonic-gate 					 * before replacing the mapentry.
11417c478bd9Sstevel@tonic-gate 					 */
11427c478bd9Sstevel@tonic-gate 					if (me->me_dt == DT_QR)
11437c478bd9Sstevel@tonic-gate 						HANDLE_DQUOT(me, melist);
11447c478bd9Sstevel@tonic-gate 
11457c478bd9Sstevel@tonic-gate 					kmem_cache_free(mapentry_cache, me);
11467c478bd9Sstevel@tonic-gate 					continue;
11477c478bd9Sstevel@tonic-gate 				}
11487c478bd9Sstevel@tonic-gate 				me->me_cancel = mtm->mtm_cancel;
11497c478bd9Sstevel@tonic-gate 				mtm->mtm_cancel = me;
11507c478bd9Sstevel@tonic-gate 				me->me_flags |= ME_CANCEL;
11517c478bd9Sstevel@tonic-gate 			}
11527c478bd9Sstevel@tonic-gate 			mep = &(*mep)->me_hash;
11537c478bd9Sstevel@tonic-gate 		}
11547c478bd9Sstevel@tonic-gate 		mutex_exit(&mtm->mtm_mutex);
11557c478bd9Sstevel@tonic-gate 
11567c478bd9Sstevel@tonic-gate 		/*
11577c478bd9Sstevel@tonic-gate 		 * remove from list
11587c478bd9Sstevel@tonic-gate 		 */
11597c478bd9Sstevel@tonic-gate 		me = melist;
11607c478bd9Sstevel@tonic-gate 		melist = melist->me_hash;
11617c478bd9Sstevel@tonic-gate 		me->me_flags &= ~ME_LIST;
11627c478bd9Sstevel@tonic-gate 		/*
11637c478bd9Sstevel@tonic-gate 		 * If va != NULL, put in the log.
11647c478bd9Sstevel@tonic-gate 		 */
11657c478bd9Sstevel@tonic-gate 		if (va)
11667c478bd9Sstevel@tonic-gate 			ldl_write(ul, va, vamof, me);
11677c478bd9Sstevel@tonic-gate 		if (ul->un_flags & LDL_ERROR) {
11687c478bd9Sstevel@tonic-gate 			kmem_cache_free(mapentry_cache, me);
11697c478bd9Sstevel@tonic-gate 			continue;
11707c478bd9Sstevel@tonic-gate 		}
11717c478bd9Sstevel@tonic-gate 		ASSERT((va == NULL) ||
117280d34432Sfrankho 		    ((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
117380d34432Sfrankho 		    map_check_ldl_write(ul, va, vamof, me));
11747c478bd9Sstevel@tonic-gate 
11757c478bd9Sstevel@tonic-gate 		/*
11767c478bd9Sstevel@tonic-gate 		 * put on hash
11777c478bd9Sstevel@tonic-gate 		 */
11787c478bd9Sstevel@tonic-gate 		mutex_enter(&mtm->mtm_mutex);
11797c478bd9Sstevel@tonic-gate 		me->me_hash = *savmep;
11807c478bd9Sstevel@tonic-gate 		*savmep = me;
11817c478bd9Sstevel@tonic-gate 		me->me_next = (mapentry_t *)mtm;
11827c478bd9Sstevel@tonic-gate 		me->me_prev = mtm->mtm_prev;
11837c478bd9Sstevel@tonic-gate 		mtm->mtm_prev->me_next = me;
11847c478bd9Sstevel@tonic-gate 		mtm->mtm_prev = me;
11857c478bd9Sstevel@tonic-gate 		me->me_flags |= ME_HASH;
11867c478bd9Sstevel@tonic-gate 		me->me_tid = tid;
11877c478bd9Sstevel@tonic-gate 		me->me_age = mtm->mtm_age++;
11887c478bd9Sstevel@tonic-gate 		mtm->mtm_nme++;
11897c478bd9Sstevel@tonic-gate 		mtm->mtm_nmet++;
11907c478bd9Sstevel@tonic-gate 		mutex_exit(&mtm->mtm_mutex);
11917c478bd9Sstevel@tonic-gate 	}
11927c478bd9Sstevel@tonic-gate 
11937c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
119480d34432Sfrankho 	    map_check_linkage(mtm));
11957c478bd9Sstevel@tonic-gate 	mutex_exit(&ul->un_log_mutex);
11967c478bd9Sstevel@tonic-gate }
11977c478bd9Sstevel@tonic-gate 
11987c478bd9Sstevel@tonic-gate /*
11997c478bd9Sstevel@tonic-gate  * Add the delta(s) into the log.
12007c478bd9Sstevel@tonic-gate  * Create one cached roll buffer logmap entry, and reference count the
12017c478bd9Sstevel@tonic-gate  * number of mapentries refering to it.
12027c478bd9Sstevel@tonic-gate  * Cancel previous logmap entries.
12037c478bd9Sstevel@tonic-gate  * logmap_add is tolerant of failure to allocate a cached roll buffer.
12047c478bd9Sstevel@tonic-gate  */
12057c478bd9Sstevel@tonic-gate void
logmap_add_buf(ml_unit_t * ul,char * va,offset_t bufmof,mapentry_t * melist,caddr_t buf,uint32_t bufsz)12067c478bd9Sstevel@tonic-gate logmap_add_buf(
12077c478bd9Sstevel@tonic-gate 	ml_unit_t *ul,
12087c478bd9Sstevel@tonic-gate 	char *va,			/* Ptr to buf w/deltas & data */
12097c478bd9Sstevel@tonic-gate 	offset_t bufmof,		/* Offset on master of buf start */
12107c478bd9Sstevel@tonic-gate 	mapentry_t *melist,		/* Entries to add */
12117c478bd9Sstevel@tonic-gate 	caddr_t	buf,			/* Buffer containing delta(s) */
12127c478bd9Sstevel@tonic-gate 	uint32_t bufsz)			/* Size of buf */
12137c478bd9Sstevel@tonic-gate {
12147c478bd9Sstevel@tonic-gate 	offset_t	mof;
12157c478bd9Sstevel@tonic-gate 	offset_t	vamof = bufmof + (va - buf);
12167c478bd9Sstevel@tonic-gate 	off_t		nb;
12177c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
12187c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
12197c478bd9Sstevel@tonic-gate 	mapentry_t	**savmep;
12207c478bd9Sstevel@tonic-gate 	uint32_t	tid;
12217c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
12227c478bd9Sstevel@tonic-gate 	crb_t		*crb;
12237c478bd9Sstevel@tonic-gate 	crb_t		*crbsav = NULL;
12247c478bd9Sstevel@tonic-gate 
12257c478bd9Sstevel@tonic-gate 	ASSERT((bufsz & DEV_BMASK) == 0);
12267c478bd9Sstevel@tonic-gate 	mutex_enter(&ul->un_log_mutex);
12277c478bd9Sstevel@tonic-gate 	logmap_wait_space(mtm, ul, melist);
12287c478bd9Sstevel@tonic-gate 
12297c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
123080d34432Sfrankho 	    map_check_linkage(mtm));
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 	mtm->mtm_ref = 1;
12337c478bd9Sstevel@tonic-gate 	mtm->mtm_dirty++;
12347c478bd9Sstevel@tonic-gate 	tid = mtm->mtm_tid;
12357c478bd9Sstevel@tonic-gate 	while (melist) {
12367c478bd9Sstevel@tonic-gate 		mof = melist->me_mof;
12377c478bd9Sstevel@tonic-gate 		nb  = melist->me_nb;
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 		/*
12407c478bd9Sstevel@tonic-gate 		 * search for overlapping entries
12417c478bd9Sstevel@tonic-gate 		 */
12427c478bd9Sstevel@tonic-gate 		savmep = mep = MAP_HASH(mof, mtm);
12437c478bd9Sstevel@tonic-gate 		mutex_enter(&mtm->mtm_mutex);
12447c478bd9Sstevel@tonic-gate 		while ((me = *mep) != 0) {
12457c478bd9Sstevel@tonic-gate 			/*
12467c478bd9Sstevel@tonic-gate 			 * Data consumes old map entry; cancel map entry.
12477c478bd9Sstevel@tonic-gate 			 * Take care when we replace an old map entry
12487c478bd9Sstevel@tonic-gate 			 * which carries quota information with a newer entry
12497c478bd9Sstevel@tonic-gate 			 * which does not. In that case the push function
12507c478bd9Sstevel@tonic-gate 			 * would not be called to clean up the dquot structure.
12517c478bd9Sstevel@tonic-gate 			 * This would be found later by invalidatedq() causing
12527c478bd9Sstevel@tonic-gate 			 * a panic when the filesystem in unmounted.
12537c478bd9Sstevel@tonic-gate 			 * We clean up the dquot manually and then replace
12547c478bd9Sstevel@tonic-gate 			 * the map entry.
12557c478bd9Sstevel@tonic-gate 			 */
12567c478bd9Sstevel@tonic-gate 			crb = me->me_crb;
12577c478bd9Sstevel@tonic-gate 			if (MEwithinDATA(me, mof, nb) &&
12587c478bd9Sstevel@tonic-gate 			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
12597c478bd9Sstevel@tonic-gate 				if (tid == me->me_tid &&
12607c478bd9Sstevel@tonic-gate 				    ((me->me_flags & ME_AGE) == 0)) {
12617c478bd9Sstevel@tonic-gate 					*mep = me->me_hash;
12627c478bd9Sstevel@tonic-gate 					me->me_next->me_prev = me->me_prev;
12637c478bd9Sstevel@tonic-gate 					me->me_prev->me_next = me->me_next;
12647c478bd9Sstevel@tonic-gate 					ASSERT(!(me->me_flags & ME_USER));
12657c478bd9Sstevel@tonic-gate 					mtm->mtm_nme--;
12667c478bd9Sstevel@tonic-gate 					/*
12677c478bd9Sstevel@tonic-gate 					 * Special case if the mapentry
12687c478bd9Sstevel@tonic-gate 					 * carries a dquot and a push function.
12697c478bd9Sstevel@tonic-gate 					 * We have to clean up the quota info
12707c478bd9Sstevel@tonic-gate 					 * before replacing the mapentry.
12717c478bd9Sstevel@tonic-gate 					 */
12727c478bd9Sstevel@tonic-gate 					if (me->me_dt == DT_QR)
12737c478bd9Sstevel@tonic-gate 						HANDLE_DQUOT(me, melist);
12747c478bd9Sstevel@tonic-gate 
12757c478bd9Sstevel@tonic-gate 					/*
12767c478bd9Sstevel@tonic-gate 					 * If this soon to be deleted mapentry
12777c478bd9Sstevel@tonic-gate 					 * has a suitable roll buffer then
12787c478bd9Sstevel@tonic-gate 					 * re-use it.
12797c478bd9Sstevel@tonic-gate 					 */
12807c478bd9Sstevel@tonic-gate 					if (crb && (--crb->c_refcnt == 0)) {
12817c478bd9Sstevel@tonic-gate 						if (crbsav ||
12827c478bd9Sstevel@tonic-gate 						    (crb->c_nb != bufsz)) {
12837c478bd9Sstevel@tonic-gate 							CRB_FREE(crb, me);
12847c478bd9Sstevel@tonic-gate 						} else {
12857c478bd9Sstevel@tonic-gate 							bcopy(buf, crb->c_buf,
12867c478bd9Sstevel@tonic-gate 							    bufsz);
12877c478bd9Sstevel@tonic-gate 							crb->c_invalid = 0;
12887c478bd9Sstevel@tonic-gate 							crb->c_mof = bufmof;
12897c478bd9Sstevel@tonic-gate 							crbsav = crb;
12907c478bd9Sstevel@tonic-gate 							me->me_crb = NULL;
12917c478bd9Sstevel@tonic-gate 						}
12927c478bd9Sstevel@tonic-gate 					}
12937c478bd9Sstevel@tonic-gate 					kmem_cache_free(mapentry_cache, me);
12947c478bd9Sstevel@tonic-gate 					continue;
12957c478bd9Sstevel@tonic-gate 				}
12967c478bd9Sstevel@tonic-gate 				me->me_cancel = mtm->mtm_cancel;
12977c478bd9Sstevel@tonic-gate 				mtm->mtm_cancel = me;
12987c478bd9Sstevel@tonic-gate 				me->me_flags |= ME_CANCEL;
12997c478bd9Sstevel@tonic-gate 			}
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate 			/*
13027c478bd9Sstevel@tonic-gate 			 * Inode deltas within the same fs block come
13037c478bd9Sstevel@tonic-gate 			 * in individually as separate calls to logmap_add().
13047c478bd9Sstevel@tonic-gate 			 * All others come in as one call. So check for an
13057c478bd9Sstevel@tonic-gate 			 * existing entry where we can re-use the crb.
13067c478bd9Sstevel@tonic-gate 			 */
13077c478bd9Sstevel@tonic-gate 			if ((me->me_dt == DT_INODE) && (tid == me->me_tid) &&
13087c478bd9Sstevel@tonic-gate 			    !crbsav && crb &&
13097c478bd9Sstevel@tonic-gate 			    WITHIN(mof, nb, crb->c_mof, crb->c_nb)) {
13107c478bd9Sstevel@tonic-gate 				ASSERT(crb->c_mof == bufmof);
13117c478bd9Sstevel@tonic-gate 				ASSERT(crb->c_nb == bufsz);
13127c478bd9Sstevel@tonic-gate 				bcopy(buf, crb->c_buf, bufsz);
13137c478bd9Sstevel@tonic-gate 				crbsav = crb;
13147c478bd9Sstevel@tonic-gate 			}
13157c478bd9Sstevel@tonic-gate 			mep = &(*mep)->me_hash;
13167c478bd9Sstevel@tonic-gate 		}
13177c478bd9Sstevel@tonic-gate 		mutex_exit(&mtm->mtm_mutex);
13187c478bd9Sstevel@tonic-gate 
13197c478bd9Sstevel@tonic-gate 		/*
13207c478bd9Sstevel@tonic-gate 		 * If we don't already have a crb then allocate one
13217c478bd9Sstevel@tonic-gate 		 * and copy the incoming buffer. Only do this once
13227c478bd9Sstevel@tonic-gate 		 * for all the incoming deltas.
13237c478bd9Sstevel@tonic-gate 		 */
13247c478bd9Sstevel@tonic-gate 		if ((crbsav == NULL) && (melist->me_dt != DT_ABZERO)) {
13257c478bd9Sstevel@tonic-gate 			/*
13267c478bd9Sstevel@tonic-gate 			 * Only use a cached roll buffer if we
13277c478bd9Sstevel@tonic-gate 			 * have enough memory, and check for failures.
13287c478bd9Sstevel@tonic-gate 			 */
13297c478bd9Sstevel@tonic-gate 			if (((ufs_crb_size + bufsz) < ufs_crb_limit) &&
13307c478bd9Sstevel@tonic-gate 			    (kmem_avail() > bufsz)) {
13317c478bd9Sstevel@tonic-gate 				crbsav = kmem_alloc(sizeof (crb_t), KM_NOSLEEP);
13327c478bd9Sstevel@tonic-gate 			} else {
13337c478bd9Sstevel@tonic-gate 				ufs_crb_alloc_fails++;
13347c478bd9Sstevel@tonic-gate 			}
13357c478bd9Sstevel@tonic-gate 			if (crbsav) {
13367c478bd9Sstevel@tonic-gate 				crbsav->c_buf = kmem_alloc(bufsz, KM_NOSLEEP);
13377c478bd9Sstevel@tonic-gate 				if (crbsav->c_buf) {
13387c478bd9Sstevel@tonic-gate 					atomic_add_64(&ufs_crb_size,
13397c478bd9Sstevel@tonic-gate 					    (uint64_t)bufsz);
13407c478bd9Sstevel@tonic-gate 					if (ufs_crb_size > ufs_crb_max_size) {
13417c478bd9Sstevel@tonic-gate 						ufs_crb_max_size = ufs_crb_size;
13427c478bd9Sstevel@tonic-gate 					}
13437c478bd9Sstevel@tonic-gate 					bcopy(buf, crbsav->c_buf, bufsz);
13447c478bd9Sstevel@tonic-gate 					crbsav->c_nb = bufsz;
13457c478bd9Sstevel@tonic-gate 					crbsav->c_refcnt = 0;
13467c478bd9Sstevel@tonic-gate 					crbsav->c_invalid = 0;
13477c478bd9Sstevel@tonic-gate 					ASSERT((bufmof & DEV_BMASK) == 0);
13487c478bd9Sstevel@tonic-gate 					crbsav->c_mof = bufmof;
13497c478bd9Sstevel@tonic-gate 				} else {
13507c478bd9Sstevel@tonic-gate 					kmem_free(crbsav, sizeof (crb_t));
13517c478bd9Sstevel@tonic-gate 					crbsav = NULL;
13527c478bd9Sstevel@tonic-gate 				}
13537c478bd9Sstevel@tonic-gate 			}
13547c478bd9Sstevel@tonic-gate 		}
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate 		/*
13577c478bd9Sstevel@tonic-gate 		 * remove from list
13587c478bd9Sstevel@tonic-gate 		 */
13597c478bd9Sstevel@tonic-gate 		me = melist;
13607c478bd9Sstevel@tonic-gate 		melist = melist->me_hash;
13617c478bd9Sstevel@tonic-gate 		me->me_flags &= ~ME_LIST;
13627c478bd9Sstevel@tonic-gate 		me->me_crb = crbsav;
13637c478bd9Sstevel@tonic-gate 		if (crbsav) {
13647c478bd9Sstevel@tonic-gate 			crbsav->c_refcnt++;
13657c478bd9Sstevel@tonic-gate 		}
13667c478bd9Sstevel@tonic-gate 		crbsav = NULL;
13677c478bd9Sstevel@tonic-gate 
13687c478bd9Sstevel@tonic-gate 		ASSERT(va);
13697c478bd9Sstevel@tonic-gate 		ldl_write(ul, va, vamof, me); /* add to on-disk log */
13707c478bd9Sstevel@tonic-gate 		if (ul->un_flags & LDL_ERROR) {
13717c478bd9Sstevel@tonic-gate 			CRB_RELE(me);
13727c478bd9Sstevel@tonic-gate 			kmem_cache_free(mapentry_cache, me);
13737c478bd9Sstevel@tonic-gate 			continue;
13747c478bd9Sstevel@tonic-gate 		}
13757c478bd9Sstevel@tonic-gate 		ASSERT(((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
137680d34432Sfrankho 		    map_check_ldl_write(ul, va, vamof, me));
13777c478bd9Sstevel@tonic-gate 
13787c478bd9Sstevel@tonic-gate 		/*
13797c478bd9Sstevel@tonic-gate 		 * put on hash
13807c478bd9Sstevel@tonic-gate 		 */
13817c478bd9Sstevel@tonic-gate 		mutex_enter(&mtm->mtm_mutex);
13827c478bd9Sstevel@tonic-gate 		me->me_hash = *savmep;
13837c478bd9Sstevel@tonic-gate 		*savmep = me;
13847c478bd9Sstevel@tonic-gate 		me->me_next = (mapentry_t *)mtm;
13857c478bd9Sstevel@tonic-gate 		me->me_prev = mtm->mtm_prev;
13867c478bd9Sstevel@tonic-gate 		mtm->mtm_prev->me_next = me;
13877c478bd9Sstevel@tonic-gate 		mtm->mtm_prev = me;
13887c478bd9Sstevel@tonic-gate 		me->me_flags |= ME_HASH;
13897c478bd9Sstevel@tonic-gate 		me->me_tid = tid;
13907c478bd9Sstevel@tonic-gate 		me->me_age = mtm->mtm_age++;
13917c478bd9Sstevel@tonic-gate 		mtm->mtm_nme++;
13927c478bd9Sstevel@tonic-gate 		mtm->mtm_nmet++;
13937c478bd9Sstevel@tonic-gate 		mutex_exit(&mtm->mtm_mutex);
13947c478bd9Sstevel@tonic-gate 	}
13957c478bd9Sstevel@tonic-gate 
13967c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
139780d34432Sfrankho 	    map_check_linkage(mtm));
13987c478bd9Sstevel@tonic-gate 	mutex_exit(&ul->un_log_mutex);
13997c478bd9Sstevel@tonic-gate }
14007c478bd9Sstevel@tonic-gate 
14017c478bd9Sstevel@tonic-gate /*
14027c478bd9Sstevel@tonic-gate  * free up any cancelled deltas
14037c478bd9Sstevel@tonic-gate  */
14047c478bd9Sstevel@tonic-gate void
logmap_free_cancel(mt_map_t * mtm,mapentry_t ** cancelhead)14057c478bd9Sstevel@tonic-gate logmap_free_cancel(mt_map_t *mtm, mapentry_t **cancelhead)
14067c478bd9Sstevel@tonic-gate {
14077c478bd9Sstevel@tonic-gate 	int		dolock	= 0;
14087c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
14097c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
14107c478bd9Sstevel@tonic-gate 
14117c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
141280d34432Sfrankho 	    map_check_linkage(mtm));
14137c478bd9Sstevel@tonic-gate 
14147c478bd9Sstevel@tonic-gate again:
14157c478bd9Sstevel@tonic-gate 	if (dolock)
14167c478bd9Sstevel@tonic-gate 		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
14177c478bd9Sstevel@tonic-gate 
14187c478bd9Sstevel@tonic-gate 	/*
14197c478bd9Sstevel@tonic-gate 	 * At EOT, cancel the indicated deltas
14207c478bd9Sstevel@tonic-gate 	 */
14217c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
14227c478bd9Sstevel@tonic-gate 	if (mtm->mtm_flags & MTM_CANCELED) {
14237c478bd9Sstevel@tonic-gate 		mtm->mtm_flags &= ~MTM_CANCELED;
14247c478bd9Sstevel@tonic-gate 		ASSERT(dolock == 0);
14257c478bd9Sstevel@tonic-gate 		mutex_exit(&mtm->mtm_mutex);
14267c478bd9Sstevel@tonic-gate 		return;
14277c478bd9Sstevel@tonic-gate 	}
14287c478bd9Sstevel@tonic-gate 
14297c478bd9Sstevel@tonic-gate 	while ((me = *cancelhead) != NULL) {
14307c478bd9Sstevel@tonic-gate 		/*
14317c478bd9Sstevel@tonic-gate 		 * roll forward or read collision; wait and try again
14327c478bd9Sstevel@tonic-gate 		 */
14337c478bd9Sstevel@tonic-gate 		if (me->me_flags & ME_AGE) {
14347c478bd9Sstevel@tonic-gate 			ASSERT(dolock == 0);
14357c478bd9Sstevel@tonic-gate 			mutex_exit(&mtm->mtm_mutex);
14367c478bd9Sstevel@tonic-gate 			dolock = 1;
14377c478bd9Sstevel@tonic-gate 			goto again;
14387c478bd9Sstevel@tonic-gate 		}
14397c478bd9Sstevel@tonic-gate 		/*
14407c478bd9Sstevel@tonic-gate 		 * remove from cancel list
14417c478bd9Sstevel@tonic-gate 		 */
14427c478bd9Sstevel@tonic-gate 		*cancelhead = me->me_cancel;
14437c478bd9Sstevel@tonic-gate 		me->me_cancel = NULL;
14447c478bd9Sstevel@tonic-gate 		me->me_flags &= ~(ME_CANCEL);
14457c478bd9Sstevel@tonic-gate 
14467c478bd9Sstevel@tonic-gate 		/*
14477c478bd9Sstevel@tonic-gate 		 * logmap_remove_roll handles ME_ROLL entries later
14487c478bd9Sstevel@tonic-gate 		 *	we leave them around for logmap_iscancel
14497c478bd9Sstevel@tonic-gate 		 *	XXX is this necessary?
14507c478bd9Sstevel@tonic-gate 		 */
14517c478bd9Sstevel@tonic-gate 		if (me->me_flags & ME_ROLL)
14527c478bd9Sstevel@tonic-gate 			continue;
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 		/*
14557c478bd9Sstevel@tonic-gate 		 * remove from hash (if necessary)
14567c478bd9Sstevel@tonic-gate 		 */
14577c478bd9Sstevel@tonic-gate 		if (me->me_flags & ME_HASH) {
14587c478bd9Sstevel@tonic-gate 			mep = MAP_HASH(me->me_mof, mtm);
14597c478bd9Sstevel@tonic-gate 			while (*mep) {
14607c478bd9Sstevel@tonic-gate 				if (*mep == me) {
14617c478bd9Sstevel@tonic-gate 					*mep = me->me_hash;
14627c478bd9Sstevel@tonic-gate 					me->me_next->me_prev = me->me_prev;
14637c478bd9Sstevel@tonic-gate 					me->me_prev->me_next = me->me_next;
14647c478bd9Sstevel@tonic-gate 					me->me_flags &= ~(ME_HASH);
14657c478bd9Sstevel@tonic-gate 					if (!(me->me_flags & ME_USER)) {
14667c478bd9Sstevel@tonic-gate 						mtm->mtm_nme--;
14677c478bd9Sstevel@tonic-gate 					}
14687c478bd9Sstevel@tonic-gate 					break;
14697c478bd9Sstevel@tonic-gate 				} else
14707c478bd9Sstevel@tonic-gate 					mep = &(*mep)->me_hash;
14717c478bd9Sstevel@tonic-gate 			}
14727c478bd9Sstevel@tonic-gate 		}
14737c478bd9Sstevel@tonic-gate 		/*
14747c478bd9Sstevel@tonic-gate 		 * put the entry on the free list
14757c478bd9Sstevel@tonic-gate 		 */
14767c478bd9Sstevel@tonic-gate 		CRB_RELE(me);
14777c478bd9Sstevel@tonic-gate 		kmem_cache_free(mapentry_cache, me);
14787c478bd9Sstevel@tonic-gate 	}
14797c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
14807c478bd9Sstevel@tonic-gate 	if (dolock)
14817c478bd9Sstevel@tonic-gate 		rw_exit(&mtm->mtm_rwlock);
14827c478bd9Sstevel@tonic-gate 
14837c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
148480d34432Sfrankho 	    map_check_linkage(mtm));
14857c478bd9Sstevel@tonic-gate }
14867c478bd9Sstevel@tonic-gate 
14877c478bd9Sstevel@tonic-gate 
14887c478bd9Sstevel@tonic-gate void
logmap_commit(ml_unit_t * ul,uint32_t tid)14897c478bd9Sstevel@tonic-gate logmap_commit(ml_unit_t *ul, uint32_t tid)
14907c478bd9Sstevel@tonic-gate {
14917c478bd9Sstevel@tonic-gate 	mapentry_t	me;
14927c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
14937c478bd9Sstevel@tonic-gate 
14947c478bd9Sstevel@tonic-gate 
14957c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
14967c478bd9Sstevel@tonic-gate 
14977c478bd9Sstevel@tonic-gate 	/*
14987c478bd9Sstevel@tonic-gate 	 * async'ly write a commit rec into the log
14997c478bd9Sstevel@tonic-gate 	 */
15007c478bd9Sstevel@tonic-gate 	if (mtm->mtm_dirty) {
15017c478bd9Sstevel@tonic-gate 		/*
15027c478bd9Sstevel@tonic-gate 		 * put commit record into log
15037c478bd9Sstevel@tonic-gate 		 */
15047c478bd9Sstevel@tonic-gate 		me.me_mof = mtm->mtm_tid;
15057c478bd9Sstevel@tonic-gate 		me.me_dt = DT_COMMIT;
15067c478bd9Sstevel@tonic-gate 		me.me_nb = 0;
15077c478bd9Sstevel@tonic-gate 		me.me_hash = NULL;
15087c478bd9Sstevel@tonic-gate 		logmap_wait_space(mtm, ul, &me);
15097c478bd9Sstevel@tonic-gate 		ldl_write(ul, NULL, (offset_t)0, &me);
15107c478bd9Sstevel@tonic-gate 		ldl_round_commit(ul);
15117c478bd9Sstevel@tonic-gate 
15127c478bd9Sstevel@tonic-gate 		/*
15137c478bd9Sstevel@tonic-gate 		 * abort on error; else reset dirty flag
15147c478bd9Sstevel@tonic-gate 		 */
15157c478bd9Sstevel@tonic-gate 		if (ul->un_flags & LDL_ERROR)
15167c478bd9Sstevel@tonic-gate 			logmap_abort(ul, tid);
15177c478bd9Sstevel@tonic-gate 		else {
15187c478bd9Sstevel@tonic-gate 			mtm->mtm_dirty = 0;
15197c478bd9Sstevel@tonic-gate 			mtm->mtm_nmet = 0;
15207c478bd9Sstevel@tonic-gate 			mtm->mtm_cfrags = 0;
15217c478bd9Sstevel@tonic-gate 		}
15227c478bd9Sstevel@tonic-gate 		/* push commit */
15237c478bd9Sstevel@tonic-gate 		ldl_push_commit(ul);
15247c478bd9Sstevel@tonic-gate 	}
15257c478bd9Sstevel@tonic-gate }
15267c478bd9Sstevel@tonic-gate 
15277c478bd9Sstevel@tonic-gate void
logmap_sethead(mt_map_t * mtm,ml_unit_t * ul)15287c478bd9Sstevel@tonic-gate logmap_sethead(mt_map_t *mtm, ml_unit_t *ul)
15297c478bd9Sstevel@tonic-gate {
15307c478bd9Sstevel@tonic-gate 	off_t		lof;
15317c478bd9Sstevel@tonic-gate 	uint32_t	tid;
15327c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
15337c478bd9Sstevel@tonic-gate 
15347c478bd9Sstevel@tonic-gate 	/*
15357c478bd9Sstevel@tonic-gate 	 * move the head forward so the log knows how full it is
15367c478bd9Sstevel@tonic-gate 	 * Make sure to skip any mapentry whose me_lof is 0, these
15377c478bd9Sstevel@tonic-gate 	 * are just place holders for DT_CANCELED freed user blocks
15387c478bd9Sstevel@tonic-gate 	 * for the current moby.
15397c478bd9Sstevel@tonic-gate 	 */
15407c478bd9Sstevel@tonic-gate 	mutex_enter(&ul->un_log_mutex);
15417c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
15427c478bd9Sstevel@tonic-gate 	me = mtm->mtm_next;
15437c478bd9Sstevel@tonic-gate 	while (me != (mapentry_t *)mtm && me->me_lof == 0) {
15447c478bd9Sstevel@tonic-gate 		me = me->me_next;
15457c478bd9Sstevel@tonic-gate 	}
15467c478bd9Sstevel@tonic-gate 
15477c478bd9Sstevel@tonic-gate 	if (me == (mapentry_t *)mtm)
15487c478bd9Sstevel@tonic-gate 		lof = -1;
15497c478bd9Sstevel@tonic-gate 	else {
15507c478bd9Sstevel@tonic-gate 		lof = me->me_lof;
15517c478bd9Sstevel@tonic-gate 		tid = me->me_tid;
15527c478bd9Sstevel@tonic-gate 	}
15537c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
15547c478bd9Sstevel@tonic-gate 	ldl_sethead(ul, lof, tid);
15557c478bd9Sstevel@tonic-gate 	if (lof == -1)
15567c478bd9Sstevel@tonic-gate 		mtm->mtm_age = 0;
15577c478bd9Sstevel@tonic-gate 	mutex_exit(&ul->un_log_mutex);
15587c478bd9Sstevel@tonic-gate }
15597c478bd9Sstevel@tonic-gate 
15607c478bd9Sstevel@tonic-gate void
logmap_settail(mt_map_t * mtm,ml_unit_t * ul)15617c478bd9Sstevel@tonic-gate logmap_settail(mt_map_t *mtm, ml_unit_t *ul)
15627c478bd9Sstevel@tonic-gate {
15637c478bd9Sstevel@tonic-gate 	off_t		lof;
15647c478bd9Sstevel@tonic-gate 	size_t		nb;
15657c478bd9Sstevel@tonic-gate 
15667c478bd9Sstevel@tonic-gate 	/*
15677c478bd9Sstevel@tonic-gate 	 * set the tail after the logmap_abort
15687c478bd9Sstevel@tonic-gate 	 */
15697c478bd9Sstevel@tonic-gate 	mutex_enter(&ul->un_log_mutex);
15707c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
15717c478bd9Sstevel@tonic-gate 	if (mtm->mtm_prev == (mapentry_t *)mtm)
15727c478bd9Sstevel@tonic-gate 		lof = -1;
15737c478bd9Sstevel@tonic-gate 	else {
15747c478bd9Sstevel@tonic-gate 		/*
15757c478bd9Sstevel@tonic-gate 		 * set the tail to the end of the last commit
15767c478bd9Sstevel@tonic-gate 		 */
15777c478bd9Sstevel@tonic-gate 		lof = mtm->mtm_tail_lof;
15787c478bd9Sstevel@tonic-gate 		nb = mtm->mtm_tail_nb;
15797c478bd9Sstevel@tonic-gate 	}
15807c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
15817c478bd9Sstevel@tonic-gate 	ldl_settail(ul, lof, nb);
15827c478bd9Sstevel@tonic-gate 	mutex_exit(&ul->un_log_mutex);
15837c478bd9Sstevel@tonic-gate }
15847c478bd9Sstevel@tonic-gate 
15857c478bd9Sstevel@tonic-gate /*
15867c478bd9Sstevel@tonic-gate  * when reseting a device; roll the log until every
15877c478bd9Sstevel@tonic-gate  * delta has been rolled forward
15887c478bd9Sstevel@tonic-gate  */
15897c478bd9Sstevel@tonic-gate void
logmap_roll_dev(ml_unit_t * ul)15907c478bd9Sstevel@tonic-gate logmap_roll_dev(ml_unit_t *ul)
15917c478bd9Sstevel@tonic-gate {
15927c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
15937c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
15947c478bd9Sstevel@tonic-gate 	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;
15957c478bd9Sstevel@tonic-gate 
15967c478bd9Sstevel@tonic-gate again:
15977c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
159880d34432Sfrankho 	    map_check_linkage(mtm));
15997c478bd9Sstevel@tonic-gate 	if (ul->un_flags & (LDL_ERROR|LDL_NOROLL))
16007c478bd9Sstevel@tonic-gate 		return;
16017c478bd9Sstevel@tonic-gate 
16027c478bd9Sstevel@tonic-gate 	/*
16037c478bd9Sstevel@tonic-gate 	 * look for deltas
16047c478bd9Sstevel@tonic-gate 	 */
16057c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
16067c478bd9Sstevel@tonic-gate 	for (me = mtm->mtm_next; me != (mapentry_t *)mtm; me = me->me_next) {
16077c478bd9Sstevel@tonic-gate 		if (me->me_flags & ME_ROLL)
16087c478bd9Sstevel@tonic-gate 			break;
16097c478bd9Sstevel@tonic-gate 		if (me->me_tid == mtm->mtm_tid)
16107c478bd9Sstevel@tonic-gate 			continue;
16117c478bd9Sstevel@tonic-gate 		if (me->me_tid == mtm->mtm_committid)
16127c478bd9Sstevel@tonic-gate 			continue;
16137c478bd9Sstevel@tonic-gate 		break;
16147c478bd9Sstevel@tonic-gate 	}
16157c478bd9Sstevel@tonic-gate 
16167c478bd9Sstevel@tonic-gate 	/*
16177c478bd9Sstevel@tonic-gate 	 * found a delta; kick the roll thread
16187c478bd9Sstevel@tonic-gate 	 * but only if the thread is running... (jmh)
16197c478bd9Sstevel@tonic-gate 	 */
16207c478bd9Sstevel@tonic-gate 	if (me != (mapentry_t *)mtm) {
16217c478bd9Sstevel@tonic-gate 		mutex_exit(&mtm->mtm_mutex);
16227c478bd9Sstevel@tonic-gate 		logmap_forceroll(mtm);
16237c478bd9Sstevel@tonic-gate 		goto again;
16247c478bd9Sstevel@tonic-gate 	}
16257c478bd9Sstevel@tonic-gate 
16267c478bd9Sstevel@tonic-gate 	/*
16277c478bd9Sstevel@tonic-gate 	 * no more deltas, return
16287c478bd9Sstevel@tonic-gate 	 */
16297c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
16307c478bd9Sstevel@tonic-gate 	(void) ufs_putsummaryinfo(ul->un_dev, ufsvfsp, ufsvfsp->vfs_fs);
16317c478bd9Sstevel@tonic-gate 
16327c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
163380d34432Sfrankho 	    map_check_linkage(mtm));
16347c478bd9Sstevel@tonic-gate }
16357c478bd9Sstevel@tonic-gate 
16367c478bd9Sstevel@tonic-gate static void
logmap_cancel_delta(ml_unit_t * ul,offset_t mof,int32_t nb,int metadata)16377c478bd9Sstevel@tonic-gate logmap_cancel_delta(ml_unit_t *ul, offset_t mof, int32_t nb, int metadata)
16387c478bd9Sstevel@tonic-gate {
16397c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
16407c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
16417c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
16427c478bd9Sstevel@tonic-gate 	int		frags;
16437c478bd9Sstevel@tonic-gate 
16447c478bd9Sstevel@tonic-gate 	/*
16457c478bd9Sstevel@tonic-gate 	 * map has been referenced and is dirty
16467c478bd9Sstevel@tonic-gate 	 */
16477c478bd9Sstevel@tonic-gate 	mtm->mtm_ref = 1;
16487c478bd9Sstevel@tonic-gate 	mtm->mtm_dirty++;
16497c478bd9Sstevel@tonic-gate 
16507c478bd9Sstevel@tonic-gate 	/*
16517c478bd9Sstevel@tonic-gate 	 * get a mapentry
16527c478bd9Sstevel@tonic-gate 	 */
16537c478bd9Sstevel@tonic-gate 	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
16547c478bd9Sstevel@tonic-gate 	bzero(me, sizeof (mapentry_t));
16557c478bd9Sstevel@tonic-gate 
16567c478bd9Sstevel@tonic-gate 	/*
16577c478bd9Sstevel@tonic-gate 	 * initialize cancel record and put in logmap
16587c478bd9Sstevel@tonic-gate 	 */
16597c478bd9Sstevel@tonic-gate 	me->me_mof = mof;
16607c478bd9Sstevel@tonic-gate 	me->me_nb = nb;
16617c478bd9Sstevel@tonic-gate 	me->me_dt = DT_CANCEL;
16627c478bd9Sstevel@tonic-gate 	me->me_tid = mtm->mtm_tid;
16637c478bd9Sstevel@tonic-gate 	me->me_hash = NULL;
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	/*
16667c478bd9Sstevel@tonic-gate 	 * Write delta to log if this delta is for metadata.  If this is not
16677c478bd9Sstevel@tonic-gate 	 * metadata it is user data and we are just putting a cancel
16687c478bd9Sstevel@tonic-gate 	 * mapentry into the hash to cancel a user block deletion
16697c478bd9Sstevel@tonic-gate 	 * in which we do not want the block to be allocated
16707c478bd9Sstevel@tonic-gate 	 * within this moby.  This cancel entry will prevent the block from
16717c478bd9Sstevel@tonic-gate 	 * being allocated within the moby and prevent user data corruption
16727c478bd9Sstevel@tonic-gate 	 * if we happen to crash before this moby is committed.
16737c478bd9Sstevel@tonic-gate 	 */
16747c478bd9Sstevel@tonic-gate 	mutex_enter(&ul->un_log_mutex);
16757c478bd9Sstevel@tonic-gate 	if (metadata) {
16767c478bd9Sstevel@tonic-gate 		logmap_wait_space(mtm, ul, me);
16777c478bd9Sstevel@tonic-gate 		ldl_write(ul, NULL, (offset_t)0, me);
16787c478bd9Sstevel@tonic-gate 		if (ul->un_flags & LDL_ERROR) {
16797c478bd9Sstevel@tonic-gate 			kmem_cache_free(mapentry_cache, me);
16807c478bd9Sstevel@tonic-gate 			mutex_exit(&ul->un_log_mutex);
16817c478bd9Sstevel@tonic-gate 			return;
16827c478bd9Sstevel@tonic-gate 		}
16837c478bd9Sstevel@tonic-gate 	}
16847c478bd9Sstevel@tonic-gate 
16857c478bd9Sstevel@tonic-gate 	/*
16867c478bd9Sstevel@tonic-gate 	 * put in hash and on cancel list
16877c478bd9Sstevel@tonic-gate 	 */
16887c478bd9Sstevel@tonic-gate 	mep = MAP_HASH(mof, mtm);
16897c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
16907c478bd9Sstevel@tonic-gate 	me->me_age = mtm->mtm_age++;
16917c478bd9Sstevel@tonic-gate 	me->me_hash = *mep;
16927c478bd9Sstevel@tonic-gate 	*mep = me;
16937c478bd9Sstevel@tonic-gate 	me->me_next = (mapentry_t *)mtm;
16947c478bd9Sstevel@tonic-gate 	me->me_prev = mtm->mtm_prev;
16957c478bd9Sstevel@tonic-gate 	mtm->mtm_prev->me_next = me;
16967c478bd9Sstevel@tonic-gate 	mtm->mtm_prev = me;
16977c478bd9Sstevel@tonic-gate 	me->me_cancel = mtm->mtm_cancel;
16987c478bd9Sstevel@tonic-gate 	mtm->mtm_cancel = me;
16997c478bd9Sstevel@tonic-gate 	if (metadata) {
17007c478bd9Sstevel@tonic-gate 		mtm->mtm_nme++;
17017c478bd9Sstevel@tonic-gate 		mtm->mtm_nmet++;
17027c478bd9Sstevel@tonic-gate 	} else {
17037c478bd9Sstevel@tonic-gate 		me->me_flags = ME_USER;
17047c478bd9Sstevel@tonic-gate 	}
17057c478bd9Sstevel@tonic-gate 	me->me_flags |= (ME_HASH|ME_CANCEL);
17067c478bd9Sstevel@tonic-gate 	if (!(metadata)) {
17077c478bd9Sstevel@tonic-gate 		frags = blkoff(ul->un_ufsvfs->vfs_fs, nb);
17087c478bd9Sstevel@tonic-gate 		if (frags)
170980d34432Sfrankho 			mtm->mtm_cfrags +=
171080d34432Sfrankho 			    numfrags(ul->un_ufsvfs->vfs_fs, frags);
17117c478bd9Sstevel@tonic-gate 	}
17127c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 	mutex_exit(&ul->un_log_mutex);
17157c478bd9Sstevel@tonic-gate }
17167c478bd9Sstevel@tonic-gate 
17177c478bd9Sstevel@tonic-gate /*
17187c478bd9Sstevel@tonic-gate  * cancel entries in a logmap (entries are freed at EOT)
17197c478bd9Sstevel@tonic-gate  */
17207c478bd9Sstevel@tonic-gate void
logmap_cancel(ml_unit_t * ul,offset_t mof,off_t nb,int metadata)17217c478bd9Sstevel@tonic-gate logmap_cancel(ml_unit_t *ul, offset_t mof, off_t nb, int metadata)
17227c478bd9Sstevel@tonic-gate {
17237c478bd9Sstevel@tonic-gate 	int32_t		hnb;
17247c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
17257c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
17267c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
17277c478bd9Sstevel@tonic-gate 	crb_t		*crb;
17287c478bd9Sstevel@tonic-gate 
17297c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
173080d34432Sfrankho 	    map_check_linkage(mtm));
17317c478bd9Sstevel@tonic-gate 
17327c478bd9Sstevel@tonic-gate 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
17337c478bd9Sstevel@tonic-gate 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
17347c478bd9Sstevel@tonic-gate 		if (hnb > nb)
17357c478bd9Sstevel@tonic-gate 			hnb = nb;
17367c478bd9Sstevel@tonic-gate 		/*
173720a1ae8aSjkennedy 		 * Find overlapping metadata entries.  Don't search through
173820a1ae8aSjkennedy 		 * the hash chains if this is user data because it is only
173920a1ae8aSjkennedy 		 * possible to have overlapping map entries for metadata,
174020a1ae8aSjkennedy 		 * and the search can become expensive for large files.
17417c478bd9Sstevel@tonic-gate 		 */
174220a1ae8aSjkennedy 		if (metadata) {
174320a1ae8aSjkennedy 			mep = MAP_HASH(mof, mtm);
174420a1ae8aSjkennedy 			mutex_enter(&mtm->mtm_mutex);
174520a1ae8aSjkennedy 			for (me = *mep; me; me = me->me_hash) {
174620a1ae8aSjkennedy 				if (!DATAoverlapME(mof, hnb, me))
174720a1ae8aSjkennedy 					continue;
17487c478bd9Sstevel@tonic-gate 
174920a1ae8aSjkennedy 				ASSERT(MEwithinDATA(me, mof, hnb));
17507c478bd9Sstevel@tonic-gate 
175120a1ae8aSjkennedy 				if ((me->me_flags & ME_CANCEL) == 0) {
175220a1ae8aSjkennedy 					me->me_cancel = mtm->mtm_cancel;
175320a1ae8aSjkennedy 					mtm->mtm_cancel = me;
175420a1ae8aSjkennedy 					me->me_flags |= ME_CANCEL;
175520a1ae8aSjkennedy 					crb = me->me_crb;
175620a1ae8aSjkennedy 					if (crb) {
175720a1ae8aSjkennedy 						crb->c_invalid = 1;
175820a1ae8aSjkennedy 					}
17597c478bd9Sstevel@tonic-gate 				}
17607c478bd9Sstevel@tonic-gate 			}
176120a1ae8aSjkennedy 			mutex_exit(&mtm->mtm_mutex);
17627c478bd9Sstevel@tonic-gate 		}
17637c478bd9Sstevel@tonic-gate 
17647c478bd9Sstevel@tonic-gate 		/*
17657c478bd9Sstevel@tonic-gate 		 * put a cancel record into the log
17667c478bd9Sstevel@tonic-gate 		 */
17677c478bd9Sstevel@tonic-gate 		logmap_cancel_delta(ul, mof, hnb, metadata);
17687c478bd9Sstevel@tonic-gate 	}
17697c478bd9Sstevel@tonic-gate 
17707c478bd9Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
177180d34432Sfrankho 	    map_check_linkage(mtm));
17727c478bd9Sstevel@tonic-gate }
17737c478bd9Sstevel@tonic-gate 
17747c478bd9Sstevel@tonic-gate /*
17757c478bd9Sstevel@tonic-gate  * check for overlap w/cancel delta
17767c478bd9Sstevel@tonic-gate  */
17777c478bd9Sstevel@tonic-gate int
logmap_iscancel(mt_map_t * mtm,offset_t mof,off_t nb)17787c478bd9Sstevel@tonic-gate logmap_iscancel(mt_map_t *mtm, offset_t mof, off_t nb)
17797c478bd9Sstevel@tonic-gate {
17807c478bd9Sstevel@tonic-gate 	off_t		hnb;
17817c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
17827c478bd9Sstevel@tonic-gate 	mapentry_t	**mep;
17837c478bd9Sstevel@tonic-gate 
17847c478bd9Sstevel@tonic-gate 	mutex_enter(&mtm->mtm_mutex);
17857c478bd9Sstevel@tonic-gate 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
17867c478bd9Sstevel@tonic-gate 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
17877c478bd9Sstevel@tonic-gate 		if (hnb > nb)
17887c478bd9Sstevel@tonic-gate 			hnb = nb;
17897c478bd9Sstevel@tonic-gate 		/*
17907c478bd9Sstevel@tonic-gate 		 * search for dup entry
17917c478bd9Sstevel@tonic-gate 		 */
17927c478bd9Sstevel@tonic-gate 		mep = MAP_HASH(mof, mtm);
17937c478bd9Sstevel@tonic-gate 		for (me = *mep; me; me = me->me_hash) {
17947c478bd9Sstevel@tonic-gate 			if (((me->me_flags & ME_ROLL) == 0) &&
17957c478bd9Sstevel@tonic-gate 			    (me->me_dt != DT_CANCEL))
17967c478bd9Sstevel@tonic-gate 				continue;
17977c478bd9Sstevel@tonic-gate 			if (DATAoverlapME(mof, hnb, me))
17987c478bd9Sstevel@tonic-gate 				break;
17997c478bd9Sstevel@tonic-gate 		}
18007c478bd9Sstevel@tonic-gate 
18017c478bd9Sstevel@tonic-gate 		/*
18027c478bd9Sstevel@tonic-gate 		 * overlap detected
18037c478bd9Sstevel@tonic-gate 		 */
18047c478bd9Sstevel@tonic-gate 		if (me) {
18057c478bd9Sstevel@tonic-gate 			mutex_exit(&mtm->mtm_mutex);
18067c478bd9Sstevel@tonic-gate 			return (1);
18077c478bd9Sstevel@tonic-gate 		}
18087c478bd9Sstevel@tonic-gate 	}
18097c478bd9Sstevel@tonic-gate 	mutex_exit(&mtm->mtm_mutex);
18107c478bd9Sstevel@tonic-gate 	return (0);
18117c478bd9Sstevel@tonic-gate }
18127c478bd9Sstevel@tonic-gate 
18137c478bd9Sstevel@tonic-gate static int
logmap_logscan_add(ml_unit_t * ul,struct delta * dp,off_t lof,size_t * nbp)18147c478bd9Sstevel@tonic-gate logmap_logscan_add(ml_unit_t *ul, struct delta *dp, off_t lof, size_t *nbp)
18157c478bd9Sstevel@tonic-gate {
18167c478bd9Sstevel@tonic-gate 	mapentry_t	*me;
18177c478bd9Sstevel@tonic-gate 	int		error;
18187c478bd9Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
18197c478bd9Sstevel@tonic-gate 
18207c478bd9Sstevel@tonic-gate 	/*
18217c478bd9Sstevel@tonic-gate 	 * verify delta header; failure == mediafail
18227c478bd9Sstevel@tonic-gate 	 */
18237c478bd9Sstevel@tonic-gate 	error = 0;
18247c478bd9Sstevel@tonic-gate 	/* delta type */
18257c478bd9Sstevel@tonic-gate 	if ((dp->d_typ <= DT_NONE) || (dp->d_typ >= DT_MAX))
18267c478bd9Sstevel@tonic-gate 		error = EINVAL;
18277c478bd9Sstevel@tonic-gate 	if (dp->d_typ == DT_COMMIT) {
18287c478bd9Sstevel@tonic-gate 		if (dp->d_nb != INT32_C(0) && dp->d_nb != INT32_C(-1))
18297c478bd9Sstevel@tonic-gate 			error = EINVAL;
18307c478bd9Sstevel@tonic-gate 	} else {
18317c478bd9Sstevel@tonic-gate 		/* length of delta */
18327c478bd9Sstevel@tonic-gate 		if ((dp->d_nb < INT32_C(0)) ||
18337c478bd9Sstevel@tonic-gate 		    (dp->d_nb > INT32_C(MAPBLOCKSIZE)))
18347c478bd9Sstevel@tonic-gate 			error = EINVAL;
18357c478bd9Sstevel@tonic-gate 
18367c478bd9Sstevel@tonic-gate 		/* offset on master device */
18377c478bd9Sstevel@tonic-gate 		if (dp->d_mof < INT64_C(0))
18387c478bd9Sstevel@tonic-gate 			error = EINVAL;
18397c478bd9Sstevel@tonic-gate 	}
18407c478bd9Sstevel@tonic-gate 
18417c478bd9Sstevel@tonic-gate 	if (error) {
18427c478bd9Sstevel@tonic-gate 		ldl_seterror(ul, "Error processing ufs log data during scan");
18437c478bd9Sstevel@tonic-gate 		return (error);
18447c478bd9Sstevel@tonic-gate 	}
18457c478bd9Sstevel@tonic-gate 
18467c478bd9Sstevel@tonic-gate 	/*
18477c478bd9Sstevel@tonic-gate 	 * process commit record
18487c478bd9Sstevel@tonic-gate 	 */
18497c478bd9Sstevel@tonic-gate 	if (dp->d_typ == DT_COMMIT) {
18507c478bd9Sstevel@tonic-gate 		if (mtm->mtm_dirty) {
18517c478bd9Sstevel@tonic-gate 			ASSERT(dp->d_nb == INT32_C(0));
18527c478bd9Sstevel@tonic-gate 			logmap_free_cancel(mtm, &mtm->mtm_cancel);
18537c478bd9Sstevel@tonic-gate 			mtm->mtm_dirty = 0;
18547c478bd9Sstevel@tonic-gate 			mtm->mtm_nmet = 0;
18557c478bd9Sstevel@tonic-gate 			mtm->mtm_tid++;
18567c478bd9Sstevel@tonic-gate 			mtm->mtm_committid = mtm->mtm_tid;
18577c478bd9Sstevel@tonic-gate 			ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
185880d34432Sfrankho 			    logmap_logscan_commit_debug(lof, mtm));
18597c478bd9Sstevel@tonic-gate 		}
18607c478bd9Sstevel@tonic-gate 		/*
18617c478bd9Sstevel@tonic-gate 		 * return #bytes to next sector (next delta header)
18627c478bd9Sstevel@tonic-gate 		 */
18637c478bd9Sstevel@tonic-gate 		*nbp = ldl_logscan_nbcommit(lof);
18647c478bd9Sstevel@tonic-gate 		mtm->mtm_tail_lof = lof;
18657c478bd9Sstevel@tonic-gate 		mtm->mtm_tail_nb = *nbp;
18667c478bd9Sstevel@tonic-gate 		return (0);
18677c478bd9Sstevel@tonic-gate 	}
18687c478bd9Sstevel@tonic-gate 
18697c478bd9Sstevel@tonic-gate 	/*
18707c478bd9Sstevel@tonic-gate 	 * add delta to logmap
18717c478bd9Sstevel@tonic-gate 	 */
18727c478bd9Sstevel@tonic-gate 	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
18737c478bd9Sstevel@tonic-gate 	bzero(me, sizeof (mapentry_t));
18747c478bd9Sstevel@tonic-gate 	me->me_lof = lof;
18757c478bd9Sstevel@tonic-gate 	me->me_mof = dp->d_mof;
18767c478bd9Sstevel@tonic-gate 	me->me_nb = dp->d_nb;
18777c478bd9Sstevel@tonic-gate 	me->me_tid = mtm->mtm_tid;
18787c478bd9Sstevel@tonic-gate 	me->me_dt = dp->d_typ;
18797c478bd9Sstevel@tonic-gate 	me->me_hash = NULL;
18807c478bd9Sstevel@tonic-gate 	me->me_flags = (ME_LIST | ME_SCAN);
18817c478bd9Sstevel@tonic-gate 	logmap_add(ul, NULL, 0, me);
18827c478bd9Sstevel@tonic-gate 	switch (dp->d_typ) {
18837c478bd9Sstevel@tonic-gate 	case DT_CANCEL:
18847c478bd9Sstevel@tonic-gate 		me->me_flags |= ME_CANCEL;
18857c478bd9Sstevel@tonic-gate 		me->me_cancel = mtm->mtm_cancel;
18867c478bd9Sstevel@tonic-gate 		mtm->mtm_cancel = me;
18877c478bd9Sstevel@tonic-gate 		break;
18887c478bd9Sstevel@tonic-gate 	default:
18897c478bd9Sstevel@tonic-gate 		ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
189080d34432Sfrankho 		    logmap_logscan_add_debug(dp, mtm));
18917c478bd9Sstevel@tonic-gate 		break;
18927c478bd9Sstevel@tonic-gate 	}
18937c478bd9Sstevel@tonic-gate 
18947c478bd9Sstevel@tonic-gate sizeofdelta:
18957c478bd9Sstevel@tonic-gate 	/*
18967c478bd9Sstevel@tonic-gate 	 * return #bytes till next delta header
18977c478bd9Sstevel@tonic-gate 	 */
18987c478bd9Sstevel@tonic-gate 	if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO))
18997c478bd9Sstevel@tonic-gate 		*nbp = 0;
19007c478bd9Sstevel@tonic-gate 	else
19017c478bd9Sstevel@tonic-gate 		*nbp = dp->d_nb;
19027c478bd9Sstevel@tonic-gate 	return (0);
19037c478bd9Sstevel@tonic-gate }
19047c478bd9Sstevel@tonic-gate 
19057c478bd9Sstevel@tonic-gate void
logmap_logscan(ml_unit_t * ul)19067c478bd9Sstevel@tonic-gate logmap_logscan(ml_unit_t *ul)
19077c478bd9Sstevel@tonic-gate {
19087c478bd9Sstevel@tonic-gate 	size_t		nb, nbd;
19097c478bd9Sstevel@tonic-gate 	off_t		lof;
19107c478bd9Sstevel@tonic-gate 	struct delta	delta;
19117c478bd9Sstevel@tonic-gate 	mt_map_t	*logmap	= ul->un_logmap;
19127c478bd9Sstevel@tonic-gate 
19137c478bd9Sstevel@tonic-gate 	ASSERT(ul->un_deltamap->mtm_next == (mapentry_t *)ul->un_deltamap);
19147c478bd9Sstevel@tonic-gate 
19157c478bd9Sstevel@tonic-gate 	/*
19167c478bd9Sstevel@tonic-gate 	 * prepare the log for a logscan
19177c478bd9Sstevel@tonic-gate 	 */
19187c478bd9Sstevel@tonic-gate 	ldl_logscan_begin(ul);
19197c478bd9Sstevel@tonic-gate 
19207c478bd9Sstevel@tonic-gate 	/*
19217c478bd9Sstevel@tonic-gate 	 * prepare the logmap for a logscan
19227c478bd9Sstevel@tonic-gate 	 */
19237c478bd9Sstevel@tonic-gate 	(void) map_free_entries(logmap);
19247c478bd9Sstevel@tonic-gate 	logmap->mtm_tid = 0;
19257c478bd9Sstevel@tonic-gate 	logmap->mtm_committid = UINT32_C(0);
19267c478bd9Sstevel@tonic-gate 	logmap->mtm_age = 0;
19277c478bd9Sstevel@tonic-gate 	logmap->mtm_dirty = 0;
19287c478bd9Sstevel@tonic-gate 	logmap->mtm_ref = 0;
19297c478bd9Sstevel@tonic-gate 
19307c478bd9Sstevel@tonic-gate 	/*
19317c478bd9Sstevel@tonic-gate 	 * while not at end of log
19327c478bd9Sstevel@tonic-gate 	 *	read delta header
19337c478bd9Sstevel@tonic-gate 	 *	add to logmap
19347c478bd9Sstevel@tonic-gate 	 *	seek to beginning of next delta
19357c478bd9Sstevel@tonic-gate 	 */
19367c478bd9Sstevel@tonic-gate 	lof = ul->un_head_lof;
19377c478bd9Sstevel@tonic-gate 	nbd = sizeof (delta);
19387c478bd9Sstevel@tonic-gate 	while (lof != ul->un_tail_lof) {
19397c478bd9Sstevel@tonic-gate 
19407c478bd9Sstevel@tonic-gate 		/* read delta header */
19417c478bd9Sstevel@tonic-gate 		if (ldl_logscan_read(ul, &lof, nbd, (caddr_t)&delta))
19427c478bd9Sstevel@tonic-gate 			break;
19437c478bd9Sstevel@tonic-gate 
19447c478bd9Sstevel@tonic-gate 		/* add to logmap */
19457c478bd9Sstevel@tonic-gate 		if (logmap_logscan_add(ul, &delta, lof, &nb))
19467c478bd9Sstevel@tonic-gate 			break;
19477c478bd9Sstevel@tonic-gate 
19487c478bd9Sstevel@tonic-gate 		/* seek to next header (skip data) */
19497c478bd9Sstevel@tonic-gate 		if (ldl_logscan_read(ul, &lof, nb, NULL))
19507c478bd9Sstevel@tonic-gate 			break;
19517c478bd9Sstevel@tonic-gate 	}
19527c478bd9Sstevel@tonic-gate 
19537c478bd9Sstevel@tonic-gate 	/*
19547c478bd9Sstevel@tonic-gate 	 * remove the last partial transaction from the logmap
19557c478bd9Sstevel@tonic-gate 	 */
19567c478bd9Sstevel@tonic-gate 	logmap_abort(ul, logmap->mtm_tid);
19577c478bd9Sstevel@tonic-gate 
19587c478bd9Sstevel@tonic-gate 	ldl_logscan_end(ul);
19597c478bd9Sstevel@tonic-gate }
19607c478bd9Sstevel@tonic-gate 
19617c478bd9Sstevel@tonic-gate void
_init_map(void)19627c478bd9Sstevel@tonic-gate _init_map(void)
19637c478bd9Sstevel@tonic-gate {
19647c478bd9Sstevel@tonic-gate 	/*
19657c478bd9Sstevel@tonic-gate 	 * Initialise the mapentry cache. No constructor or deconstructor
19667c478bd9Sstevel@tonic-gate 	 * is needed. Also no reclaim function is supplied as reclaiming
19677c478bd9Sstevel@tonic-gate 	 * current entries is not possible.
19687c478bd9Sstevel@tonic-gate 	 */
19697c478bd9Sstevel@tonic-gate 	mapentry_cache = kmem_cache_create("lufs_mapentry_cache",
19707c478bd9Sstevel@tonic-gate 	    sizeof (mapentry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
19717c478bd9Sstevel@tonic-gate }
19727c478bd9Sstevel@tonic-gate 
19737c478bd9Sstevel@tonic-gate /*
19747c478bd9Sstevel@tonic-gate  * Special case when we replace an old map entry which carries quota
19757c478bd9Sstevel@tonic-gate  * information with a newer entry which does not.
19767c478bd9Sstevel@tonic-gate  * In that case the push function would not be called to clean up the
19777c478bd9Sstevel@tonic-gate  * dquot structure. This would be found later by invalidatedq() causing
19787c478bd9Sstevel@tonic-gate  * a panic when the filesystem in unmounted.
19797c478bd9Sstevel@tonic-gate  * We clean up the dquot manually before replacing the map entry.
19807c478bd9Sstevel@tonic-gate  */
19817c478bd9Sstevel@tonic-gate void
handle_dquot(mapentry_t * me)19827c478bd9Sstevel@tonic-gate handle_dquot(mapentry_t *me)
19837c478bd9Sstevel@tonic-gate {
19847c478bd9Sstevel@tonic-gate 	int dolock = 0;
19857c478bd9Sstevel@tonic-gate 	int domutex = 0;
19867c478bd9Sstevel@tonic-gate 	struct dquot *dqp;
19877c478bd9Sstevel@tonic-gate 
19887c478bd9Sstevel@tonic-gate 	dqp = (struct dquot *)me->me_arg;
19897c478bd9Sstevel@tonic-gate 
19907c478bd9Sstevel@tonic-gate 	/*
19917c478bd9Sstevel@tonic-gate 	 * We need vfs_dqrwlock to call dqput()
19927c478bd9Sstevel@tonic-gate 	 */
19937c478bd9Sstevel@tonic-gate 	dolock = (!RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
19947c478bd9Sstevel@tonic-gate 	if (dolock)
19957c478bd9Sstevel@tonic-gate 		rw_enter(&dqp->dq_ufsvfsp->vfs_dqrwlock, RW_READER);
19967c478bd9Sstevel@tonic-gate 
19977c478bd9Sstevel@tonic-gate 	domutex = (!MUTEX_HELD(&dqp->dq_lock));
19987c478bd9Sstevel@tonic-gate 	if (domutex)
19997c478bd9Sstevel@tonic-gate 		mutex_enter(&dqp->dq_lock);
20007c478bd9Sstevel@tonic-gate 
20017c478bd9Sstevel@tonic-gate 	/*
20027c478bd9Sstevel@tonic-gate 	 * Only clean up if the dquot is referenced
20037c478bd9Sstevel@tonic-gate 	 */
20047c478bd9Sstevel@tonic-gate 	if (dqp->dq_cnt == 0) {
20057c478bd9Sstevel@tonic-gate 		if (domutex)
20067c478bd9Sstevel@tonic-gate 			mutex_exit(&dqp->dq_lock);
20077c478bd9Sstevel@tonic-gate 		if (dolock)
20087c478bd9Sstevel@tonic-gate 			rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
20097c478bd9Sstevel@tonic-gate 		return;
20107c478bd9Sstevel@tonic-gate 	}
20117c478bd9Sstevel@tonic-gate 
20127c478bd9Sstevel@tonic-gate 	dqp->dq_flags &= ~(DQ_MOD|DQ_TRANS);
20137c478bd9Sstevel@tonic-gate 	dqput(dqp);
20147c478bd9Sstevel@tonic-gate 
20157c478bd9Sstevel@tonic-gate 	if (domutex)
20167c478bd9Sstevel@tonic-gate 		mutex_exit(&dqp->dq_lock);
20177c478bd9Sstevel@tonic-gate 
20187c478bd9Sstevel@tonic-gate 	if (dolock)
20197c478bd9Sstevel@tonic-gate 		rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
20207c478bd9Sstevel@tonic-gate 
20217c478bd9Sstevel@tonic-gate }
2022