/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2015 by Delphix. All rights reserved.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/taskq.h>
#include <sys/cmn_err.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_bio.h>

/*
 * FILE SYSTEM INTERFACE TO TRANSACTION OPERATIONS (TOP; like VOP)
 */

uint_t topkey; /* tsd transaction key */

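/*
 * Each transacting thread carries a per-thread threadtrans_t in
 * thread-specific data under topkey.  It is allocated on first use in
 * top_begin_async(), freed by top_threadtrans_destroy() when the
 * thread exits, and records whether the thread issued any deltas plus
 * the tid of its last async transaction; top_begin_sync() uses the
 * latter for the nfs fsync shortcut.
 */
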
/*
 * declare a delta
 */
void
top_delta(
	ufsvfs_t *ufsvfsp,
	offset_t mof,
	off_t nb,
	delta_t dtyp,
	int (*func)(),
	ulong_t arg)
{
	ml_unit_t		*ul	= ufsvfsp->vfs_log;
	threadtrans_t		*tp	= tsd_get(topkey);

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(nb);
	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
	    top_delta_debug(ul, mof, nb, dtyp));

	deltamap_add(ul->un_deltamap, mof, nb, dtyp, func, arg, tp);

	ul->un_logmap->mtm_ref = 1; /* for roll thread's heuristic */
	if (tp) {
		tp->any_deltas = 1;
	}
}

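/*
 * Illustrative call (a sketch, not lifted from a real caller): declare
 * that nb bytes of superblock metadata at master offset mof are about
 * to change, so deltamap_add() tracks the range until top_log() moves
 * it into the logmap:
 *
 *	top_delta(ufsvfsp, mof, nb, DT_SB, NULL, 0);
 *
 * File system code normally reaches this through the TRANS_DELTA()
 * macro in <sys/fs/ufs_trans.h> rather than calling it directly.
 */
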
/*
 * cancel a delta
 */
void
top_cancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb, int flags)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	int		metadata = flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA);

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(nb);
	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
	    (!(flags & metadata) ||
	    top_delta_debug(ul, mof, nb, DT_CANCEL)));

	if (metadata)
		deltamap_del(ul->un_deltamap, mof, nb);

	logmap_cancel(ul, mof, nb, metadata);

	/*
	 * needed for the roll thread's heuristic
	 */
	ul->un_logmap->mtm_ref = 1;
}

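/*
 * Canceling tells the log that a range of the master device no longer
 * holds live metadata (a freed directory block, for instance), so any
 * logged deltas for it must not be rolled forward to the master.
 * top_iscancel() below is the query side; it also reports an errored
 * log so that callers treat the range conservatively.
 */
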
/*
 * check if this delta has been canceled (metadata -> userdata)
 */
int
top_iscancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(nb);
	if (logmap_iscancel(ul->un_logmap, mof, nb))
		return (1);
	if (ul->un_flags & LDL_ERROR)
		return (1);
	return (0);
}

/*
 * put device into error state
 */
void
top_seterror(ufsvfs_t *ufsvfsp)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ldl_seterror(ul, "ufs is forcing a ufs log error");
}

/*
 * issue an empty sync op to help empty the delta/log map or the log
 */
static void
top_issue_sync(ufsvfs_t *ufsvfsp)
{
	int error = 0;

	if ((curthread->t_flag & T_DONTBLOCK) == 0)
		curthread->t_flag |= T_DONTBLOCK;
	top_begin_sync(ufsvfsp, TOP_COMMIT_ASYNC, 0, &error);
	if (!error) {
		top_end_sync(ufsvfsp, &error, TOP_COMMIT_ASYNC, 0);
	}
}

static void
top_issue_from_taskq(void *arg)
{
	ufsvfs_t *ufsvfsp = arg;
	ml_unit_t *ul = ufsvfsp->vfs_log;
	mt_map_t *mtm = ul->un_logmap;

	top_issue_sync(ufsvfsp);

	/*
	 * We were called from the taskq_dispatch() in top_begin_async(), so
	 * decrement mtm_taskq_sync_count and wake up the thread waiting
	 * on the mtm_cv if the mtm_taskq_sync_count hits zero.
	 */
	ASSERT(taskq_member(system_taskq, curthread));

	mutex_enter(&mtm->mtm_lock);
	mtm->mtm_taskq_sync_count--;
	if (mtm->mtm_taskq_sync_count == 0) {
		cv_signal(&mtm->mtm_cv);
	}
	mutex_exit(&mtm->mtm_lock);
}

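/*
 * The dispatcher (top_begin_async()) increments mtm_taskq_sync_count
 * under mtm_lock before calling taskq_dispatch(), so the count is an
 * upper bound on pending dispatches; a forced unmount can wait on
 * mtm_cv for it to drain to zero.
 */
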
/*
 * MOBY TRANSACTION ROUTINES
 * begin a moby transaction
 *	sync ops enter until first sync op finishes
 *	async ops enter until last sync op finishes
 * end a moby transaction
 *	outstanding deltas are pushed thru log
 *	log buffer is committed (incore only)
 *	next trans is open to async ops
 *	log buffer is committed on the log
 *	next trans is open to sync ops
 */

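/*
 * Illustrative pairing as seen from a caller (a sketch; file system
 * code normally enters through the TRANS_BEGIN_SYNC()/TRANS_END_SYNC()
 * and TRANS_BEGIN_ASYNC()/TRANS_END_ASYNC() macros in
 * <sys/fs/ufs_trans.h>):
 *
 *	int error = 0;
 *
 *	top_begin_sync(ufsvfsp, topid, size, &error);
 *	if (error)
 *		return;		... already covered by a commit
 *	... declare deltas, modify metadata, top_log() the changes ...
 *	top_end_sync(ufsvfsp, &error, topid, size);
 */
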
/*ARGSUSED*/
void
top_begin_sync(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int *error)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm	= ul->un_logmap;
	threadtrans_t	*tp;
	ushort_t	seq;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(error != NULL);
	ASSERT(*error == 0);

	mutex_enter(&mtm->mtm_lock);
	if (topid == TOP_FSYNC) {
		/*
		 * Error the fsync immediately if this is an nfs thread
		 * and its last transaction has already been committed.
		 * The only transactions outstanding are those
		 * where no commit has even started
		 * (last_async_tid == mtm->mtm_tid)
		 * or those where a commit is in progress
		 * (last_async_tid == mtm->mtm_committid).
		 */
		if (curthread->t_flag & T_DONTPEND) {
			tp = tsd_get(topkey);
			if (tp && (tp->last_async_tid != mtm->mtm_tid) &&
			    (tp->last_async_tid != mtm->mtm_committid)) {
				mutex_exit(&mtm->mtm_lock);
				*error = 1;
				return;
			}
		}

		/*
		 * If there are already other synchronous transactions
		 * and we haven't allowed async ones to start yet,
		 * then just wait for the commit to complete.
		 */
		if (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
		    (TOP_SYNC | TOP_ASYNC)) || mtm->mtm_activesync) {
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);
			mutex_exit(&mtm->mtm_lock);
			*error = 1;
			return;
		}
		if (mtm->mtm_closed & TOP_SYNC) {
			/*
			 * We know we're in the window where a thread is
			 * committing a transaction in top_end_sync() and
			 * has allowed async threads to start but hasn't
			 * yet seen the completion of the commit write that
			 * would allow sync threads to start.
			 * So wait for that commit completion, then retest
			 * the quick nfs check; if that fails,
			 * go on to start a transaction.
			 */
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);

			/* tp is set above if T_DONTPEND */
			if ((curthread->t_flag & T_DONTPEND) && tp &&
			    (tp->last_async_tid != mtm->mtm_tid) &&
			    (tp->last_async_tid != mtm->mtm_committid)) {
				mutex_exit(&mtm->mtm_lock);
				*error = 1;
				return;
			}
		}
	}
retry:
	mtm->mtm_ref = 1;
	/*
	 * current transaction closed to sync ops; try for next transaction
	 */
	if ((mtm->mtm_closed & TOP_SYNC) && !panicstr) {
		ulong_t		resv;

		/*
		 * We know a commit is in progress; if we are trying to
		 * commit and we haven't allowed async ones to start yet,
		 * then just wait for the commit completion.
		 */
		if ((size == TOP_COMMIT_SIZE) &&
		    (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
		    (TOP_SYNC | TOP_ASYNC)) || (mtm->mtm_activesync))) {
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);
			mutex_exit(&mtm->mtm_lock);
			*error = 1;
			return;
		}

		/*
		 * next transaction is full; try for next transaction
		 */
		resv = size + ul->un_resv_wantin + ul->un_resv;
		if (resv > ul->un_maxresv) {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			goto retry;
		}
		/*
		 * we are in the next transaction; wait for it to start
		 */
		mtm->mtm_wantin++;
		ul->un_resv_wantin += size;
		/*
		 * The corresponding cv_broadcast wakes up
		 * all threads that have been validated to go into
		 * the next transaction. However, because spurious
		 * cv_wait wakeups are possible we use a sequence
		 * number to check that the commit and cv_broadcast
		 * have really occurred. We couldn't use mtm_tid
		 * because on error that doesn't get incremented.
		 */
		seq = mtm->mtm_seq;
		do {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
		} while (seq == mtm->mtm_seq);
	} else {
		/*
		 * if the current transaction is full, try the next one
		 */
		if (size && (ul->un_resv && ((size + ul->un_resv) >
		    ul->un_maxresv)) && !panicstr) {
			/*
			 * log is over-reserved and no one will unreserve
			 * the space, so generate an empty sync op to
			 * unreserve it
			 */
			if (mtm->mtm_activesync == 0) {
				mutex_exit(&mtm->mtm_lock);
				top_issue_sync(ufsvfsp);
				mutex_enter(&mtm->mtm_lock);
				goto retry;
			}
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			goto retry;
		}
		/*
		 * we are in the current transaction
		 */
		mtm->mtm_active++;
		mtm->mtm_activesync++;
		ul->un_resv += size;
	}

	ASSERT(mtm->mtm_active > 0);
	ASSERT(mtm->mtm_activesync > 0);
	mutex_exit(&mtm->mtm_lock);

	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_begin_debug(ul, topid, size));
}

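/*
 * Note on the *error out-parameter above: it is not an errno.  A
 * nonzero value means a commit completed while the caller waited, so
 * the work it wanted committed is already covered and it must not
 * start the transaction; only the TOP_FSYNC and TOP_COMMIT_SIZE paths
 * set it.
 */
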
int tryfail_cnt;

int
top_begin_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int tryasync)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm	= ul->un_logmap;
	threadtrans_t	*tp;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);

	tp = tsd_get(topkey);
	if (tp == NULL) {
		tp = kmem_zalloc(sizeof (threadtrans_t), KM_SLEEP);
		(void) tsd_set(topkey, tp);
	}
	tp->deltas_size = 0;
	tp->any_deltas = 0;

	mutex_enter(&mtm->mtm_lock);
retry:
	mtm->mtm_ref = 1;
	/*
	 * current transaction closed to async ops; try for next transaction
	 */
	if ((mtm->mtm_closed & TOP_ASYNC) && !panicstr) {
		if (tryasync) {
			mutex_exit(&mtm->mtm_lock);
			tryfail_cnt++;
			return (EWOULDBLOCK);
		}
		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
		goto retry;
	}

	/*
	 * if the current transaction is full, try the next one
	 */
	if (((size + ul->un_resv + ul->un_resv_wantin) > ul->un_maxresv) &&
	    !panicstr) {
		/*
		 * The log is over-reserved and no one will unreserve the
		 * space, so generate an empty sync op to unreserve it.
		 * We need TOP_SYNC_FORCED because we want to know when
		 * a top_end_sync is completed.
		 * mtm_taskq_sync_count is needed because we want to keep track
		 * of the pending top_issue_sync dispatches so that during
		 * forced umount we can wait for these to complete.
		 * mtm_taskq_sync_count is decremented in top_issue_from_taskq
		 * and can remain set even after top_end_sync completes.
		 * We have a window between the clearing of the TOP_SYNC_FORCED
		 * flag and the decrementing of mtm_taskq_sync_count.
		 * If new async transactions start consuming log space in this
		 * window, the log can get over-reserved.
		 * Subsequently a new async transaction would fail to generate
		 * an empty sync transaction via the taskq, since it finds
		 * mtm_taskq_sync_count set. This can cause a hang.
		 * Hence we do not test for mtm_taskq_sync_count being zero.
		 * Instead, the TOP_SYNC_FORCED flag is tested here.
		 */
		if ((mtm->mtm_activesync == 0) &&
		    (!(mtm->mtm_closed & TOP_SYNC_FORCED))) {
			/*
			 * Set flag to stop multiple forced empty
			 * sync transactions. Increment mtm_taskq_sync_count.
			 */
			mtm->mtm_closed |= TOP_SYNC_FORCED;
			mtm->mtm_taskq_sync_count++;
			mutex_exit(&mtm->mtm_lock);
			(void) taskq_dispatch(system_taskq,
			    top_issue_from_taskq, ufsvfsp, TQ_SLEEP);
			if (tryasync) {
				tryfail_cnt++;
				return (EWOULDBLOCK);
			}
			mutex_enter(&mtm->mtm_lock);
			goto retry;
		}
		if (tryasync) {
			mutex_exit(&mtm->mtm_lock);
			tryfail_cnt++;
			return (EWOULDBLOCK);
		}
		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
		goto retry;
	}
	/*
	 * we are in the current transaction
	 */
	mtm->mtm_active++;
	ul->un_resv += size;

	ASSERT(mtm->mtm_active > 0);
	mutex_exit(&mtm->mtm_lock);

	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_begin_debug(ul, topid, size));
	return (0);
}

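/*
 * Sketch of the tryasync contract (illustrative caller, made up here):
 *
 *	if (top_begin_async(ufsvfsp, topid, size, 1) == EWOULDBLOCK) {
 *		... back off: fail the request, or retry later
 *		... with tryasync == 0 and accept the cv_wait
 *	}
 *
 * With tryasync set, the routine never sleeps waiting for the next
 * transaction; it bumps tryfail_cnt and returns EWOULDBLOCK instead.
 */
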
/*ARGSUSED*/
void
top_end_sync(ufsvfs_t *ufsvfsp, int *ep, top_t topid, ulong_t size)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm	= ul->un_logmap;
	mapentry_t	*cancellist;
	uint32_t	tid;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_end_debug(ul, mtm, topid, size));

	mutex_enter(&mtm->mtm_lock);
	tid = mtm->mtm_tid;

	mtm->mtm_activesync--;
	mtm->mtm_active--;

	mtm->mtm_ref = 1;

	/*
	 * wait for last syncop to complete
	 */
	if (mtm->mtm_activesync || panicstr) {
		ushort_t seq = mtm->mtm_seq;

		mtm->mtm_closed = TOP_SYNC;

		do {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
		} while (seq == mtm->mtm_seq);
		mutex_exit(&mtm->mtm_lock);
		goto out;
	}
	/*
	 * last syncop; close current transaction to all ops
	 */
	mtm->mtm_closed = TOP_SYNC|TOP_ASYNC;

	/*
	 * wait for last asyncop to finish
	 */
	while (mtm->mtm_active) {
		cv_wait(&mtm->mtm_cv_eot, &mtm->mtm_lock);
	}

	/*
	 * push dirty metadata thru the log
	 */
	deltamap_push(ul);

	ASSERT(((ul->un_debug & MT_FORCEROLL) == 0) ||
	    top_roll_debug(ul));

	mtm->mtm_tid = tid + 1;	/* can overflow to 0 */

	/*
	 * Empty the cancellist, but save it for logmap_free_cancel
	 */
	mutex_enter(&mtm->mtm_mutex);
	cancellist = mtm->mtm_cancel;
	mtm->mtm_cancel = NULL;
	mutex_exit(&mtm->mtm_mutex);

	/*
	 * allow async ops
	 */
	ASSERT(mtm->mtm_active == 0);
	ul->un_resv = 0; /* unreserve the log space */
	mtm->mtm_closed = TOP_SYNC;
	/*
	 * Hold the un_log_mutex here until we are done writing
	 * the commit record, to prevent any more deltas from being
	 * written to the log after we allow async operations.
	 */
	mutex_enter(&ul->un_log_mutex);
	mutex_exit(&mtm->mtm_lock);
	cv_broadcast(&mtm->mtm_cv_next);

	/*
	 * asynchronously write the commit record
	 */
	logmap_commit(ul, tid);

	/*
	 * wait for outstanding log writes (e.g., commits) to finish
	 */
	ldl_waito(ul);

	/*
	 * Now that we are sure the commit has been written to the log,
	 * we can free any canceled deltas.  If we freed them before
	 * guaranteeing that the commit was written, we could panic before
	 * the commit but after an async thread had allocated and written
	 * to a canceled, freed block.
	 */
	logmap_free_cancel(mtm, &cancellist);
	mutex_exit(&ul->un_log_mutex);

	/*
	 * now, allow all ops
	 */
	mutex_enter(&mtm->mtm_lock);
	mtm->mtm_active += mtm->mtm_wantin;
	ul->un_resv += ul->un_resv_wantin;
	mtm->mtm_activesync = mtm->mtm_wantin;
	mtm->mtm_wantin = 0;
	mtm->mtm_closed = 0;
	ul->un_resv_wantin = 0;
	mtm->mtm_committid = mtm->mtm_tid;
	mtm->mtm_seq++;
	mutex_exit(&mtm->mtm_lock);

	/*
	 * Finish any other synchronous transactions and
	 * start any waiting new synchronous transactions
	 */
	cv_broadcast(&mtm->mtm_cv_commit);

	/*
	 * if the logmap is getting full, roll something
	 */
	if (logmap_need_roll_sync(mtm)) {
		logmap_forceroll_nowait(mtm);
	}

out:
	if (ul->un_flags & LDL_ERROR)
		*ep = EIO;
}

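/*
 * Note on the ordering above (a summary, not new behavior): async
 * threads are released as soon as the commit record is queued
 * (cv_broadcast of mtm_cv_next with mtm_closed == TOP_SYNC), while
 * sync threads are released only after the commit write has completed
 * on the log (cv_broadcast of mtm_cv_commit with mtm_closed == 0 and
 * mtm_seq advanced).  This is the window that top_begin_sync()
 * detects by retesting mtm_seq.
 */
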
/*ARGSUSED*/
void
top_end_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size)
{
	ml_unit_t	*ul	= ufsvfsp->vfs_log;
	mt_map_t	*mtm	= ul->un_logmap;
	threadtrans_t	*tp	= tsd_get(topkey);
	int		wakeup_needed = 0;

	ASSERT(tp);
	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_end_debug(ul, mtm, topid, size));

	mutex_enter(&mtm->mtm_lock);

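	/*
	 * The caller reserved "size" bytes of log space up front; if its
	 * deltas consumed less than that, give back the unused part of
	 * the reservation.
	 */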
	if (size > tp->deltas_size) {
		ul->un_resv -= (size - tp->deltas_size);
	}
	if (tp->any_deltas) {
		tp->last_async_tid = mtm->mtm_tid;
	}
	mtm->mtm_ref = 1;

	mtm->mtm_active--;
	if ((mtm->mtm_active == 0) &&
	    (mtm->mtm_closed == (TOP_SYNC|TOP_ASYNC))) {
		wakeup_needed = 1;
	}
	mutex_exit(&mtm->mtm_lock);
	if (wakeup_needed)
		cv_signal(&mtm->mtm_cv_eot);

	/*
	 * Generate a sync op if the log, logmap, or deltamap are heavily
	 * used, unless we may be holding VM locks (TOP_GETPAGE): issuing
	 * a top_end_sync() while holding VM locks could deadlock.
	 */
	if ((mtm->mtm_activesync == 0) &&
	    !(mtm->mtm_closed & TOP_SYNC) &&
	    (deltamap_need_commit(ul->un_deltamap) ||
	    logmap_need_commit(mtm) ||
	    ldl_need_commit(ul)) &&
	    (topid != TOP_GETPAGE)) {
		top_issue_sync(ufsvfsp);
	}
	/*
	 * roll something from the log if the logmap is too full
	 */
	if (logmap_need_roll_async(mtm))
		logmap_forceroll_nowait(mtm);
}

/*
 * Called from roll thread;
 *	buffer set for reading master
 * Returns
 *	0 - success, can continue with next buffer
 *	1 - failure due to logmap deltas being in use
 */
int
top_read_roll(rollbuf_t *rbp, ml_unit_t *ul)
{
	buf_t		*bp	= &rbp->rb_bh;
	offset_t	mof	= ldbtob(bp->b_blkno);

	/*
	 * get a list of deltas
	 */
	if (logmap_list_get_roll(ul->un_logmap, mof, rbp)) {
		/* logmap deltas are in use */
		return (1);
	}

	/*
	 * no deltas were found, nothing to roll
	 */
	if (rbp->rb_age == NULL) {
		bp->b_flags |= B_INVAL;
		return (0);
	}

	/*
	 * If there is one cached roll buffer that covers all the deltas,
	 * then we can use that instead of copying to a separate roll buffer.
	 */
	if (rbp->rb_crb) {
		rbp->rb_bh.b_blkno = lbtodb(rbp->rb_crb->c_mof);
		return (0);
	}

	/*
	 * Set up the read.
	 * If no read is needed, logmap_setup_read() returns 0.
	 */
	if (logmap_setup_read(rbp->rb_age, rbp)) {
		/*
		 * async read the data from master
		 */
		logstats.ls_rreads.value.ui64++;
		bp->b_bcount = MAPBLOCKSIZE;
		(void) bdev_strategy(bp);
		lwp_stat_update(LWP_STAT_INBLK, 1);
	} else {
		sema_v(&bp->b_io); /* mark read as complete */
	}
	return (0);
}

int ufs_crb_enable = 1;

/*
 * move deltas from deltamap into the log
 */
void
top_log(ufsvfs_t *ufsvfsp, char *va, offset_t vamof, off_t nb,
    caddr_t buf, uint32_t bufsz)
{
	ml_unit_t	*ul = ufsvfsp->vfs_log;
	mapentry_t	*me;
	offset_t	hmof;
	uint32_t	hnb, nb1;

	/*
	 * needed for the roll thread's heuristic
	 */
	ul->un_logmap->mtm_ref = 1;

	if (buf && ufs_crb_enable) {
		ASSERT((bufsz & DEV_BMASK) == 0);
		/*
		 * Move any deltas to the logmap. Split requests that
		 * straddle MAPBLOCKSIZE hash boundaries (i.e. summary info).
		 */
		for (hmof = vamof - (va - buf), nb1 = nb; bufsz;
		    bufsz -= hnb, hmof += hnb, buf += hnb, nb1 -= hnb) {
			hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF);
			if (hnb > bufsz)
				hnb = bufsz;
			me = deltamap_remove(ul->un_deltamap,
			    MAX(hmof, vamof), MIN(hnb, nb1));
			if (me) {
				logmap_add_buf(ul, va, hmof, me, buf, hnb);
			}
		}
	} else {
		/*
		 * if there are deltas
		 */
		me = deltamap_remove(ul->un_deltamap, vamof, nb);
		if (me) {
			/*
			 * move to logmap
			 */
			logmap_add(ul, va, vamof, me);
		}
	}

	ASSERT((ul->un_matamap == NULL) ||
	    matamap_within(ul->un_matamap, vamof, nb));
}
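
/*
 * Worked example of the split above (illustrative numbers, assuming
 * MAPBLOCKSIZE is 8k): a 2k buffer covering master offsets [7k, 9k)
 * straddles a MAPBLOCKSIZE boundary, so it is logged as two pieces,
 * [7k, 8k) and [8k, 9k), each falling within a single hash block of
 * the maps.
 */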
static void
top_threadtrans_destroy(void *tp)
{
	kmem_free(tp, sizeof (threadtrans_t));
}

void
_init_top(void)
{
	ASSERT(top_init_debug());

	/*
	 * set up the delta layer
	 */
	_init_map();

	/*
	 * Initialize the thread-specific data transaction key
	 */
	tsd_create(&topkey, top_threadtrans_destroy);
}