xref: /illumos-gate/usr/src/uts/common/fs/zfs/dsl_synctask.c (revision 9fa718d2f477620f14e3f2948dd03e3470add804)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_synctask.h>
#include <sys/metaslab.h>

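/*
 * Average size assumed for each block a sync task says it will modify:
 * dsl_sync_task_create() converts the caller's blocks_modified count into
 * bytes (1 << 14 = 16K per block), and dsl_sync_task_group_sync() uses the
 * resulting dstg_space estimate for its MOS space check.
 */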
#define	DST_AVG_BLKSHIFT 14

/* ARGSUSED */
static int
dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
{
	return (0);
}

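/*
 * Allocate an empty sync task group bound to the given pool.  Tasks are
 * added with dsl_sync_task_create() and the group is executed via
 * dsl_sync_task_group_wait() or dsl_sync_task_group_nowait().
 */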
dsl_sync_task_group_t *
dsl_sync_task_group_create(dsl_pool_t *dp)
{
	dsl_sync_task_group_t *dstg;

	dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
	list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
	    offsetof(dsl_sync_task_t, dst_node));
	dstg->dstg_pool = dp;

	return (dstg);
}

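/*
 * Append a task to the group.  A NULL checkfunc is replaced with
 * dsl_null_checkfunc(), which always succeeds.  blocks_modified is the
 * caller's estimate of how many blocks the syncfunc will dirty; it feeds
 * the group's space estimate (see DST_AVG_BLKSHIFT above).
 */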
void
dsl_sync_task_create(dsl_sync_task_group_t *dstg,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified)
{
	dsl_sync_task_t *dst;

	if (checkfunc == NULL)
		checkfunc = dsl_null_checkfunc;
	dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
	dst->dst_checkfunc = checkfunc;
	dst->dst_syncfunc = syncfunc;
	dst->dst_arg1 = arg1;
	dst->dst_arg2 = arg2;
	list_insert_tail(&dstg->dstg_tasks, dst);

	dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
}

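/*
 * Execute the group and wait for it: assign a transaction against the MOS
 * directory, run the checkfuncs as a preliminary open-context error check,
 * queue the group on the pool's dp_sync_tasks list for that txg, and wait
 * for the txg to sync.  If sync context returns EAGAIN, wait an additional
 * TXG_DEFER_SIZE txgs and retry from the top.  Returns the error recorded
 * by the last failing task, or 0 on success.
 */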
int
dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
{
	dmu_tx_t *tx;
	uint64_t txg;
	dsl_sync_task_t *dst;

top:
	tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
	VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));

	txg = dmu_tx_get_txg(tx);

	/* Do a preliminary error check. */
	dstg->dstg_err = 0;
#ifdef ZFS_DEBUG
	/*
	 * Only check half the time; otherwise, the sync-context
	 * check will almost never fail.
	 */
	if (spa_get_random(2) == 0)
		goto skip;
#endif
	rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
	for (dst = list_head(&dstg->dstg_tasks); dst;
	    dst = list_next(&dstg->dstg_tasks, dst)) {
		dst->dst_err =
		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
		if (dst->dst_err)
			dstg->dstg_err = dst->dst_err;
	}
	rw_exit(&dstg->dstg_pool->dp_config_rwlock);

	if (dstg->dstg_err) {
		dmu_tx_commit(tx);
		return (dstg->dstg_err);
	}
skip:

	/*
	 * We don't generally have many sync tasks, so pay the price of
	 * add_tail to get the tasks executed in the right order.
	 */
	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
	    dstg, txg));

	dmu_tx_commit(tx);

	txg_wait_synced(dstg->dstg_pool, txg);

	if (dstg->dstg_err == EAGAIN) {
		txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
		goto top;
	}

	return (dstg->dstg_err);
}

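/*
 * Queue the group for execution in the txg of the caller's open
 * transaction without waiting for it to complete.  The group is marked
 * nowaiter so that dsl_sync_task_group_sync() frees it once it has run.
 */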
void
dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
	uint64_t txg;

	dstg->dstg_nowaiter = B_TRUE;
	txg = dmu_tx_get_txg(tx);
	/*
	 * We don't generally have many sync tasks, so pay the price of
	 * add_tail to get the tasks executed in the right order.
	 */
	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
	    dstg, txg));
}

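/*
 * Free all tasks in the group, then the group itself.
 */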
void
dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
{
	dsl_sync_task_t *dst;

	while ((dst = list_head(&dstg->dstg_tasks)) != NULL) {
		list_remove(&dstg->dstg_tasks, dst);
		kmem_free(dst, sizeof (dsl_sync_task_t));
	}
	kmem_free(dstg, sizeof (dsl_sync_task_group_t));
}

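/*
 * Run the group in syncing context.  First verify that the MOS has room
 * for the estimated write (tripled, since MOS data is triple-dittoed);
 * then, with the config lock held as writer, re-run the checkfuncs and,
 * only if every check passes, invoke the syncfuncs.  A nowait group
 * destroys itself when it is done.
 */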
void
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
	dsl_sync_task_t *dst;
	dsl_pool_t *dp = dstg->dstg_pool;
	uint64_t quota, used;

	ASSERT0(dstg->dstg_err);

	/*
	 * Check for sufficient space.  We just check against what's
	 * on-disk; we don't want any in-flight accounting to get in our
	 * way, because open context may have already used up various
	 * in-core limits (arc_tempreserve, dsl_pool_tempreserve).
	 */
	quota = dsl_pool_adjustedsize(dp, B_FALSE) -
	    metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
	used = dp->dp_root_dir->dd_phys->dd_used_bytes;
	/* MOS space is triple-dittoed, so we multiply by 3. */
	if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
		dstg->dstg_err = ENOSPC;
		return;
	}

	/*
	 * Check for errors by calling checkfuncs.
	 */
	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
	for (dst = list_head(&dstg->dstg_tasks); dst;
	    dst = list_next(&dstg->dstg_tasks, dst)) {
		dst->dst_err =
		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
		if (dst->dst_err)
			dstg->dstg_err = dst->dst_err;
	}

	if (dstg->dstg_err == 0) {
		/*
		 * Execute sync tasks.
		 */
		for (dst = list_head(&dstg->dstg_tasks); dst;
		    dst = list_next(&dstg->dstg_tasks, dst)) {
			dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
		}
	}
	rw_exit(&dp->dp_config_rwlock);

	if (dstg->dstg_nowaiter)
		dsl_sync_task_group_destroy(dstg);
}

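/*
 * Convenience wrapper: create a single-task group, wait for it to sync,
 * and destroy it.  The pool must be writeable.
 *
 * Illustrative (hypothetical) caller, assuming my_check()/my_sync() match
 * dsl_checkfunc_t/dsl_syncfunc_t and expect to modify one block:
 *
 *	error = dsl_sync_task_do(dp, my_check, my_sync, my_arg1, my_arg2, 1);
 *	if (error != 0)
 *		return (error);
 */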
int
dsl_sync_task_do(dsl_pool_t *dp,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified)
{
	dsl_sync_task_group_t *dstg;
	int err;

	ASSERT(spa_writeable(dp->dp_spa));

	dstg = dsl_sync_task_group_create(dp);
	dsl_sync_task_create(dstg, checkfunc, syncfunc,
	    arg1, arg2, blocks_modified);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	return (err);
}

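/*
 * Fire-and-forget variant of dsl_sync_task_do(): queue a single-task group
 * in the caller's open transaction and return immediately.  Does nothing if
 * the pool is not writeable.
 */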
void
dsl_sync_task_do_nowait(dsl_pool_t *dp,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
{
	dsl_sync_task_group_t *dstg;

	if (!spa_writeable(dp->dp_spa))
		return;

	dstg = dsl_sync_task_group_create(dp);
	dsl_sync_task_create(dstg, checkfunc, syncfunc,
	    arg1, arg2, blocks_modified);
	dsl_sync_task_group_nowait(dstg, tx);
}
243