1fa9e4066Sahrens /*
2fa9e4066Sahrens * CDDL HEADER START
3fa9e4066Sahrens *
4fa9e4066Sahrens * The contents of this file are subject to the terms of the
5104e2ed7Sperrin * Common Development and Distribution License (the "License").
6104e2ed7Sperrin * You may not use this file except in compliance with the License.
7fa9e4066Sahrens *
8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens * See the License for the specific language governing permissions
11fa9e4066Sahrens * and limitations under the License.
12fa9e4066Sahrens *
13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens *
19fa9e4066Sahrens * CDDL HEADER END
20fa9e4066Sahrens */
21fa9e4066Sahrens /*
225002558fSNeil Perrin * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
239a686fbcSPaul Dagnelie * Copyright (c) 2015 by Delphix. All rights reserved.
24c3d26abcSMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
25fa9e4066Sahrens */
26fa9e4066Sahrens
27fa9e4066Sahrens #include <sys/types.h>
28fa9e4066Sahrens #include <sys/param.h>
29fa9e4066Sahrens #include <sys/systm.h>
30fa9e4066Sahrens #include <sys/sysmacros.h>
31fa9e4066Sahrens #include <sys/cmn_err.h>
32fa9e4066Sahrens #include <sys/kmem.h>
33fa9e4066Sahrens #include <sys/thread.h>
34fa9e4066Sahrens #include <sys/file.h>
35fa9e4066Sahrens #include <sys/vfs.h>
36fa9e4066Sahrens #include <sys/zfs_znode.h>
37fa9e4066Sahrens #include <sys/zfs_dir.h>
38fa9e4066Sahrens #include <sys/zil.h>
396ce0521aSperrin #include <sys/zil_impl.h>
40fa9e4066Sahrens #include <sys/byteorder.h>
41fa9e4066Sahrens #include <sys/policy.h>
42fa9e4066Sahrens #include <sys/stat.h>
43fa9e4066Sahrens #include <sys/mode.h>
44fa9e4066Sahrens #include <sys/acl.h>
45fa9e4066Sahrens #include <sys/dmu.h>
46fa9e4066Sahrens #include <sys/spa.h>
47da6c28aaSamw #include <sys/zfs_fuid.h>
48fa9e4066Sahrens #include <sys/ddi.h>
491209a471SNeil Perrin #include <sys/dsl_dataset.h>
501209a471SNeil Perrin
/*
 * These zfs_log_* functions must be called within a dmu tx, in one
 * of 2 contexts depending on zilog->z_replay:
 *
 * Non replay mode
 * ---------------
 * We need to record the transaction so that if it is committed to
 * the Intent Log then it can be replayed.  An intent log transaction
 * structure (itx_t) is allocated and all the information necessary to
 * possibly replay the transaction is saved in it. The itx is then assigned
 * a sequence number and inserted in the in-memory list anchored in the zilog.
 *
 * Replay mode
 * -----------
 * We need to mark the intent log record as replayed in the log header.
 * This is done in the same transaction as the replay so that they
 * commit atomically.
 */
69fa9e4066Sahrens
70da6c28aaSamw int
zfs_log_create_txtype(zil_create_t type,vsecattr_t * vsecp,vattr_t * vap)71da6c28aaSamw zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
72da6c28aaSamw {
73da6c28aaSamw int isxvattr = (vap->va_mask & AT_XVATTR);
74da6c28aaSamw switch (type) {
75da6c28aaSamw case Z_FILE:
76da6c28aaSamw if (vsecp == NULL && !isxvattr)
77da6c28aaSamw return (TX_CREATE);
78da6c28aaSamw if (vsecp && isxvattr)
79da6c28aaSamw return (TX_CREATE_ACL_ATTR);
80da6c28aaSamw if (vsecp)
81da6c28aaSamw return (TX_CREATE_ACL);
82da6c28aaSamw else
83da6c28aaSamw return (TX_CREATE_ATTR);
84da6c28aaSamw /*NOTREACHED*/
85da6c28aaSamw case Z_DIR:
86da6c28aaSamw if (vsecp == NULL && !isxvattr)
87da6c28aaSamw return (TX_MKDIR);
88da6c28aaSamw if (vsecp && isxvattr)
89da6c28aaSamw return (TX_MKDIR_ACL_ATTR);
90da6c28aaSamw if (vsecp)
91da6c28aaSamw return (TX_MKDIR_ACL);
92da6c28aaSamw else
93da6c28aaSamw return (TX_MKDIR_ATTR);
94da6c28aaSamw case Z_XATTRDIR:
95da6c28aaSamw return (TX_MKXATTR);
96da6c28aaSamw }
97da6c28aaSamw ASSERT(0);
98da6c28aaSamw return (TX_MAX_TYPE);
99da6c28aaSamw }
100da6c28aaSamw
101da6c28aaSamw /*
102da6c28aaSamw * build up the log data necessary for logging xvattr_t
103da6c28aaSamw * First lr_attr_t is initialized. following the lr_attr_t
104da6c28aaSamw * is the mapsize and attribute bitmap copied from the xvattr_t.
105da6c28aaSamw * Following the bitmap and bitmapsize two 64 bit words are reserved
106da6c28aaSamw * for the create time which may be set. Following the create time
107da6c28aaSamw * records a single 64 bit integer which has the bits to set on
108da6c28aaSamw * replay for the xvattr.
109da6c28aaSamw */
110da6c28aaSamw static void
zfs_log_xvattr(lr_attr_t * lrattr,xvattr_t * xvap)111da6c28aaSamw zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
112da6c28aaSamw {
113da6c28aaSamw uint32_t *bitmap;
114da6c28aaSamw uint64_t *attrs;
115da6c28aaSamw uint64_t *crtime;
116da6c28aaSamw xoptattr_t *xoap;
117da6c28aaSamw void *scanstamp;
118da6c28aaSamw int i;
119da6c28aaSamw
120da6c28aaSamw xoap = xva_getxoptattr(xvap);
121da6c28aaSamw ASSERT(xoap);
122da6c28aaSamw
123da6c28aaSamw lrattr->lr_attr_masksize = xvap->xva_mapsize;
124da6c28aaSamw bitmap = &lrattr->lr_attr_bitmap;
125da6c28aaSamw for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) {
126da6c28aaSamw *bitmap = xvap->xva_reqattrmap[i];
127da6c28aaSamw }
128da6c28aaSamw
129da6c28aaSamw /* Now pack the attributes up in a single uint64_t */
130da6c28aaSamw attrs = (uint64_t *)bitmap;
131da6c28aaSamw crtime = attrs + 1;
132da6c28aaSamw scanstamp = (caddr_t)(crtime + 2);
133da6c28aaSamw *attrs = 0;
134da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY))
135da6c28aaSamw *attrs |= (xoap->xoa_readonly == 0) ? 0 :
136da6c28aaSamw XAT0_READONLY;
137da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
138da6c28aaSamw *attrs |= (xoap->xoa_hidden == 0) ? 0 :
139da6c28aaSamw XAT0_HIDDEN;
140da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
141da6c28aaSamw *attrs |= (xoap->xoa_system == 0) ? 0 :
142da6c28aaSamw XAT0_SYSTEM;
143da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
144da6c28aaSamw *attrs |= (xoap->xoa_archive == 0) ? 0 :
145da6c28aaSamw XAT0_ARCHIVE;
146da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
147da6c28aaSamw *attrs |= (xoap->xoa_immutable == 0) ? 0 :
148da6c28aaSamw XAT0_IMMUTABLE;
149da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
150da6c28aaSamw *attrs |= (xoap->xoa_nounlink == 0) ? 0 :
151da6c28aaSamw XAT0_NOUNLINK;
152da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
153da6c28aaSamw *attrs |= (xoap->xoa_appendonly == 0) ? 0 :
154da6c28aaSamw XAT0_APPENDONLY;
155da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
156da6c28aaSamw *attrs |= (xoap->xoa_opaque == 0) ? 0 :
157da6c28aaSamw XAT0_APPENDONLY;
158da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
159da6c28aaSamw *attrs |= (xoap->xoa_nodump == 0) ? 0 :
160da6c28aaSamw XAT0_NODUMP;
161da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
162da6c28aaSamw *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
163da6c28aaSamw XAT0_AV_QUARANTINED;
164da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
165da6c28aaSamw *attrs |= (xoap->xoa_av_modified == 0) ? 0 :
166da6c28aaSamw XAT0_AV_MODIFIED;
167da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
168da6c28aaSamw ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
169f67950b2SNasf-Fan if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
170f67950b2SNasf-Fan ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID));
171f67950b2SNasf-Fan
172da6c28aaSamw bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
173f67950b2SNasf-Fan } else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
174f67950b2SNasf-Fan /*
175f67950b2SNasf-Fan * XAT_PROJID and XAT_AV_SCANSTAMP will never be valid
176f67950b2SNasf-Fan * at the same time, so we can share the same space.
177f67950b2SNasf-Fan */
178f67950b2SNasf-Fan bcopy(&xoap->xoa_projid, scanstamp, sizeof (uint64_t));
179f67950b2SNasf-Fan }
1807a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
1817a286c47SDai Ngo *attrs |= (xoap->xoa_reparse == 0) ? 0 :
1827a286c47SDai Ngo XAT0_REPARSE;
183fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
184fd9ee8b5Sjoyce mcintosh *attrs |= (xoap->xoa_offline == 0) ? 0 :
185fd9ee8b5Sjoyce mcintosh XAT0_OFFLINE;
186fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
187fd9ee8b5Sjoyce mcintosh *attrs |= (xoap->xoa_sparse == 0) ? 0 :
188fd9ee8b5Sjoyce mcintosh XAT0_SPARSE;
189f67950b2SNasf-Fan if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT))
190f67950b2SNasf-Fan *attrs |= (xoap->xoa_projinherit == 0) ? 0 :
191f67950b2SNasf-Fan XAT0_PROJINHERIT;
192da6c28aaSamw }
193da6c28aaSamw
194da6c28aaSamw static void *
zfs_log_fuid_ids(zfs_fuid_info_t * fuidp,void * start)195da6c28aaSamw zfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start)
196da6c28aaSamw {
197da6c28aaSamw zfs_fuid_t *zfuid;
198da6c28aaSamw uint64_t *fuidloc = start;
199da6c28aaSamw
200da6c28aaSamw /* First copy in the ACE FUIDs */
201da6c28aaSamw for (zfuid = list_head(&fuidp->z_fuids); zfuid;
202da6c28aaSamw zfuid = list_next(&fuidp->z_fuids, zfuid)) {
203da6c28aaSamw *fuidloc++ = zfuid->z_logfuid;
204da6c28aaSamw }
205da6c28aaSamw return (fuidloc);
206da6c28aaSamw }
207da6c28aaSamw
208da6c28aaSamw
209da6c28aaSamw static void *
zfs_log_fuid_domains(zfs_fuid_info_t * fuidp,void * start)210da6c28aaSamw zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
211da6c28aaSamw {
212da6c28aaSamw zfs_fuid_domain_t *zdomain;
213da6c28aaSamw
214da6c28aaSamw /* now copy in the domain info, if any */
215da6c28aaSamw if (fuidp->z_domain_str_sz != 0) {
216da6c28aaSamw for (zdomain = list_head(&fuidp->z_domains); zdomain;
217da6c28aaSamw zdomain = list_next(&fuidp->z_domains, zdomain)) {
218da6c28aaSamw bcopy((void *)zdomain->z_domain, start,
219da6c28aaSamw strlen(zdomain->z_domain) + 1);
220da6c28aaSamw start = (caddr_t)start +
221da6c28aaSamw strlen(zdomain->z_domain) + 1;
222da6c28aaSamw }
223da6c28aaSamw }
224da6c28aaSamw return (start);
225da6c28aaSamw }
226da6c28aaSamw
227fa9e4066Sahrens /*
228f7170741SWill Andrews * Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
229f7170741SWill Andrews * TK_MKXATTR transactions.
230da6c28aaSamw *
231da6c28aaSamw * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID
232da6c28aaSamw * domain information appended prior to the name. In this case the
233da6c28aaSamw * uid/gid in the log record will be a log centric FUID.
234da6c28aaSamw *
235da6c28aaSamw * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that
236da6c28aaSamw * may contain attributes, ACL and optional fuid information.
237da6c28aaSamw *
238da6c28aaSamw * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify
239da6c28aaSamw * and ACL and normal users/groups in the ACEs.
240da6c28aaSamw *
241da6c28aaSamw * There may be an optional xvattr attribute information similar
242da6c28aaSamw * to zfs_log_setattr.
243da6c28aaSamw *
244da6c28aaSamw * Also, after the file name "domain" strings may be appended.
245fa9e4066Sahrens */
246b19a79ecSperrin void
zfs_log_create(zilog_t * zilog,dmu_tx_t * tx,uint64_t txtype,znode_t * dzp,znode_t * zp,char * name,vsecattr_t * vsecp,zfs_fuid_info_t * fuidp,vattr_t * vap)247da6c28aaSamw zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
248da6c28aaSamw znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
249da6c28aaSamw zfs_fuid_info_t *fuidp, vattr_t *vap)
250fa9e4066Sahrens {
251fa9e4066Sahrens itx_t *itx;
252fa9e4066Sahrens lr_create_t *lr;
253da6c28aaSamw lr_acl_create_t *lracl;
254d5285caeSGeorge Wilson size_t aclsize = (vsecp != NULL) ? vsecp->vsa_aclentsz : 0;
255da6c28aaSamw size_t xvatsize = 0;
256da6c28aaSamw size_t txsize;
257da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap;
258da6c28aaSamw void *end;
259da6c28aaSamw size_t lrsize;
260fa9e4066Sahrens size_t namesize = strlen(name) + 1;
261da6c28aaSamw size_t fuidsz = 0;
262fa9e4066Sahrens
263b24ab676SJeff Bonwick if (zil_replaying(zilog, tx))
264b19a79ecSperrin return;
265fa9e4066Sahrens
266da6c28aaSamw /*
267da6c28aaSamw * If we have FUIDs present then add in space for
268da6c28aaSamw * domains and ACE fuid's if any.
269da6c28aaSamw */
270da6c28aaSamw if (fuidp) {
271da6c28aaSamw fuidsz += fuidp->z_domain_str_sz;
272da6c28aaSamw fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
273da6c28aaSamw }
274da6c28aaSamw
275da6c28aaSamw if (vap->va_mask & AT_XVATTR)
276da6c28aaSamw xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);
277da6c28aaSamw
278da6c28aaSamw if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
279da6c28aaSamw (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
280da6c28aaSamw (int)txtype == TX_MKXATTR) {
281da6c28aaSamw txsize = sizeof (*lr) + namesize + fuidsz + xvatsize;
282da6c28aaSamw lrsize = sizeof (*lr);
283da6c28aaSamw } else {
284da6c28aaSamw txsize =
285da6c28aaSamw sizeof (lr_acl_create_t) + namesize + fuidsz +
286569e6c63Smarks ZIL_ACE_LENGTH(aclsize) + xvatsize;
287da6c28aaSamw lrsize = sizeof (lr_acl_create_t);
288da6c28aaSamw }
289da6c28aaSamw
290da6c28aaSamw itx = zil_itx_create(txtype, txsize);
291da6c28aaSamw
292fa9e4066Sahrens lr = (lr_create_t *)&itx->itx_lr;
293fa9e4066Sahrens lr->lr_doid = dzp->z_id;
294fa9e4066Sahrens lr->lr_foid = zp->z_id;
29554811da5SToomas Soome /* Store dnode slot count in 8 bits above object id. */
29654811da5SToomas Soome LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
2970a586ceaSMark Shellenbaum lr->lr_mode = zp->z_mode;
2980a586ceaSMark Shellenbaum if (!IS_EPHEMERAL(zp->z_uid)) {
2990a586ceaSMark Shellenbaum lr->lr_uid = (uint64_t)zp->z_uid;
300da6c28aaSamw } else {
301da6c28aaSamw lr->lr_uid = fuidp->z_fuid_owner;
302da6c28aaSamw }
3030a586ceaSMark Shellenbaum if (!IS_EPHEMERAL(zp->z_gid)) {
3040a586ceaSMark Shellenbaum lr->lr_gid = (uint64_t)zp->z_gid;
305da6c28aaSamw } else {
306da6c28aaSamw lr->lr_gid = fuidp->z_fuid_group;
307da6c28aaSamw }
3080a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
3090a586ceaSMark Shellenbaum sizeof (uint64_t));
3100a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
3110a586ceaSMark Shellenbaum lr->lr_crtime, sizeof (uint64_t) * 2);
3120a586ceaSMark Shellenbaum
3130a586ceaSMark Shellenbaum if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs), &lr->lr_rdev,
3140a586ceaSMark Shellenbaum sizeof (lr->lr_rdev)) != 0)
3150a586ceaSMark Shellenbaum lr->lr_rdev = 0;
316da6c28aaSamw
317da6c28aaSamw /*
318da6c28aaSamw * Fill in xvattr info if any
319da6c28aaSamw */
320da6c28aaSamw if (vap->va_mask & AT_XVATTR) {
321da6c28aaSamw zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
322da6c28aaSamw end = (caddr_t)lr + lrsize + xvatsize;
323da6c28aaSamw } else {
324da6c28aaSamw end = (caddr_t)lr + lrsize;
325da6c28aaSamw }
326da6c28aaSamw
327da6c28aaSamw /* Now fill in any ACL info */
328da6c28aaSamw
329da6c28aaSamw if (vsecp) {
330da6c28aaSamw lracl = (lr_acl_create_t *)&itx->itx_lr;
331da6c28aaSamw lracl->lr_aclcnt = vsecp->vsa_aclcnt;
332da6c28aaSamw lracl->lr_acl_bytes = aclsize;
333da6c28aaSamw lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
334da6c28aaSamw lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
335da6c28aaSamw if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS)
336da6c28aaSamw lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
337da6c28aaSamw else
338da6c28aaSamw lracl->lr_acl_flags = 0;
339da6c28aaSamw
340da6c28aaSamw bcopy(vsecp->vsa_aclentp, end, aclsize);
341569e6c63Smarks end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
342da6c28aaSamw }
343da6c28aaSamw
344da6c28aaSamw /* drop in FUID info */
345da6c28aaSamw if (fuidp) {
346da6c28aaSamw end = zfs_log_fuid_ids(fuidp, end);
347da6c28aaSamw end = zfs_log_fuid_domains(fuidp, end);
348da6c28aaSamw }
349da6c28aaSamw /*
350da6c28aaSamw * Now place file name in log record
351da6c28aaSamw */
352da6c28aaSamw bcopy(name, end, namesize);
353fa9e4066Sahrens
3545002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
355fa9e4066Sahrens }
356fa9e4066Sahrens
357*d8849d7dSChunwei Chen void zil_remove_async(zilog_t *zilog, uint64_t oid);
358*d8849d7dSChunwei Chen
359fa9e4066Sahrens /*
360f7170741SWill Andrews * Handles both TX_REMOVE and TX_RMDIR transactions.
361fa9e4066Sahrens */
362b19a79ecSperrin void
zfs_log_remove(zilog_t * zilog,dmu_tx_t * tx,uint64_t txtype,znode_t * dzp,char * name,uint64_t foid,boolean_t unlinked)363da6c28aaSamw zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
364*d8849d7dSChunwei Chen znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked)
365fa9e4066Sahrens {
366fa9e4066Sahrens itx_t *itx;
367fa9e4066Sahrens lr_remove_t *lr;
368fa9e4066Sahrens size_t namesize = strlen(name) + 1;
369fa9e4066Sahrens
370b24ab676SJeff Bonwick if (zil_replaying(zilog, tx))
371b19a79ecSperrin return;
372fa9e4066Sahrens
373fa9e4066Sahrens itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
374fa9e4066Sahrens lr = (lr_remove_t *)&itx->itx_lr;
375fa9e4066Sahrens lr->lr_doid = dzp->z_id;
376fa9e4066Sahrens bcopy(name, (char *)(lr + 1), namesize);
377fa9e4066Sahrens
37851bd2f97SNeil Perrin itx->itx_oid = foid;
3795002558fSNeil Perrin
380*d8849d7dSChunwei Chen /*
381*d8849d7dSChunwei Chen * Object ids can be re-instantiated in the next txg so
382*d8849d7dSChunwei Chen * remove any async transactions to avoid future leaks.
383*d8849d7dSChunwei Chen * This can happen if a fsync occurs on the re-instantiated
384*d8849d7dSChunwei Chen * object for a WR_INDIRECT or WR_NEED_COPY write, which gets
385*d8849d7dSChunwei Chen * the new file data and flushes a write record for the old object.
386*d8849d7dSChunwei Chen */
387*d8849d7dSChunwei Chen if (unlinked) {
388*d8849d7dSChunwei Chen ASSERT((txtype & ~TX_CI) == TX_REMOVE);
389*d8849d7dSChunwei Chen zil_remove_async(zilog, foid);
390*d8849d7dSChunwei Chen }
3915002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
392fa9e4066Sahrens }
393fa9e4066Sahrens
394fa9e4066Sahrens /*
395f7170741SWill Andrews * Handles TX_LINK transactions.
396fa9e4066Sahrens */
397b19a79ecSperrin void
zfs_log_link(zilog_t * zilog,dmu_tx_t * tx,uint64_t txtype,znode_t * dzp,znode_t * zp,char * name)398da6c28aaSamw zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
3999a686fbcSPaul Dagnelie znode_t *dzp, znode_t *zp, char *name)
400fa9e4066Sahrens {
401fa9e4066Sahrens itx_t *itx;
402fa9e4066Sahrens lr_link_t *lr;
403fa9e4066Sahrens size_t namesize = strlen(name) + 1;
404fa9e4066Sahrens
405b24ab676SJeff Bonwick if (zil_replaying(zilog, tx))
406b19a79ecSperrin return;
407fa9e4066Sahrens
408fa9e4066Sahrens itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
409fa9e4066Sahrens lr = (lr_link_t *)&itx->itx_lr;
410fa9e4066Sahrens lr->lr_doid = dzp->z_id;
411fa9e4066Sahrens lr->lr_link_obj = zp->z_id;
412fa9e4066Sahrens bcopy(name, (char *)(lr + 1), namesize);
413fa9e4066Sahrens
4145002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
415fa9e4066Sahrens }
416fa9e4066Sahrens
417fa9e4066Sahrens /*
418f7170741SWill Andrews * Handles TX_SYMLINK transactions.
419fa9e4066Sahrens */
420b19a79ecSperrin void
zfs_log_symlink(zilog_t * zilog,dmu_tx_t * tx,uint64_t txtype,znode_t * dzp,znode_t * zp,char * name,char * link)421da6c28aaSamw zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
422da6c28aaSamw znode_t *dzp, znode_t *zp, char *name, char *link)
423fa9e4066Sahrens {
424fa9e4066Sahrens itx_t *itx;
425fa9e4066Sahrens lr_create_t *lr;
426fa9e4066Sahrens size_t namesize = strlen(name) + 1;
427fa9e4066Sahrens size_t linksize = strlen(link) + 1;
428fa9e4066Sahrens
429b24ab676SJeff Bonwick if (zil_replaying(zilog, tx))
430b19a79ecSperrin return;
431fa9e4066Sahrens
432fa9e4066Sahrens itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
433fa9e4066Sahrens lr = (lr_create_t *)&itx->itx_lr;
434fa9e4066Sahrens lr->lr_doid = dzp->z_id;
435fa9e4066Sahrens lr->lr_foid = zp->z_id;
4360a586ceaSMark Shellenbaum lr->lr_uid = zp->z_uid;
4370a586ceaSMark Shellenbaum lr->lr_gid = zp->z_gid;
4380a586ceaSMark Shellenbaum lr->lr_mode = zp->z_mode;
4390a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
4400a586ceaSMark Shellenbaum sizeof (uint64_t));
4410a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
4420a586ceaSMark Shellenbaum lr->lr_crtime, sizeof (uint64_t) * 2);
443fa9e4066Sahrens bcopy(name, (char *)(lr + 1), namesize);
444fa9e4066Sahrens bcopy(link, (char *)(lr + 1) + namesize, linksize);
445fa9e4066Sahrens
4465002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
447fa9e4066Sahrens }
448fa9e4066Sahrens
449fa9e4066Sahrens /*
450f7170741SWill Andrews * Handles TX_RENAME transactions.
451fa9e4066Sahrens */
452b19a79ecSperrin void
zfs_log_rename(zilog_t * zilog,dmu_tx_t * tx,uint64_t txtype,znode_t * sdzp,char * sname,znode_t * tdzp,char * dname,znode_t * szp)453da6c28aaSamw zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
4549a686fbcSPaul Dagnelie znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
455fa9e4066Sahrens {
456fa9e4066Sahrens itx_t *itx;
457fa9e4066Sahrens lr_rename_t *lr;
458fa9e4066Sahrens size_t snamesize = strlen(sname) + 1;
459fa9e4066Sahrens size_t dnamesize = strlen(dname) + 1;
460fa9e4066Sahrens
461b24ab676SJeff Bonwick if (zil_replaying(zilog, tx))
462b19a79ecSperrin return;
463fa9e4066Sahrens
464fa9e4066Sahrens itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
465fa9e4066Sahrens lr = (lr_rename_t *)&itx->itx_lr;
466fa9e4066Sahrens lr->lr_sdoid = sdzp->z_id;
467fa9e4066Sahrens lr->lr_tdoid = tdzp->z_id;
468fa9e4066Sahrens bcopy(sname, (char *)(lr + 1), snamesize);
469fa9e4066Sahrens bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
47091de656bSNeil Perrin itx->itx_oid = szp->z_id;
471fa9e4066Sahrens
4725002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
473fa9e4066Sahrens }
474fa9e4066Sahrens
475fa9e4066Sahrens /*
476f7170741SWill Andrews * Handles TX_WRITE transactions.
477fa9e4066Sahrens */
478c5c6ffa0Smaybee ssize_t zfs_immediate_write_sz = 32768;
479fa9e4066Sahrens
480b19a79ecSperrin void
zfs_log_write(zilog_t * zilog,dmu_tx_t * tx,int txtype,znode_t * zp,offset_t off,ssize_t resid,int ioflag)481fa9e4066Sahrens zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
4829a686fbcSPaul Dagnelie znode_t *zp, offset_t off, ssize_t resid, int ioflag)
483fa9e4066Sahrens {
484c5ee4681SAlexander Motin uint32_t blocksize = zp->z_blksz;
485104e2ed7Sperrin itx_wr_state_t write_state;
486ec533521Sfr uintptr_t fsync_cnt;
487fa9e4066Sahrens
488b24ab676SJeff Bonwick if (zil_replaying(zilog, tx) || zp->z_unlinked)
489b19a79ecSperrin return;
490fa9e4066Sahrens
491c5ee4681SAlexander Motin if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
492c5ee4681SAlexander Motin write_state = WR_INDIRECT;
493c5ee4681SAlexander Motin else if (!spa_has_slogs(zilog->zl_spa) &&
494c5ee4681SAlexander Motin resid >= zfs_immediate_write_sz)
495104e2ed7Sperrin write_state = WR_INDIRECT;
496f70bbccaSperrin else if (ioflag & (FSYNC | FDSYNC))
497104e2ed7Sperrin write_state = WR_COPIED;
498feb08c6bSbillm else
499104e2ed7Sperrin write_state = WR_NEED_COPY;
500feb08c6bSbillm
501ec533521Sfr if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
502ec533521Sfr (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
503ec533521Sfr }
504ec533521Sfr
5056ce0521aSperrin while (resid) {
5066ce0521aSperrin itx_t *itx;
5076ce0521aSperrin lr_write_t *lr;
508c5ee4681SAlexander Motin itx_wr_state_t wr_state = write_state;
509c5ee4681SAlexander Motin ssize_t len = resid;
5106ce0521aSperrin
511c5ee4681SAlexander Motin if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
512c5ee4681SAlexander Motin wr_state = WR_NEED_COPY;
513c5ee4681SAlexander Motin else if (wr_state == WR_INDIRECT)
514c5ee4681SAlexander Motin len = MIN(blocksize - P2PHASE(off, blocksize), resid);
5156ce0521aSperrin
5166ce0521aSperrin itx = zil_itx_create(txtype, sizeof (*lr) +
517c5ee4681SAlexander Motin (wr_state == WR_COPIED ? len : 0));
5186ce0521aSperrin lr = (lr_write_t *)&itx->itx_lr;
519c5ee4681SAlexander Motin if (wr_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
5207bfdf011SNeil Perrin zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
521b24ab676SJeff Bonwick zil_itx_destroy(itx);
522104e2ed7Sperrin itx = zil_itx_create(txtype, sizeof (*lr));
523feb08c6bSbillm lr = (lr_write_t *)&itx->itx_lr;
524c5ee4681SAlexander Motin wr_state = WR_NEED_COPY;
525104e2ed7Sperrin }
526feb08c6bSbillm
527c5ee4681SAlexander Motin itx->itx_wr_state = wr_state;
5286ce0521aSperrin lr->lr_foid = zp->z_id;
5296ce0521aSperrin lr->lr_offset = off;
5306ce0521aSperrin lr->lr_length = len;
5316ce0521aSperrin lr->lr_blkoff = 0;
5326ce0521aSperrin BP_ZERO(&lr->lr_blkptr);
533fa9e4066Sahrens
5346ce0521aSperrin itx->itx_private = zp->z_zfsvfs;
535fa9e4066Sahrens
5365002558fSNeil Perrin if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
5375002558fSNeil Perrin (fsync_cnt == 0))
538ec533521Sfr itx->itx_sync = B_FALSE;
539ec533521Sfr
5405002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
5416ce0521aSperrin
5426ce0521aSperrin off += len;
5436ce0521aSperrin resid -= len;
5446ce0521aSperrin }
545fa9e4066Sahrens }
546fa9e4066Sahrens
547fa9e4066Sahrens /*
548f7170741SWill Andrews * Handles TX_TRUNCATE transactions.
549fa9e4066Sahrens */
550b19a79ecSperrin void
zfs_log_truncate(zilog_t * zilog,dmu_tx_t * tx,int txtype,znode_t * zp,uint64_t off,uint64_t len)551fa9e4066Sahrens zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
5529a686fbcSPaul Dagnelie znode_t *zp, uint64_t off, uint64_t len)
553fa9e4066Sahrens {
554fa9e4066Sahrens itx_t *itx;
555fa9e4066Sahrens lr_truncate_t *lr;
556fa9e4066Sahrens
557b24ab676SJeff Bonwick if (zil_replaying(zilog, tx) || zp->z_unlinked)
558b19a79ecSperrin return;
559fa9e4066Sahrens
560fa9e4066Sahrens itx = zil_itx_create(txtype, sizeof (*lr));
561fa9e4066Sahrens lr = (lr_truncate_t *)&itx->itx_lr;
562fa9e4066Sahrens lr->lr_foid = zp->z_id;
563fa9e4066Sahrens lr->lr_offset = off;
564fa9e4066Sahrens lr->lr_length = len;
565fa9e4066Sahrens
56667bd71c6Sperrin itx->itx_sync = (zp->z_sync_cnt != 0);
5675002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
568fa9e4066Sahrens }
569fa9e4066Sahrens
570fa9e4066Sahrens /*
571f7170741SWill Andrews * Handles TX_SETATTR transactions.
572fa9e4066Sahrens */
573b19a79ecSperrin void
zfs_log_setattr(zilog_t * zilog,dmu_tx_t * tx,int txtype,znode_t * zp,vattr_t * vap,uint_t mask_applied,zfs_fuid_info_t * fuidp)574fa9e4066Sahrens zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
5759a686fbcSPaul Dagnelie znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
576fa9e4066Sahrens {
577da6c28aaSamw itx_t *itx;
578da6c28aaSamw lr_setattr_t *lr;
579da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap;
580da6c28aaSamw size_t recsize = sizeof (lr_setattr_t);
581da6c28aaSamw void *start;
582da6c28aaSamw
583b24ab676SJeff Bonwick if (zil_replaying(zilog, tx) || zp->z_unlinked)
584b19a79ecSperrin return;
585fa9e4066Sahrens
586da6c28aaSamw /*
587da6c28aaSamw * If XVATTR set, then log record size needs to allow
588da6c28aaSamw * for lr_attr_t + xvattr mask, mapsize and create time
589da6c28aaSamw * plus actual attribute values
590da6c28aaSamw */
591da6c28aaSamw if (vap->va_mask & AT_XVATTR)
592da6c28aaSamw recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
593da6c28aaSamw
594da6c28aaSamw if (fuidp)
595da6c28aaSamw recsize += fuidp->z_domain_str_sz;
596da6c28aaSamw
597da6c28aaSamw itx = zil_itx_create(txtype, recsize);
598fa9e4066Sahrens lr = (lr_setattr_t *)&itx->itx_lr;
599fa9e4066Sahrens lr->lr_foid = zp->z_id;
600fa9e4066Sahrens lr->lr_mask = (uint64_t)mask_applied;
601fa9e4066Sahrens lr->lr_mode = (uint64_t)vap->va_mode;
602da6c28aaSamw if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid))
603da6c28aaSamw lr->lr_uid = fuidp->z_fuid_owner;
604da6c28aaSamw else
605da6c28aaSamw lr->lr_uid = (uint64_t)vap->va_uid;
606da6c28aaSamw
607da6c28aaSamw if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid))
608da6c28aaSamw lr->lr_gid = fuidp->z_fuid_group;
609da6c28aaSamw else
610da6c28aaSamw lr->lr_gid = (uint64_t)vap->va_gid;
611da6c28aaSamw
612fa9e4066Sahrens lr->lr_size = (uint64_t)vap->va_size;
613fa9e4066Sahrens ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
614fa9e4066Sahrens ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
615da6c28aaSamw start = (lr_setattr_t *)(lr + 1);
616da6c28aaSamw if (vap->va_mask & AT_XVATTR) {
617da6c28aaSamw zfs_log_xvattr((lr_attr_t *)start, xvap);
618da6c28aaSamw start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
619da6c28aaSamw }
620da6c28aaSamw
621da6c28aaSamw /*
622da6c28aaSamw * Now stick on domain information if any on end
623da6c28aaSamw */
624da6c28aaSamw
625da6c28aaSamw if (fuidp)
626da6c28aaSamw (void) zfs_log_fuid_domains(fuidp, start);
627fa9e4066Sahrens
62867bd71c6Sperrin itx->itx_sync = (zp->z_sync_cnt != 0);
6295002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
630fa9e4066Sahrens }
631fa9e4066Sahrens
632fa9e4066Sahrens /*
633f7170741SWill Andrews * Handles TX_ACL transactions.
634fa9e4066Sahrens */
635b19a79ecSperrin void
zfs_log_acl(zilog_t * zilog,dmu_tx_t * tx,znode_t * zp,vsecattr_t * vsecp,zfs_fuid_info_t * fuidp)636da6c28aaSamw zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
637da6c28aaSamw vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
638fa9e4066Sahrens {
639fa9e4066Sahrens itx_t *itx;
640da6c28aaSamw lr_acl_v0_t *lrv0;
641fa9e4066Sahrens lr_acl_t *lr;
642da6c28aaSamw int txtype;
643da6c28aaSamw int lrsize;
644da6c28aaSamw size_t txsize;
645da6c28aaSamw size_t aclbytes = vsecp->vsa_aclentsz;
646da6c28aaSamw
647b24ab676SJeff Bonwick if (zil_replaying(zilog, tx) || zp->z_unlinked)
64831e69ea4Smarks return;
64931e69ea4Smarks
65031e69ea4Smarks txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ?
651da6c28aaSamw TX_ACL_V0 : TX_ACL;
652da6c28aaSamw
653da6c28aaSamw if (txtype == TX_ACL)
654da6c28aaSamw lrsize = sizeof (*lr);
655da6c28aaSamw else
656da6c28aaSamw lrsize = sizeof (*lrv0);
657fa9e4066Sahrens
658569e6c63Smarks txsize = lrsize +
659569e6c63Smarks ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) +
660569e6c63Smarks (fuidp ? fuidp->z_domain_str_sz : 0) +
66131e69ea4Smarks sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0);
662da6c28aaSamw
663da6c28aaSamw itx = zil_itx_create(txtype, txsize);
664da6c28aaSamw
665fa9e4066Sahrens lr = (lr_acl_t *)&itx->itx_lr;
666fa9e4066Sahrens lr->lr_foid = zp->z_id;
667da6c28aaSamw if (txtype == TX_ACL) {
668da6c28aaSamw lr->lr_acl_bytes = aclbytes;
669da6c28aaSamw lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
670da6c28aaSamw lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
671da6c28aaSamw if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS)
672da6c28aaSamw lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
673da6c28aaSamw else
674da6c28aaSamw lr->lr_acl_flags = 0;
675da6c28aaSamw }
676da6c28aaSamw lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt;
677da6c28aaSamw
678da6c28aaSamw if (txtype == TX_ACL_V0) {
679da6c28aaSamw lrv0 = (lr_acl_v0_t *)lr;
680da6c28aaSamw bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes);
681da6c28aaSamw } else {
682da6c28aaSamw void *start = (ace_t *)(lr + 1);
683da6c28aaSamw
684da6c28aaSamw bcopy(vsecp->vsa_aclentp, start, aclbytes);
685da6c28aaSamw
686569e6c63Smarks start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);
687da6c28aaSamw
688da6c28aaSamw if (fuidp) {
689da6c28aaSamw start = zfs_log_fuid_ids(fuidp, start);
690da6c28aaSamw (void) zfs_log_fuid_domains(fuidp, start);
691da6c28aaSamw }
692da6c28aaSamw }
693fa9e4066Sahrens
69467bd71c6Sperrin itx->itx_sync = (zp->z_sync_cnt != 0);
6955002558fSNeil Perrin zil_itx_assign(zilog, itx, tx);
696fa9e4066Sahrens }
697