1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
25 */
26
27#include <sys/types.h>
28#include <sys/param.h>
29#include <sys/time.h>
30#include <sys/systm.h>
31#include <sys/sysmacros.h>
32#include <sys/resource.h>
33#include <sys/vfs.h>
34#include <sys/vnode.h>
35#include <sys/sid.h>
36#include <sys/file.h>
37#include <sys/stat.h>
38#include <sys/kmem.h>
39#include <sys/cmn_err.h>
40#include <sys/errno.h>
41#include <sys/unistd.h>
42#include <sys/sdt.h>
43#include <sys/fs/zfs.h>
44#include <sys/mode.h>
45#include <sys/policy.h>
46#include <sys/zfs_znode.h>
47#include <sys/zfs_fuid.h>
48#include <sys/zfs_acl.h>
49#include <sys/zfs_dir.h>
50#include <sys/zfs_vfsops.h>
51#include <sys/dmu.h>
52#include <sys/dnode.h>
53#include <sys/zap.h>
54#include <sys/sa.h>
55#include "fs/fs_subr.h"
56#include <acl/acl_common.h>
57
/* Short aliases for the access-allowed / access-denied ACE types. */
#define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
#define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
/* Inclusive ACE type range accepted by zfs_acl_valid_ace_type(). */
#define	MAX_ACE_TYPE	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
#define	MIN_ACE_TYPE	ALLOW

/*
 * Flag combination compared against (flags & ACE_TYPE_FLAGS) to recognize
 * an owning-group entry.
 */
#define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
/* Named groupings of ACE permission bits. */
#define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
#define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
#define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)

/* Union of the individual ACE permission bits listed below. */
#define	ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
    ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
    ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
    ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)

/*
 * Write-related permission bits, split into a data group and an
 * attribute group; WRITE_MASK is the union of the two.
 */
#define	WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
#define	WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
    ACE_DELETE|ACE_DELETE_CHILD)
#define	WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)

/*
 * NOTE(review): OGE_CLEAR and OKAY_MASK_BITS currently expand to exactly
 * the same set of bits.
 */
#define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

#define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

/* Union of the inheritance-related ACE flags. */
#define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
    ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)

#define	RESTRICTED_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)

/* ACL-wide flag groupings; ZFS_ACL_WIDE_FLAGS is a superset of the V4 set. */
#define	V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
    ZFS_ACL_PROTECTED)

#define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
    ZFS_ACL_OBJ_ACE)

/* Execute bits for user, group and other; used by zfs_mode_compute(). */
#define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
99
100static uint16_t
101zfs_ace_v0_get_type(void *acep)
102{
103	return (((zfs_oldace_t *)acep)->z_type);
104}
105
106static uint16_t
107zfs_ace_v0_get_flags(void *acep)
108{
109	return (((zfs_oldace_t *)acep)->z_flags);
110}
111
112static uint32_t
113zfs_ace_v0_get_mask(void *acep)
114{
115	return (((zfs_oldace_t *)acep)->z_access_mask);
116}
117
118static uint64_t
119zfs_ace_v0_get_who(void *acep)
120{
121	return (((zfs_oldace_t *)acep)->z_fuid);
122}
123
124static void
125zfs_ace_v0_set_type(void *acep, uint16_t type)
126{
127	((zfs_oldace_t *)acep)->z_type = type;
128}
129
130static void
131zfs_ace_v0_set_flags(void *acep, uint16_t flags)
132{
133	((zfs_oldace_t *)acep)->z_flags = flags;
134}
135
136static void
137zfs_ace_v0_set_mask(void *acep, uint32_t mask)
138{
139	((zfs_oldace_t *)acep)->z_access_mask = mask;
140}
141
142static void
143zfs_ace_v0_set_who(void *acep, uint64_t who)
144{
145	((zfs_oldace_t *)acep)->z_fuid = who;
146}
147
148/*ARGSUSED*/
149static size_t
150zfs_ace_v0_size(void *acep)
151{
152	return (sizeof (zfs_oldace_t));
153}
154
155static size_t
156zfs_ace_v0_abstract_size(void)
157{
158	return (sizeof (zfs_oldace_t));
159}
160
161static int
162zfs_ace_v0_mask_off(void)
163{
164	return (offsetof(zfs_oldace_t, z_access_mask));
165}
166
/*
 * v0 accessor: reports no extra payload for an old-format entry; *datap
 * is cleared and the returned length is zero.
 */
/*ARGSUSED*/
static int
zfs_ace_v0_data(void *acep, void **datap)
{
	int extra_len = 0;

	*datap = NULL;
	return (extra_len);
}
174
/*
 * Operations vector for old-format (zfs_oldace_t) ACEs.  zfs_acl_alloc()
 * installs it for any ACL version other than ZFS_ACL_VERSION_FUID.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of acl_ops_t (declared outside this file).
 */
static acl_ops_t zfs_acl_v0_ops = {
	zfs_ace_v0_get_mask,
	zfs_ace_v0_set_mask,
	zfs_ace_v0_get_flags,
	zfs_ace_v0_set_flags,
	zfs_ace_v0_get_type,
	zfs_ace_v0_set_type,
	zfs_ace_v0_get_who,
	zfs_ace_v0_set_who,
	zfs_ace_v0_size,
	zfs_ace_v0_abstract_size,
	zfs_ace_v0_mask_off,
	zfs_ace_v0_data
};
189
190static uint16_t
191zfs_ace_fuid_get_type(void *acep)
192{
193	return (((zfs_ace_hdr_t *)acep)->z_type);
194}
195
196static uint16_t
197zfs_ace_fuid_get_flags(void *acep)
198{
199	return (((zfs_ace_hdr_t *)acep)->z_flags);
200}
201
202static uint32_t
203zfs_ace_fuid_get_mask(void *acep)
204{
205	return (((zfs_ace_hdr_t *)acep)->z_access_mask);
206}
207
208static uint64_t
209zfs_ace_fuid_get_who(void *args)
210{
211	uint16_t entry_type;
212	zfs_ace_t *acep = args;
213
214	entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
215
216	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
217	    entry_type == ACE_EVERYONE)
218		return (-1);
219	return (((zfs_ace_t *)acep)->z_fuid);
220}
221
222static void
223zfs_ace_fuid_set_type(void *acep, uint16_t type)
224{
225	((zfs_ace_hdr_t *)acep)->z_type = type;
226}
227
228static void
229zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
230{
231	((zfs_ace_hdr_t *)acep)->z_flags = flags;
232}
233
234static void
235zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
236{
237	((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
238}
239
240static void
241zfs_ace_fuid_set_who(void *arg, uint64_t who)
242{
243	zfs_ace_t *acep = arg;
244
245	uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
246
247	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
248	    entry_type == ACE_EVERYONE)
249		return;
250	acep->z_fuid = who;
251}
252
253static size_t
254zfs_ace_fuid_size(void *acep)
255{
256	zfs_ace_hdr_t *zacep = acep;
257	uint16_t entry_type;
258
259	switch (zacep->z_type) {
260	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
261	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
262	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
263	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
264		return (sizeof (zfs_object_ace_t));
265	case ALLOW:
266	case DENY:
267		entry_type =
268		    (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
269		if (entry_type == ACE_OWNER ||
270		    entry_type == OWNING_GROUP ||
271		    entry_type == ACE_EVERYONE)
272			return (sizeof (zfs_ace_hdr_t));
273		/*FALLTHROUGH*/
274	default:
275		return (sizeof (zfs_ace_t));
276	}
277}
278
279static size_t
280zfs_ace_fuid_abstract_size(void)
281{
282	return (sizeof (zfs_ace_hdr_t));
283}
284
285static int
286zfs_ace_fuid_mask_off(void)
287{
288	return (offsetof(zfs_ace_hdr_t, z_access_mask));
289}
290
291static int
292zfs_ace_fuid_data(void *acep, void **datap)
293{
294	zfs_ace_t *zacep = acep;
295	zfs_object_ace_t *zobjp;
296
297	switch (zacep->z_hdr.z_type) {
298	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
299	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
300	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
301	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
302		zobjp = acep;
303		*datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
304		return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
305	default:
306		*datap = NULL;
307		return (0);
308	}
309}
310
/*
 * Operations vector for FUID-format ACEs (zfs_ace_hdr_t / zfs_ace_t /
 * zfs_object_ace_t).  zfs_acl_alloc() installs it for
 * ZFS_ACL_VERSION_FUID, and zfs_acl_xform() switches an ACL to it when
 * converting from the old format.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of acl_ops_t (declared outside this file).
 */
static acl_ops_t zfs_acl_fuid_ops = {
	zfs_ace_fuid_get_mask,
	zfs_ace_fuid_set_mask,
	zfs_ace_fuid_get_flags,
	zfs_ace_fuid_set_flags,
	zfs_ace_fuid_get_type,
	zfs_ace_fuid_set_type,
	zfs_ace_fuid_get_who,
	zfs_ace_fuid_set_who,
	zfs_ace_fuid_size,
	zfs_ace_fuid_abstract_size,
	zfs_ace_fuid_mask_off,
	zfs_ace_fuid_data
};
325
326/*
327 * The following three functions are provided for compatibility with
328 * older ZPL version in order to determine if the file use to have
329 * an external ACL and what version of ACL previously existed on the
330 * file.  Would really be nice to not need this, sigh.
331 */
332uint64_t
333zfs_external_acl(znode_t *zp)
334{
335	zfs_acl_phys_t acl_phys;
336	int error;
337
338	if (zp->z_is_sa)
339		return (0);
340
341	/*
342	 * Need to deal with a potential
343	 * race where zfs_sa_upgrade could cause
344	 * z_isa_sa to change.
345	 *
346	 * If the lookup fails then the state of z_is_sa should have
347	 * changed.
348	 */
349
350	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
351	    &acl_phys, sizeof (acl_phys))) == 0)
352		return (acl_phys.z_acl_extern_obj);
353	else {
354		/*
355		 * after upgrade the SA_ZPL_ZNODE_ACL should have been
356		 * removed
357		 */
358		VERIFY(zp->z_is_sa && error == ENOENT);
359		return (0);
360	}
361}
362
363/*
364 * Determine size of ACL in bytes
365 *
366 * This is more complicated than it should be since we have to deal
367 * with old external ACLs.
368 */
369static int
370zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
371    zfs_acl_phys_t *aclphys)
372{
373	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
374	uint64_t acl_count;
375	int size;
376	int error;
377
378	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
379	if (zp->z_is_sa) {
380		if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
381		    &size)) != 0)
382			return (error);
383		*aclsize = size;
384		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
385		    &acl_count, sizeof (acl_count))) != 0)
386			return (error);
387		*aclcount = acl_count;
388	} else {
389		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
390		    aclphys, sizeof (*aclphys))) != 0)
391			return (error);
392
393		if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
394			*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
395			*aclcount = aclphys->z_acl_size;
396		} else {
397			*aclsize = aclphys->z_acl_size;
398			*aclcount = aclphys->z_acl_count;
399		}
400	}
401	return (0);
402}
403
404int
405zfs_znode_acl_version(znode_t *zp)
406{
407	zfs_acl_phys_t acl_phys;
408
409	if (zp->z_is_sa)
410		return (ZFS_ACL_VERSION_FUID);
411	else {
412		int error;
413
414		/*
415		 * Need to deal with a potential
416		 * race where zfs_sa_upgrade could cause
417		 * z_isa_sa to change.
418		 *
419		 * If the lookup fails then the state of z_is_sa should have
420		 * changed.
421		 */
422		if ((error = sa_lookup(zp->z_sa_hdl,
423		    SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
424		    &acl_phys, sizeof (acl_phys))) == 0)
425			return (acl_phys.z_acl_version);
426		else {
427			/*
428			 * After upgrade SA_ZPL_ZNODE_ACL should have
429			 * been removed.
430			 */
431			VERIFY(zp->z_is_sa && error == ENOENT);
432			return (ZFS_ACL_VERSION_FUID);
433		}
434	}
435}
436
437static int
438zfs_acl_version(int version)
439{
440	if (version < ZPL_VERSION_FUID)
441		return (ZFS_ACL_VERSION_INITIAL);
442	else
443		return (ZFS_ACL_VERSION_FUID);
444}
445
446static int
447zfs_acl_version_zp(znode_t *zp)
448{
449	return (zfs_acl_version(zp->z_zfsvfs->z_version));
450}
451
452zfs_acl_t *
453zfs_acl_alloc(int vers)
454{
455	zfs_acl_t *aclp;
456
457	aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
458	list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
459	    offsetof(zfs_acl_node_t, z_next));
460	aclp->z_version = vers;
461	if (vers == ZFS_ACL_VERSION_FUID)
462		aclp->z_ops = zfs_acl_fuid_ops;
463	else
464		aclp->z_ops = zfs_acl_v0_ops;
465	return (aclp);
466}
467
468zfs_acl_node_t *
469zfs_acl_node_alloc(size_t bytes)
470{
471	zfs_acl_node_t *aclnode;
472
473	aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
474	if (bytes) {
475		aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP);
476		aclnode->z_allocdata = aclnode->z_acldata;
477		aclnode->z_allocsize = bytes;
478		aclnode->z_size = bytes;
479	}
480
481	return (aclnode);
482}
483
484static void
485zfs_acl_node_free(zfs_acl_node_t *aclnode)
486{
487	if (aclnode->z_allocsize)
488		kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
489	kmem_free(aclnode, sizeof (zfs_acl_node_t));
490}
491
492static void
493zfs_acl_release_nodes(zfs_acl_t *aclp)
494{
495	zfs_acl_node_t *aclnode;
496
497	while (aclnode = list_head(&aclp->z_acl)) {
498		list_remove(&aclp->z_acl, aclnode);
499		zfs_acl_node_free(aclnode);
500	}
501	aclp->z_acl_count = 0;
502	aclp->z_acl_bytes = 0;
503}
504
505void
506zfs_acl_free(zfs_acl_t *aclp)
507{
508	zfs_acl_release_nodes(aclp);
509	list_destroy(&aclp->z_acl);
510	kmem_free(aclp, sizeof (zfs_acl_t));
511}
512
513static boolean_t
514zfs_acl_valid_ace_type(uint_t type, uint_t flags)
515{
516	uint16_t entry_type;
517
518	switch (type) {
519	case ALLOW:
520	case DENY:
521	case ACE_SYSTEM_AUDIT_ACE_TYPE:
522	case ACE_SYSTEM_ALARM_ACE_TYPE:
523		entry_type = flags & ACE_TYPE_FLAGS;
524		return (entry_type == ACE_OWNER ||
525		    entry_type == OWNING_GROUP ||
526		    entry_type == ACE_EVERYONE || entry_type == 0 ||
527		    entry_type == ACE_IDENTIFIER_GROUP);
528	default:
529		if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
530			return (B_TRUE);
531	}
532	return (B_FALSE);
533}
534
535static boolean_t
536zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
537{
538	/*
539	 * first check type of entry
540	 */
541
542	if (!zfs_acl_valid_ace_type(type, iflags))
543		return (B_FALSE);
544
545	switch (type) {
546	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
547	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
548	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
549	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
550		if (aclp->z_version < ZFS_ACL_VERSION_FUID)
551			return (B_FALSE);
552		aclp->z_hints |= ZFS_ACL_OBJ_ACE;
553	}
554
555	/*
556	 * next check inheritance level flags
557	 */
558
559	if (obj_type == VDIR &&
560	    (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
561		aclp->z_hints |= ZFS_INHERIT_ACE;
562
563	if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
564		if ((iflags & (ACE_FILE_INHERIT_ACE|
565		    ACE_DIRECTORY_INHERIT_ACE)) == 0) {
566			return (B_FALSE);
567		}
568	}
569
570	return (B_TRUE);
571}
572
573static void *
574zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
575    uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
576{
577	zfs_acl_node_t *aclnode;
578
579	ASSERT(aclp);
580
581	if (start == NULL) {
582		aclnode = list_head(&aclp->z_acl);
583		if (aclnode == NULL)
584			return (NULL);
585
586		aclp->z_next_ace = aclnode->z_acldata;
587		aclp->z_curr_node = aclnode;
588		aclnode->z_ace_idx = 0;
589	}
590
591	aclnode = aclp->z_curr_node;
592
593	if (aclnode == NULL)
594		return (NULL);
595
596	if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
597		aclnode = list_next(&aclp->z_acl, aclnode);
598		if (aclnode == NULL)
599			return (NULL);
600		else {
601			aclp->z_curr_node = aclnode;
602			aclnode->z_ace_idx = 0;
603			aclp->z_next_ace = aclnode->z_acldata;
604		}
605	}
606
607	if (aclnode->z_ace_idx < aclnode->z_ace_count) {
608		void *acep = aclp->z_next_ace;
609		size_t ace_size;
610
611		/*
612		 * Make sure we don't overstep our bounds
613		 */
614		ace_size = aclp->z_ops.ace_size(acep);
615
616		if (((caddr_t)acep + ace_size) >
617		    ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
618			return (NULL);
619		}
620
621		*iflags = aclp->z_ops.ace_flags_get(acep);
622		*type = aclp->z_ops.ace_type_get(acep);
623		*access_mask = aclp->z_ops.ace_mask_get(acep);
624		*who = aclp->z_ops.ace_who_get(acep);
625		aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
626		aclnode->z_ace_idx++;
627
628		return ((void *)acep);
629	}
630	return (NULL);
631}
632
633/*ARGSUSED*/
634static uint64_t
635zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
636    uint16_t *flags, uint16_t *type, uint32_t *mask)
637{
638	zfs_acl_t *aclp = datap;
639	zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
640	uint64_t who;
641
642	acep = zfs_acl_next_ace(aclp, acep, &who, mask,
643	    flags, type);
644	return ((uint64_t)(uintptr_t)acep);
645}
646
647static zfs_acl_node_t *
648zfs_acl_curr_node(zfs_acl_t *aclp)
649{
650	ASSERT(aclp->z_curr_node);
651	return (aclp->z_curr_node);
652}
653
654/*
655 * Copy ACE to internal ZFS format.
656 * While processing the ACL each ACE will be validated for correctness.
657 * ACE FUIDs will be created later.
658 */
659int
660zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
661    void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
662    zfs_fuid_info_t **fuidp, cred_t *cr)
663{
664	int i;
665	uint16_t entry_type;
666	zfs_ace_t *aceptr = z_acl;
667	ace_t *acep = datap;
668	zfs_object_ace_t *zobjacep;
669	ace_object_t *aceobjp;
670
671	for (i = 0; i != aclcnt; i++) {
672		aceptr->z_hdr.z_access_mask = acep->a_access_mask;
673		aceptr->z_hdr.z_flags = acep->a_flags;
674		aceptr->z_hdr.z_type = acep->a_type;
675		entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
676		if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
677		    entry_type != ACE_EVERYONE) {
678			aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
679			    cr, (entry_type == 0) ?
680			    ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
681		}
682
683		/*
684		 * Make sure ACE is valid
685		 */
686		if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
687		    aceptr->z_hdr.z_flags) != B_TRUE)
688			return (SET_ERROR(EINVAL));
689
690		switch (acep->a_type) {
691		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
692		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
693		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
694		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
695			zobjacep = (zfs_object_ace_t *)aceptr;
696			aceobjp = (ace_object_t *)acep;
697
698			bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
699			    sizeof (aceobjp->a_obj_type));
700			bcopy(aceobjp->a_inherit_obj_type,
701			    zobjacep->z_inherit_type,
702			    sizeof (aceobjp->a_inherit_obj_type));
703			acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
704			break;
705		default:
706			acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
707		}
708
709		aceptr = (zfs_ace_t *)((caddr_t)aceptr +
710		    aclp->z_ops.ace_size(aceptr));
711	}
712
713	*size = (caddr_t)aceptr - (caddr_t)z_acl;
714
715	return (0);
716}
717
718/*
719 * Copy ZFS ACEs to fixed size ace_t layout
720 */
721static void
722zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
723    void *datap, int filter)
724{
725	uint64_t who;
726	uint32_t access_mask;
727	uint16_t iflags, type;
728	zfs_ace_hdr_t *zacep = NULL;
729	ace_t *acep = datap;
730	ace_object_t *objacep;
731	zfs_object_ace_t *zobjacep;
732	size_t ace_size;
733	uint16_t entry_type;
734
735	while (zacep = zfs_acl_next_ace(aclp, zacep,
736	    &who, &access_mask, &iflags, &type)) {
737
738		switch (type) {
739		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
740		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
741		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
742		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
743			if (filter) {
744				continue;
745			}
746			zobjacep = (zfs_object_ace_t *)zacep;
747			objacep = (ace_object_t *)acep;
748			bcopy(zobjacep->z_object_type,
749			    objacep->a_obj_type,
750			    sizeof (zobjacep->z_object_type));
751			bcopy(zobjacep->z_inherit_type,
752			    objacep->a_inherit_obj_type,
753			    sizeof (zobjacep->z_inherit_type));
754			ace_size = sizeof (ace_object_t);
755			break;
756		default:
757			ace_size = sizeof (ace_t);
758			break;
759		}
760
761		entry_type = (iflags & ACE_TYPE_FLAGS);
762		if ((entry_type != ACE_OWNER &&
763		    entry_type != OWNING_GROUP &&
764		    entry_type != ACE_EVERYONE)) {
765			acep->a_who = zfs_fuid_map_id(zfsvfs, who,
766			    cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
767			    ZFS_ACE_GROUP : ZFS_ACE_USER);
768		} else {
769			acep->a_who = (uid_t)(int64_t)who;
770		}
771		acep->a_access_mask = access_mask;
772		acep->a_flags = iflags;
773		acep->a_type = type;
774		acep = (ace_t *)((caddr_t)acep + ace_size);
775	}
776}
777
778static int
779zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
780    zfs_oldace_t *z_acl, int aclcnt, size_t *size)
781{
782	int i;
783	zfs_oldace_t *aceptr = z_acl;
784
785	for (i = 0; i != aclcnt; i++, aceptr++) {
786		aceptr->z_access_mask = acep[i].a_access_mask;
787		aceptr->z_type = acep[i].a_type;
788		aceptr->z_flags = acep[i].a_flags;
789		aceptr->z_fuid = acep[i].a_who;
790		/*
791		 * Make sure ACE is valid
792		 */
793		if (zfs_ace_valid(obj_type, aclp, aceptr->z_type,
794		    aceptr->z_flags) != B_TRUE)
795			return (SET_ERROR(EINVAL));
796	}
797	*size = (caddr_t)aceptr - (caddr_t)z_acl;
798	return (0);
799}
800
801/*
802 * convert old ACL format to new
803 */
804void
805zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
806{
807	zfs_oldace_t *oldaclp;
808	int i;
809	uint16_t type, iflags;
810	uint32_t access_mask;
811	uint64_t who;
812	void *cookie = NULL;
813	zfs_acl_node_t *newaclnode;
814
815	ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
816	/*
817	 * First create the ACE in a contiguous piece of memory
818	 * for zfs_copy_ace_2_fuid().
819	 *
820	 * We only convert an ACL once, so this won't happen
821	 * everytime.
822	 */
823	oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
824	    KM_SLEEP);
825	i = 0;
826	while (cookie = zfs_acl_next_ace(aclp, cookie, &who,
827	    &access_mask, &iflags, &type)) {
828		oldaclp[i].z_flags = iflags;
829		oldaclp[i].z_type = type;
830		oldaclp[i].z_fuid = who;
831		oldaclp[i++].z_access_mask = access_mask;
832	}
833
834	newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
835	    sizeof (zfs_object_ace_t));
836	aclp->z_ops = zfs_acl_fuid_ops;
837	VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
838	    oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
839	    &newaclnode->z_size, NULL, cr) == 0);
840	newaclnode->z_ace_count = aclp->z_acl_count;
841	aclp->z_version = ZFS_ACL_VERSION;
842	kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
843
844	/*
845	 * Release all previous ACL nodes
846	 */
847
848	zfs_acl_release_nodes(aclp);
849
850	list_insert_head(&aclp->z_acl, newaclnode);
851
852	aclp->z_acl_bytes = newaclnode->z_size;
853	aclp->z_acl_count = newaclnode->z_ace_count;
854
855}
856
857/*
858 * Convert unix access mask to v4 access mask
859 */
860static uint32_t
861zfs_unix_to_v4(uint32_t access_mask)
862{
863	uint32_t new_mask = 0;
864
865	if (access_mask & S_IXOTH)
866		new_mask |= ACE_EXECUTE;
867	if (access_mask & S_IWOTH)
868		new_mask |= ACE_WRITE_DATA;
869	if (access_mask & S_IROTH)
870		new_mask |= ACE_READ_DATA;
871	return (new_mask);
872}
873
874static void
875zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
876    uint16_t access_type, uint64_t fuid, uint16_t entry_type)
877{
878	uint16_t type = entry_type & ACE_TYPE_FLAGS;
879
880	aclp->z_ops.ace_mask_set(acep, access_mask);
881	aclp->z_ops.ace_type_set(acep, access_type);
882	aclp->z_ops.ace_flags_set(acep, entry_type);
883	if ((type != ACE_OWNER && type != OWNING_GROUP &&
884	    type != ACE_EVERYONE))
885		aclp->z_ops.ace_who_set(acep, fuid);
886}
887
888/*
889 * Determine mode of file based on ACL.
890 */
891uint64_t
892zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
893    uint64_t *pflags, uint64_t fuid, uint64_t fgid)
894{
895	int		entry_type;
896	mode_t		mode;
897	mode_t		seen = 0;
898	zfs_ace_hdr_t	*acep = NULL;
899	uint64_t	who;
900	uint16_t	iflags, type;
901	uint32_t	access_mask;
902	boolean_t	an_exec_denied = B_FALSE;
903
904	mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
905
906	while (acep = zfs_acl_next_ace(aclp, acep, &who,
907	    &access_mask, &iflags, &type)) {
908
909		if (!zfs_acl_valid_ace_type(type, iflags))
910			continue;
911
912		entry_type = (iflags & ACE_TYPE_FLAGS);
913
914		/*
915		 * Skip over any inherit_only ACEs
916		 */
917		if (iflags & ACE_INHERIT_ONLY_ACE)
918			continue;
919
920		if (entry_type == ACE_OWNER || (entry_type == 0 &&
921		    who == fuid)) {
922			if ((access_mask & ACE_READ_DATA) &&
923			    (!(seen & S_IRUSR))) {
924				seen |= S_IRUSR;
925				if (type == ALLOW) {
926					mode |= S_IRUSR;
927				}
928			}
929			if ((access_mask & ACE_WRITE_DATA) &&
930			    (!(seen & S_IWUSR))) {
931				seen |= S_IWUSR;
932				if (type == ALLOW) {
933					mode |= S_IWUSR;
934				}
935			}
936			if ((access_mask & ACE_EXECUTE) &&
937			    (!(seen & S_IXUSR))) {
938				seen |= S_IXUSR;
939				if (type == ALLOW) {
940					mode |= S_IXUSR;
941				}
942			}
943		} else if (entry_type == OWNING_GROUP ||
944		    (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
945			if ((access_mask & ACE_READ_DATA) &&
946			    (!(seen & S_IRGRP))) {
947				seen |= S_IRGRP;
948				if (type == ALLOW) {
949					mode |= S_IRGRP;
950				}
951			}
952			if ((access_mask & ACE_WRITE_DATA) &&
953			    (!(seen & S_IWGRP))) {
954				seen |= S_IWGRP;
955				if (type == ALLOW) {
956					mode |= S_IWGRP;
957				}
958			}
959			if ((access_mask & ACE_EXECUTE) &&
960			    (!(seen & S_IXGRP))) {
961				seen |= S_IXGRP;
962				if (type == ALLOW) {
963					mode |= S_IXGRP;
964				}
965			}
966		} else if (entry_type == ACE_EVERYONE) {
967			if ((access_mask & ACE_READ_DATA)) {
968				if (!(seen & S_IRUSR)) {
969					seen |= S_IRUSR;
970					if (type == ALLOW) {
971						mode |= S_IRUSR;
972					}
973				}
974				if (!(seen & S_IRGRP)) {
975					seen |= S_IRGRP;
976					if (type == ALLOW) {
977						mode |= S_IRGRP;
978					}
979				}
980				if (!(seen & S_IROTH)) {
981					seen |= S_IROTH;
982					if (type == ALLOW) {
983						mode |= S_IROTH;
984					}
985				}
986			}
987			if ((access_mask & ACE_WRITE_DATA)) {
988				if (!(seen & S_IWUSR)) {
989					seen |= S_IWUSR;
990					if (type == ALLOW) {
991						mode |= S_IWUSR;
992					}
993				}
994				if (!(seen & S_IWGRP)) {
995					seen |= S_IWGRP;
996					if (type == ALLOW) {
997						mode |= S_IWGRP;
998					}
999				}
1000				if (!(seen & S_IWOTH)) {
1001					seen |= S_IWOTH;
1002					if (type == ALLOW) {
1003						mode |= S_IWOTH;
1004					}
1005				}
1006			}
1007			if ((access_mask & ACE_EXECUTE)) {
1008				if (!(seen & S_IXUSR)) {
1009					seen |= S_IXUSR;
1010					if (type == ALLOW) {
1011						mode |= S_IXUSR;
1012					}
1013				}
1014				if (!(seen & S_IXGRP)) {
1015					seen |= S_IXGRP;
1016					if (type == ALLOW) {
1017						mode |= S_IXGRP;
1018					}
1019				}
1020				if (!(seen & S_IXOTH)) {
1021					seen |= S_IXOTH;
1022					if (type == ALLOW) {
1023						mode |= S_IXOTH;
1024					}
1025				}
1026			}
1027		} else {
1028			/*
1029			 * Only care if this IDENTIFIER_GROUP or
1030			 * USER ACE denies execute access to someone,
1031			 * mode is not affected
1032			 */
1033			if ((access_mask & ACE_EXECUTE) && type == DENY)
1034				an_exec_denied = B_TRUE;
1035		}
1036	}
1037
1038	/*
1039	 * Failure to allow is effectively a deny, so execute permission
1040	 * is denied if it was never mentioned or if we explicitly
1041	 * weren't allowed it.
1042	 */
1043	if (!an_exec_denied &&
1044	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
1045	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
1046		an_exec_denied = B_TRUE;
1047
1048	if (an_exec_denied)
1049		*pflags &= ~ZFS_NO_EXECS_DENIED;
1050	else
1051		*pflags |= ZFS_NO_EXECS_DENIED;
1052
1053	return (mode);
1054}
1055
1056/*
1057 * Read an external acl object.  If the intent is to modify, always
1058 * create a new acl and leave any cached acl in place.
1059 */
1060int
1061zfs_acl_node_read(struct znode *zp, boolean_t have_lock, zfs_acl_t **aclpp,
1062    boolean_t will_modify)
1063{
1064	zfs_acl_t	*aclp;
1065	int		aclsize;
1066	int		acl_count;
1067	zfs_acl_node_t	*aclnode;
1068	zfs_acl_phys_t	znode_acl;
1069	int		version;
1070	int		error;
1071	boolean_t	drop_lock = B_FALSE;
1072
1073	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1074
1075	if (zp->z_acl_cached && !will_modify) {
1076		*aclpp = zp->z_acl_cached;
1077		return (0);
1078	}
1079
1080	/*
1081	 * close race where znode could be upgrade while trying to
1082	 * read the znode attributes.
1083	 *
1084	 * But this could only happen if the file isn't already an SA
1085	 * znode
1086	 */
1087	if (!zp->z_is_sa && !have_lock) {
1088		mutex_enter(&zp->z_lock);
1089		drop_lock = B_TRUE;
1090	}
1091	version = zfs_znode_acl_version(zp);
1092
1093	if ((error = zfs_acl_znode_info(zp, &aclsize,
1094	    &acl_count, &znode_acl)) != 0) {
1095		goto done;
1096	}
1097
1098	aclp = zfs_acl_alloc(version);
1099
1100	aclp->z_acl_count = acl_count;
1101	aclp->z_acl_bytes = aclsize;
1102
1103	aclnode = zfs_acl_node_alloc(aclsize);
1104	aclnode->z_ace_count = aclp->z_acl_count;
1105	aclnode->z_size = aclsize;
1106
1107	if (!zp->z_is_sa) {
1108		if (znode_acl.z_acl_extern_obj) {
1109			error = dmu_read(zp->z_zfsvfs->z_os,
1110			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
1111			    aclnode->z_acldata, DMU_READ_PREFETCH);
1112		} else {
1113			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
1114			    aclnode->z_size);
1115		}
1116	} else {
1117		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
1118		    aclnode->z_acldata, aclnode->z_size);
1119	}
1120
1121	if (error != 0) {
1122		zfs_acl_free(aclp);
1123		zfs_acl_node_free(aclnode);
1124		/* convert checksum errors into IO errors */
1125		if (error == ECKSUM)
1126			error = SET_ERROR(EIO);
1127		goto done;
1128	}
1129
1130	list_insert_head(&aclp->z_acl, aclnode);
1131
1132	*aclpp = aclp;
1133	if (!will_modify)
1134		zp->z_acl_cached = aclp;
1135done:
1136	if (drop_lock)
1137		mutex_exit(&zp->z_lock);
1138	return (error);
1139}
1140
1141/*ARGSUSED*/
1142void
1143zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
1144    boolean_t start, void *userdata)
1145{
1146	zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
1147
1148	if (start) {
1149		cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
1150	} else {
1151		cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
1152		    cb->cb_acl_node);
1153	}
1154	*dataptr = cb->cb_acl_node->z_acldata;
1155	*length = cb->cb_acl_node->z_size;
1156}
1157
1158int
1159zfs_acl_chown_setattr(znode_t *zp)
1160{
1161	int error;
1162	zfs_acl_t *aclp;
1163
1164	ASSERT(MUTEX_HELD(&zp->z_lock));
1165	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1166
1167	if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0)
1168		zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
1169		    &zp->z_pflags, zp->z_uid, zp->z_gid);
1170	return (error);
1171}
1172
/*
 * Common code for setting ACLs.
 *
 * Recomputes the file mode from 'aclp', discards any ACL cached on the
 * znode, and stores the ACL together with the mode, flags and ctime
 * attributes through a single sa_bulk_update() in transaction 'tx'.
 * Znodes using the SA layout (z_is_sa) store the ACEs and the ACE count
 * as SA attributes; other znodes use the legacy zfs_acl_phys_t layout,
 * with the ACEs either embedded in it or written to a separate DMU object.
 *
 * Returns 0 on success, or the error from sa_lookup(), dmu_object_free()
 * or sa_bulk_update().  Note that z_mode has already been updated and the
 * cached ACL already dropped by the time the legacy path can fail.
 *
 * NOTE(review): this comment used to say that the function is called from
 * zfs_mode_update, zfs_perm_init and zfs_setacl, and that zfs_setacl
 * passes a non-NULL inherit pointer (ihp).  The current signature takes no
 * such argument, so that description is stale; confirm the caller list.
 */
int
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
{
	int			error;
	zfsvfs_t		*zfsvfs = zp->z_zfsvfs;
	dmu_object_type_t	otype;
	zfs_acl_locator_cb_t	locate = { 0 };
	uint64_t		mode;
	/* mode, flags, ctime, plus at most two ACL attributes */
	sa_bulk_attr_t		bulk[5];
	uint64_t		ctime[2];
	int			count = 0;
	zfs_acl_phys_t		acl_phys;

	mode = zp->z_mode;

	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
	    zp->z_uid, zp->z_gid);

	zp->z_mode = mode;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
	    &mode, sizeof (mode));
	/*
	 * The flags entry is given the address of zp->z_pflags, which is
	 * still modified further down before the bulk update is issued.
	 */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	/*
	 * ctime is only filled in by zfs_tstamp_update_setup() just before
	 * sa_bulk_update() below; here only its address is recorded.
	 */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, sizeof (ctime));

	/* The cached copy no longer matches what is about to be written. */
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	/*
	 * Upgrade needed?
	 */
	if (!zfsvfs->z_use_fuids) {
		otype = DMU_OT_OLDACL;
	} else {
		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
			zfs_acl_xform(zp, aclp, cr);
		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
		otype = DMU_OT_ACL;
	}

	/*
	 * Arrgh, we have to handle old on disk format
	 * as well as newer (preferred) SA format.
	 */

	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
		/*
		 * The ACE data is spread over the ACL's node list, so it is
		 * supplied through the zfs_acl_data_locator() callback.
		 */
		locate.cb_aclp = aclp;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
	} else { /* Painful legacy way */
		zfs_acl_node_t *aclnode;
		uint64_t off = 0;
		uint64_t aoid;

		/* Start from the ACL header currently stored for the znode. */
		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
		    &acl_phys, sizeof (acl_phys))) != 0)
			return (error);

		aoid = acl_phys.z_acl_extern_obj;

		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			/*
			 * If ACL was previously external and we are now
			 * converting to new ACL format then release old
			 * ACL object and create a new one.
			 */
			if (aoid &&
			    aclp->z_version != acl_phys.z_acl_version) {
				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
				if (error)
					return (error);
				aoid = 0;
			}
			if (aoid == 0) {
				aoid = dmu_object_alloc(zfsvfs->z_os,
				    otype, aclp->z_acl_bytes,
				    otype == DMU_OT_ACL ?
				    DMU_OT_SYSACL : DMU_OT_NONE,
				    otype == DMU_OT_ACL ?
				    DN_OLD_MAX_BONUSLEN : 0, tx);
			} else {
				/* Reuse the object; the result is ignored. */
				(void) dmu_object_set_blocksize(zfsvfs->z_os,
				    aoid, aclp->z_acl_bytes, 0, tx);
			}
			acl_phys.z_acl_extern_obj = aoid;
			/* Write the non-empty nodes back to back. */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				dmu_write(zfsvfs->z_os, aoid, off,
				    aclnode->z_size, aclnode->z_acldata, tx);
				off += aclnode->z_size;
			}
		} else {
			void *start = acl_phys.z_ace_data;
			/*
			 * Migrating back embedded?
			 */
			if (acl_phys.z_acl_extern_obj) {
				error = dmu_object_free(zfsvfs->z_os,
				    acl_phys.z_acl_extern_obj, tx);
				if (error)
					return (error);
				acl_phys.z_acl_extern_obj = 0;
			}

			/* Copy the non-empty nodes into the embedded area. */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				bcopy(aclnode->z_acldata, start,
				    aclnode->z_size);
				start = (caddr_t)start + aclnode->z_size;
			}
		}
		/*
		 * If Old version then swap count/bytes to match old
		 * layout of znode_acl_phys_t.
		 */
		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
			acl_phys.z_acl_size = aclp->z_acl_count;
			acl_phys.z_acl_count = aclp->z_acl_bytes;
		} else {
			acl_phys.z_acl_size = aclp->z_acl_bytes;
			acl_phys.z_acl_count = aclp->z_acl_count;
		}
		acl_phys.z_acl_version = aclp->z_version;

		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (acl_phys));
	}

	/*
	 * Replace ACL wide bits, but first clear them.
	 */
	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;

	zp->z_pflags |= aclp->z_hints;

	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
		zp->z_pflags |= ZFS_ACL_TRIVIAL;

	/* Fill in ctime, then push every queued attribute in one update. */
	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE);
	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
}
1331
1332static void
1333zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t split, boolean_t trim,
1334    zfs_acl_t *aclp)
1335{
1336	void		*acep = NULL;
1337	uint64_t	who;
1338	int		new_count, new_bytes;
1339	int		ace_size;
1340	int		entry_type;
1341	uint16_t	iflags, type;
1342	uint32_t	access_mask;
1343	zfs_acl_node_t	*newnode;
1344	size_t		abstract_size = aclp->z_ops.ace_abstract_size();
1345	void		*zacep;
1346	boolean_t	isdir;
1347	trivial_acl_t	masks;
1348
1349	new_count = new_bytes = 0;
1350
1351	isdir = (vtype == VDIR);
1352
1353	acl_trivial_access_masks((mode_t)mode, isdir, &masks);
1354
1355	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
1356
1357	zacep = newnode->z_acldata;
1358	if (masks.allow0) {
1359		zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
1360		zacep = (void *)((uintptr_t)zacep + abstract_size);
1361		new_count++;
1362		new_bytes += abstract_size;
1363	}
1364	if (masks.deny1) {
1365		zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
1366		zacep = (void *)((uintptr_t)zacep + abstract_size);
1367		new_count++;
1368		new_bytes += abstract_size;
1369	}
1370	if (masks.deny2) {
1371		zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
1372		zacep = (void *)((uintptr_t)zacep + abstract_size);
1373		new_count++;
1374		new_bytes += abstract_size;
1375	}
1376
1377	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
1378	    &iflags, &type)) {
1379		entry_type = (iflags & ACE_TYPE_FLAGS);
1380		/*
1381		 * ACEs used to represent the file mode may be divided
1382		 * into an equivalent pair of inherit-only and regular
1383		 * ACEs, if they are inheritable.
1384		 * Skip regular ACEs, which are replaced by the new mode.
1385		 */
1386		if (split && (entry_type == ACE_OWNER ||
1387		    entry_type == OWNING_GROUP ||
1388		    entry_type == ACE_EVERYONE)) {
1389			if (!isdir || !(iflags &
1390			    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1391				continue;
1392			/*
1393			 * We preserve owner@, group@, or @everyone
1394			 * permissions, if they are inheritable, by
1395			 * copying them to inherit_only ACEs. This
1396			 * prevents inheritable permissions from being
1397			 * altered along with the file mode.
1398			 */
1399			iflags |= ACE_INHERIT_ONLY_ACE;
1400		}
1401
1402		/*
1403		 * If this ACL has any inheritable ACEs, mark that in
1404		 * the hints (which are later masked into the pflags)
1405		 * so create knows to do inheritance.
1406		 */
1407		if (isdir && (iflags &
1408		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1409			aclp->z_hints |= ZFS_INHERIT_ACE;
1410
1411		if ((type != ALLOW && type != DENY) ||
1412		    (iflags & ACE_INHERIT_ONLY_ACE)) {
1413			switch (type) {
1414			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1415			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1416			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1417			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1418				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
1419				break;
1420			}
1421		} else {
1422			/*
1423			 * Limit permissions granted by ACEs to be no greater
1424			 * than permissions of the requested group mode.
1425			 * Applies when the "aclmode" property is set to
1426			 * "groupmask".
1427			 */
1428			if ((type == ALLOW) && trim)
1429				access_mask &= masks.group;
1430		}
1431		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
1432		ace_size = aclp->z_ops.ace_size(acep);
1433		zacep = (void *)((uintptr_t)zacep + ace_size);
1434		new_count++;
1435		new_bytes += ace_size;
1436	}
1437	zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
1438	zacep = (void *)((uintptr_t)zacep + abstract_size);
1439	zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
1440	zacep = (void *)((uintptr_t)zacep + abstract_size);
1441	zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);
1442
1443	new_count += 3;
1444	new_bytes += abstract_size * 3;
1445	zfs_acl_release_nodes(aclp);
1446	aclp->z_acl_count = new_count;
1447	aclp->z_acl_bytes = new_bytes;
1448	newnode->z_ace_count = new_count;
1449	newnode->z_size = new_bytes;
1450	list_insert_tail(&aclp->z_acl, newnode);
1451}
1452
1453int
1454zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
1455{
1456	int error = 0;
1457
1458	mutex_enter(&zp->z_acl_lock);
1459	mutex_enter(&zp->z_lock);
1460	if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
1461		*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
1462	else
1463		error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
1464
1465	if (error == 0) {
1466		(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
1467		zfs_acl_chmod(ZTOV(zp)->v_type, mode, B_TRUE,
1468		    (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
1469	}
1470	mutex_exit(&zp->z_lock);
1471	mutex_exit(&zp->z_acl_lock);
1472
1473	return (error);
1474}
1475
1476/*
1477 * Should ACE be inherited?
1478 */
1479static int
1480zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
1481{
1482	int	iflags = (acep_flags & 0xf);
1483
1484	if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
1485		return (1);
1486	else if (iflags & ACE_FILE_INHERIT_ACE)
1487		return (!((vtype == VDIR) &&
1488		    (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
1489	return (0);
1490}
1491
/*
 * inherit inheritable ACEs from parent
 *
 * Builds a new ACL, of the same version as 'paclp', for an object of type
 * 'vtype' that is being created under the directory owning 'paclp'.  Each
 * inherited ACE is placed in its own ACL node and marked with
 * ACE_INHERITED_ACE.  How ACEs are filtered and adjusted depends on the
 * file system's z_acl_inherit setting:
 *	ZFS_ACL_DISCARD		nothing is inherited (also true for VLNK)
 *	ZFS_ACL_NOALLOW		ALLOW ACEs are skipped
 *	ZFS_ACL_RESTRICTED	RESTRICTED_CLEAR bits are removed from
 *				ALLOW ACEs
 *	ZFS_ACL_PASSTHROUGH_X	ACE_EXECUTE is removed from ALLOW ACEs of
 *				non-directories when 'mode' has no execute
 *				bit set
 *
 * 'mode' is only consulted for the ZFS_ACL_PASSTHROUGH_X case.
 *
 * *need_chmod is set to B_TRUE on entry and cleared when, under one of the
 * passthrough settings, an owner@, group@ or everyone@ ACE is inherited by
 * a regular file, or by a directory through a directory-inherit ACE.
 *
 * Returns the new ACL, which may contain no ACEs.
 */
static zfs_acl_t *
zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
    uint64_t mode, boolean_t *need_chmod)
{
	void		*pacep = NULL;
	void		*acep;
	zfs_acl_node_t  *aclnode;
	zfs_acl_t	*aclp = NULL;
	uint64_t	who;
	uint32_t	access_mask;
	uint16_t	iflags, newflags, type;
	size_t		ace_size;
	void		*data1, *data2;
	size_t		data1sz, data2sz;
	uint_t		aclinherit;
	boolean_t	isdir = (vtype == VDIR);
	boolean_t	isreg = (vtype == VREG);

	*need_chmod = B_TRUE;

	aclp = zfs_acl_alloc(paclp->z_version);
	aclinherit = zfsvfs->z_acl_inherit;
	/* Nothing to inherit: hand back the empty ACL. */
	if (aclinherit == ZFS_ACL_DISCARD || vtype == VLNK)
		return (aclp);

	while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
	    &access_mask, &iflags, &type)) {

		/*
		 * don't inherit bogus ACEs
		 */
		if (!zfs_acl_valid_ace_type(type, iflags))
			continue;

		/*
		 * Check if ACE is inheritable by this vnode
		 */
		if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
		    !zfs_ace_can_use(vtype, iflags))
			continue;

		/*
		 * If owner@, group@, or everyone@ inheritable
		 * then zfs_acl_chmod() isn't needed.
		 */
		if ((aclinherit == ZFS_ACL_PASSTHROUGH ||
		    aclinherit == ZFS_ACL_PASSTHROUGH_X) &&
		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
		    ((iflags & OWNING_GROUP) == OWNING_GROUP)) &&
		    (isreg || (isdir && (iflags & ACE_DIRECTORY_INHERIT_ACE))))
			*need_chmod = B_FALSE;

		/*
		 * Strip inherited execute permission from file if
		 * not in mode
		 */
		if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
		    !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
			access_mask &= ~ACE_EXECUTE;
		}

		/*
		 * Strip write_acl and write_owner from permissions
		 * when inheriting an ACE
		 */
		if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
			access_mask &= ~RESTRICTED_CLEAR;
		}

		/*
		 * Give the inherited ACE a node of its own, sized like the
		 * parent's ACE, and append it to the new ACL.
		 */
		ace_size = aclp->z_ops.ace_size(pacep);
		aclnode = zfs_acl_node_alloc(ace_size);
		list_insert_tail(&aclp->z_acl, aclnode);
		acep = aclnode->z_acldata;

		zfs_set_ace(aclp, acep, access_mask, type,
		    who, iflags|ACE_INHERITED_ACE);

		/*
		 * Copy special opaque data if any
		 */
		if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) {
			/* The copy must offer exactly the same data size. */
			VERIFY((data2sz = aclp->z_ops.ace_data(acep,
			    &data2)) == data1sz);
			bcopy(data1, data2, data2sz);
		}

		aclp->z_acl_count++;
		aclnode->z_ace_count++;
		aclp->z_acl_bytes += aclnode->z_size;
		newflags = aclp->z_ops.ace_flags_get(acep);

		/*
		 * If ACE is not to be inherited further, or if the vnode is
		 * not a directory, remove all inheritance flags
		 */
		if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
			newflags &= ~ALL_INHERIT;
			aclp->z_ops.ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
			continue;
		}

		/*
		 * This directory has an inheritable ACE
		 */
		aclp->z_hints |= ZFS_INHERIT_ACE;

		/*
		 * If only FILE_INHERIT is set then turn on
		 * inherit_only
		 */
		if ((iflags & (ACE_FILE_INHERIT_ACE |
		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
			newflags |= ACE_INHERIT_ONLY_ACE;
			aclp->z_ops.ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
		} else {
			/* Otherwise the ACE also applies to this directory. */
			newflags &= ~ACE_INHERIT_ONLY_ACE;
			aclp->z_ops.ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
		}
	}

	return (aclp);
}
1620
1621/*
1622 * Create file system object initial permissions
1623 * including inheritable ACEs.
1624 * Also, create FUIDs for owner and group.
1625 */
1626int
1627zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
1628    vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
1629{
1630	int		error;
1631	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1632	zfs_acl_t	*paclp;
1633	gid_t		gid;
1634	boolean_t	need_chmod = B_TRUE;
1635	boolean_t	trim = B_FALSE;
1636	boolean_t	inherited = B_FALSE;
1637
1638	bzero(acl_ids, sizeof (zfs_acl_ids_t));
1639	acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
1640
1641	if (vsecp)
1642		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
1643		    &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
1644			return (error);
1645	/*
1646	 * Determine uid and gid.
1647	 */
1648	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
1649	    ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
1650		acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
1651		    (uint64_t)vap->va_uid, cr,
1652		    ZFS_OWNER, &acl_ids->z_fuidp);
1653		acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1654		    (uint64_t)vap->va_gid, cr,
1655		    ZFS_GROUP, &acl_ids->z_fuidp);
1656		gid = vap->va_gid;
1657	} else {
1658		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
1659		    cr, &acl_ids->z_fuidp);
1660		acl_ids->z_fgid = 0;
1661		if (vap->va_mask & AT_GID)  {
1662			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1663			    (uint64_t)vap->va_gid,
1664			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
1665			gid = vap->va_gid;
1666			if (acl_ids->z_fgid != dzp->z_gid &&
1667			    !groupmember(vap->va_gid, cr) &&
1668			    secpolicy_vnode_create_gid(cr) != 0)
1669				acl_ids->z_fgid = 0;
1670		}
1671		if (acl_ids->z_fgid == 0) {
1672			if (dzp->z_mode & S_ISGID) {
1673				char		*domain;
1674				uint32_t	rid;
1675
1676				acl_ids->z_fgid = dzp->z_gid;
1677				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
1678				    cr, ZFS_GROUP);
1679
1680				if (zfsvfs->z_use_fuids &&
1681				    IS_EPHEMERAL(acl_ids->z_fgid)) {
1682					domain = zfs_fuid_idx_domain(
1683					    &zfsvfs->z_fuid_idx,
1684					    FUID_INDEX(acl_ids->z_fgid));
1685					rid = FUID_RID(acl_ids->z_fgid);
1686					zfs_fuid_node_add(&acl_ids->z_fuidp,
1687					    domain, rid,
1688					    FUID_INDEX(acl_ids->z_fgid),
1689					    acl_ids->z_fgid, ZFS_GROUP);
1690				}
1691			} else {
1692				acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
1693				    ZFS_GROUP, cr, &acl_ids->z_fuidp);
1694				gid = crgetgid(cr);
1695			}
1696		}
1697	}
1698
1699	/*
1700	 * If we're creating a directory, and the parent directory has the
1701	 * set-GID bit set, set in on the new directory.
1702	 * Otherwise, if the user is neither privileged nor a member of the
1703	 * file's new group, clear the file's set-GID bit.
1704	 */
1705
1706	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
1707	    (vap->va_type == VDIR)) {
1708		acl_ids->z_mode |= S_ISGID;
1709	} else {
1710		if ((acl_ids->z_mode & S_ISGID) &&
1711		    secpolicy_vnode_setids_setgids(cr, gid) != 0)
1712			acl_ids->z_mode &= ~S_ISGID;
1713	}
1714
1715	if (acl_ids->z_aclp == NULL) {
1716		mutex_enter(&dzp->z_acl_lock);
1717		mutex_enter(&dzp->z_lock);
1718		if (!(flag & IS_ROOT_NODE) &&
1719		    (dzp->z_pflags & ZFS_INHERIT_ACE) &&
1720		    !(dzp->z_pflags & ZFS_XATTR)) {
1721			VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
1722			    &paclp, B_FALSE));
1723			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
1724			    vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
1725			inherited = B_TRUE;
1726		} else {
1727			acl_ids->z_aclp =
1728			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
1729			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
1730		}
1731		mutex_exit(&dzp->z_lock);
1732		mutex_exit(&dzp->z_acl_lock);
1733
1734		if (need_chmod) {
1735			if (vap->va_type == VDIR)
1736				acl_ids->z_aclp->z_hints |=
1737				    ZFS_ACL_AUTO_INHERIT;
1738
1739			if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
1740			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
1741			    zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
1742				trim = B_TRUE;
1743			zfs_acl_chmod(vap->va_type, acl_ids->z_mode, B_FALSE,
1744			    trim, acl_ids->z_aclp);
1745		}
1746	}
1747
1748	if (inherited || vsecp) {
1749		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
1750		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
1751		    acl_ids->z_fuid, acl_ids->z_fgid);
1752		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
1753			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
1754	}
1755
1756	return (0);
1757}
1758
1759/*
1760 * Free ACL and fuid_infop, but not the acl_ids structure
1761 */
1762void
1763zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
1764{
1765	if (acl_ids->z_aclp)
1766		zfs_acl_free(acl_ids->z_aclp);
1767	if (acl_ids->z_fuidp)
1768		zfs_fuid_info_free(acl_ids->z_fuidp);
1769	acl_ids->z_aclp = NULL;
1770	acl_ids->z_fuidp = NULL;
1771}
1772
1773boolean_t
1774zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid)
1775{
1776	return (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid) ||
1777	    zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid) ||
1778	    (projid != ZFS_DEFAULT_PROJID && projid != ZFS_INVALID_PROJID &&
1779	    zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid)));
1780}
1781
1782/*
1783 * Retrieve a file's ACL
1784 */
1785int
1786zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1787{
1788	zfs_acl_t	*aclp;
1789	ulong_t		mask;
1790	int		error;
1791	int		count = 0;
1792	int		largeace = 0;
1793
1794	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
1795	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
1796
1797	if (mask == 0)
1798		return (SET_ERROR(ENOSYS));
1799
1800	if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
1801		return (error);
1802
1803	mutex_enter(&zp->z_acl_lock);
1804
1805	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
1806	if (error != 0) {
1807		mutex_exit(&zp->z_acl_lock);
1808		return (error);
1809	}
1810
1811	/*
1812	 * Scan ACL to determine number of ACEs
1813	 */
1814	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
1815		void *zacep = NULL;
1816		uint64_t who;
1817		uint32_t access_mask;
1818		uint16_t type, iflags;
1819
1820		while (zacep = zfs_acl_next_ace(aclp, zacep,
1821		    &who, &access_mask, &iflags, &type)) {
1822			switch (type) {
1823			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1824			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1825			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1826			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1827				largeace++;
1828				continue;
1829			default:
1830				count++;
1831			}
1832		}
1833		vsecp->vsa_aclcnt = count;
1834	} else
1835		count = (int)aclp->z_acl_count;
1836
1837	if (mask & VSA_ACECNT) {
1838		vsecp->vsa_aclcnt = count;
1839	}
1840
1841	if (mask & VSA_ACE) {
1842		size_t aclsz;
1843
1844		aclsz = count * sizeof (ace_t) +
1845		    sizeof (ace_object_t) * largeace;
1846
1847		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
1848		vsecp->vsa_aclentsz = aclsz;
1849
1850		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
1851			zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
1852			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
1853		else {
1854			zfs_acl_node_t *aclnode;
1855			void *start = vsecp->vsa_aclentp;
1856
1857			for (aclnode = list_head(&aclp->z_acl); aclnode;
1858			    aclnode = list_next(&aclp->z_acl, aclnode)) {
1859				bcopy(aclnode->z_acldata, start,
1860				    aclnode->z_size);
1861				start = (caddr_t)start + aclnode->z_size;
1862			}
1863			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
1864			    aclp->z_acl_bytes);
1865		}
1866	}
1867	if (mask & VSA_ACE_ACLFLAGS) {
1868		vsecp->vsa_aclflags = 0;
1869		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
1870			vsecp->vsa_aclflags |= ACL_DEFAULTED;
1871		if (zp->z_pflags & ZFS_ACL_PROTECTED)
1872			vsecp->vsa_aclflags |= ACL_PROTECTED;
1873		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
1874			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
1875	}
1876
1877	mutex_exit(&zp->z_acl_lock);
1878
1879	return (0);
1880}
1881
1882int
1883zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
1884    vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
1885{
1886	zfs_acl_t *aclp;
1887	zfs_acl_node_t *aclnode;
1888	int aclcnt = vsecp->vsa_aclcnt;
1889	int error;
1890
1891	if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
1892		return (SET_ERROR(EINVAL));
1893
1894	aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
1895
1896	aclp->z_hints = 0;
1897	aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
1898	if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
1899		if ((error = zfs_copy_ace_2_oldace(obj_type, aclp,
1900		    (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,
1901		    aclcnt, &aclnode->z_size)) != 0) {
1902			zfs_acl_free(aclp);
1903			zfs_acl_node_free(aclnode);
1904			return (error);
1905		}
1906	} else {
1907		if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp,
1908		    vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
1909		    &aclnode->z_size, fuidp, cr)) != 0) {
1910			zfs_acl_free(aclp);
1911			zfs_acl_node_free(aclnode);
1912			return (error);
1913		}
1914	}
1915	aclp->z_acl_bytes = aclnode->z_size;
1916	aclnode->z_ace_count = aclcnt;
1917	aclp->z_acl_count = aclcnt;
1918	list_insert_head(&aclp->z_acl, aclnode);
1919
1920	/*
1921	 * If flags are being set then add them to z_hints
1922	 */
1923	if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
1924		if (vsecp->vsa_aclflags & ACL_PROTECTED)
1925			aclp->z_hints |= ZFS_ACL_PROTECTED;
1926		if (vsecp->vsa_aclflags & ACL_DEFAULTED)
1927			aclp->z_hints |= ZFS_ACL_DEFAULTED;
1928		if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
1929			aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
1930	}
1931
1932	*zaclp = aclp;
1933
1934	return (0);
1935}
1936
1937/*
1938 * Set a file's ACL
1939 */
1940int
1941zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1942{
1943	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1944	zilog_t		*zilog = zfsvfs->z_log;
1945	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1946	dmu_tx_t	*tx;
1947	int		error;
1948	zfs_acl_t	*aclp;
1949	zfs_fuid_info_t	*fuidp = NULL;
1950	boolean_t	fuid_dirtied;
1951	uint64_t	acl_obj;
1952
1953	if (mask == 0)
1954		return (SET_ERROR(ENOSYS));
1955
1956	if (zp->z_pflags & ZFS_IMMUTABLE)
1957		return (SET_ERROR(EPERM));
1958
1959	if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
1960		return (error);
1961
1962	error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
1963	    &aclp);
1964	if (error)
1965		return (error);
1966
1967	/*
1968	 * If ACL wide flags aren't being set then preserve any
1969	 * existing flags.
1970	 */
1971	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
1972		aclp->z_hints |=
1973		    (zp->z_pflags & V4_ACL_WIDE_FLAGS);
1974	}
1975top:
1976	mutex_enter(&zp->z_acl_lock);
1977	mutex_enter(&zp->z_lock);
1978
1979	tx = dmu_tx_create(zfsvfs->z_os);
1980
1981	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1982
1983	fuid_dirtied = zfsvfs->z_fuid_dirty;
1984	if (fuid_dirtied)
1985		zfs_fuid_txhold(zfsvfs, tx);
1986
1987	/*
1988	 * If old version and ACL won't fit in bonus and we aren't
1989	 * upgrading then take out necessary DMU holds
1990	 */
1991
1992	if ((acl_obj = zfs_external_acl(zp)) != 0) {
1993		if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
1994		    zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
1995			dmu_tx_hold_free(tx, acl_obj, 0,
1996			    DMU_OBJECT_END);
1997			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1998			    aclp->z_acl_bytes);
1999		} else {
2000			dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
2001		}
2002	} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2003		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
2004	}
2005
2006	zfs_sa_upgrade_txholds(tx, zp);
2007	error = dmu_tx_assign(tx, TXG_NOWAIT);
2008	if (error) {
2009		mutex_exit(&zp->z_acl_lock);
2010		mutex_exit(&zp->z_lock);
2011
2012		if (error == ERESTART) {
2013			dmu_tx_wait(tx);
2014			dmu_tx_abort(tx);
2015			goto top;
2016		}
2017		dmu_tx_abort(tx);
2018		zfs_acl_free(aclp);
2019		return (error);
2020	}
2021
2022	error = zfs_aclset_common(zp, aclp, cr, tx);
2023	ASSERT(error == 0);
2024	ASSERT(zp->z_acl_cached == NULL);
2025	zp->z_acl_cached = aclp;
2026
2027	if (fuid_dirtied)
2028		zfs_fuid_sync(zfsvfs, tx);
2029
2030	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
2031
2032	if (fuidp)
2033		zfs_fuid_info_free(fuidp);
2034	dmu_tx_commit(tx);
2035done:
2036	mutex_exit(&zp->z_lock);
2037	mutex_exit(&zp->z_acl_lock);
2038
2039	return (error);
2040}
2041
2042/*
2043 * Check accesses of interest (AoI) against attributes of the dataset
2044 * such as read-only.  Returns zero if no AoI conflict with dataset
2045 * attributes, otherwise an appropriate errno is returned.
2046 */
2047static int
2048zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
2049{
2050	if ((v4_mode & WRITE_MASK) &&
2051	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
2052	    (!IS_DEVVP(ZTOV(zp)) ||
2053	    (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
2054		return (SET_ERROR(EROFS));
2055	}
2056
2057	/*
2058	 * Intentionally allow ZFS_READONLY through here.
2059	 * See zfs_zaccess_common().
2060	 */
2061	if ((v4_mode & WRITE_MASK_DATA) &&
2062	    (zp->z_pflags & ZFS_IMMUTABLE)) {
2063		return (SET_ERROR(EPERM));
2064	}
2065
2066	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
2067	    (zp->z_pflags & ZFS_NOUNLINK)) {
2068		return (SET_ERROR(EPERM));
2069	}
2070
2071	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
2072	    (zp->z_pflags & ZFS_AV_QUARANTINED))) {
2073		return (SET_ERROR(EACCES));
2074	}
2075
2076	return (0);
2077}
2078
/*
 * The primary usage of this function is to loop through all of the
 * ACEs in the znode, determining what accesses of interest (AoI) to
 * the caller are allowed or denied.  The AoI are expressed as bits in
 * the working_mode parameter.  As each ACE is processed, bits covered
 * by that ACE are removed from the working_mode.  This removal
 * facilitates two things.  The first is that when the working mode is
 * empty (= 0), we know we've looked at all the AoI. The second is
 * that the ACE interpretation rules don't allow a later ACE to undo
 * something granted or denied by an earlier ACE.  Removing the
 * discovered access or denial enforces this rule.  At the end of
 * processing the ACEs, all AoI that were found to be denied are
 * placed into the working_mode, giving the caller a mask of denied
 * accesses.  Returns:
 *	0		if all AoI granted
 *	EACCES		if the denied mask is non-zero
 *	-1		if nothing was denied but some AoI were not
 *			covered by any matching ACE; those bits are left
 *			in the working_mode
 *	other error	if abnormal failure (e.g., IO error)
 *
 * A secondary usage of the function is to determine if any of the
 * AoI are granted.  If an ACE grants any access in
 * the working_mode, we immediately short circuit out of the function.
 * This mode is chosen by setting anyaccess to B_TRUE.  The
 * working_mode is not a denied access mask upon exit if the function
 * is used in this manner.
 *
 * The znode's z_acl_lock is taken for the duration of the ACE walk and
 * released on every return path.
 */
static int
zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
    boolean_t anyaccess, cred_t *cr)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zfs_acl_t	*aclp;
	int		error;
	uint64_t	who;		/* FUID from the ACE */
	uint16_t	type, iflags;
	uint16_t	entry_type;
	uint32_t	access_mask;
	uint32_t	deny_mask = 0;
	zfs_ace_hdr_t	*acep = NULL;
	boolean_t	checkit;	/* ACE ID matches */

	mutex_enter(&zp->z_acl_lock);

	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
	if (error != 0) {
		mutex_exit(&zp->z_acl_lock);
		return (error);
	}

	ASSERT(zp->z_acl_cached);

	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
	    &iflags, &type)) {
		uint32_t mask_matched;

		if (!zfs_acl_valid_ace_type(type, iflags))
			continue;

		/* Inherit-only ACEs do not apply to the directory itself. */
		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
			continue;

		/* Skip ACE if it does not affect any AoI */
		mask_matched = (access_mask & *working_mode);
		if (!mask_matched)
			continue;

		entry_type = (iflags & ACE_TYPE_FLAGS);

		checkit = B_FALSE;

		/*
		 * Decide whether the ACE applies to 'cr'.  owner@ and
		 * group@ substitute the znode's owner or group for the id
		 * stored in the ACE and then share the user or group test.
		 */
		switch (entry_type) {
		case ACE_OWNER:
			who = zp->z_uid;
			/*FALLTHROUGH*/
		case 0:	/* USER Entry */
			checkit = zfs_user_in_cred(zfsvfs, who, cr);
			break;
		case OWNING_GROUP:
			who = zp->z_gid;
			/*FALLTHROUGH*/
		case ACE_IDENTIFIER_GROUP:
			checkit = zfs_groupmember(zfsvfs, who, cr);
			break;
		case ACE_EVERYONE:
			checkit = B_TRUE;
			break;

		default:
			/*
			 * The zfs_acl_valid_ace_type check above
			 * should make this case impossible.
			 */
			mutex_exit(&zp->z_acl_lock);
			return (SET_ERROR(EIO));
		}

		if (checkit) {
			if (type == DENY) {
				DTRACE_PROBE3(zfs__ace__denies,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				deny_mask |= mask_matched;
			} else {
				DTRACE_PROBE3(zfs__ace__allows,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				/* "any access" mode: one grant is enough */
				if (anyaccess) {
					mutex_exit(&zp->z_acl_lock);
					return (0);
				}
			}
			/* Decided bits cannot be changed by later ACEs. */
			*working_mode &= ~mask_matched;
		}

		/* Are we done? */
		if (*working_mode == 0)
			break;
	}

	mutex_exit(&zp->z_acl_lock);

	/* Put the found 'denies' back on the working mode */
	if (deny_mask) {
		*working_mode |= deny_mask;
		return (SET_ERROR(EACCES));
	} else if (*working_mode) {
		/* undecided bits remain; reported as -1, not an errno */
		return (-1);
	}

	return (0);
}
2211
2212/*
2213 * Return true if any access whatsoever granted, we don't actually
2214 * care what access is granted.
2215 */
2216boolean_t
2217zfs_has_access(znode_t *zp, cred_t *cr)
2218{
2219	uint32_t have = ACE_ALL_PERMS;
2220
2221	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
2222		uid_t owner;
2223
2224		owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2225		return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0);
2226	}
2227	return (B_TRUE);
2228}
2229
2230static int
2231zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
2232    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
2233{
2234	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2235	int err;
2236
2237	*working_mode = v4_mode;
2238	*check_privs = B_TRUE;
2239
2240	/*
2241	 * Short circuit empty requests
2242	 */
2243	if (v4_mode == 0 || zfsvfs->z_replay) {
2244		*working_mode = 0;
2245		return (0);
2246	}
2247
2248	if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
2249		*check_privs = B_FALSE;
2250		return (err);
2251	}
2252
2253	/*
2254	 * The caller requested that the ACL check be skipped.  This
2255	 * would only happen if the caller checked VOP_ACCESS() with a
2256	 * 32 bit ACE mask and already had the appropriate permissions.
2257	 */
2258	if (skipaclchk) {
2259		*working_mode = 0;
2260		return (0);
2261	}
2262
2263	/*
2264	 * Note: ZFS_READONLY represents the "DOS R/O" attribute.
2265	 * When that flag is set, we should behave as if write access
2266	 * were not granted by anything in the ACL.  In particular:
2267	 * We _must_ allow writes after opening the file r/w, then
2268	 * setting the DOS R/O attribute, and writing some more.
2269	 * (Similar to how you can write after fchmod(fd, 0444).)
2270	 *
2271	 * Therefore ZFS_READONLY is ignored in the dataset check
2272	 * above, and checked here as if part of the ACL check.
2273	 * Also note: DOS R/O is ignored for directories.
2274	 */
2275	if ((v4_mode & WRITE_MASK_DATA) &&
2276	    (ZTOV(zp)->v_type != VDIR) &&
2277	    (zp->z_pflags & ZFS_READONLY)) {
2278		return (SET_ERROR(EPERM));
2279	}
2280
2281	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
2282}
2283
2284static int
2285zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
2286    cred_t *cr)
2287{
2288	if (*working_mode != ACE_WRITE_DATA)
2289		return (SET_ERROR(EACCES));
2290
2291	return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
2292	    check_privs, B_FALSE, cr));
2293}
2294
2295int
2296zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
2297{
2298	boolean_t owner = B_FALSE;
2299	boolean_t groupmbr = B_FALSE;
2300	boolean_t is_attr;
2301	uid_t uid = crgetuid(cr);
2302	int error;
2303
2304	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
2305		return (SET_ERROR(EACCES));
2306
2307	is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
2308	    (ZTOV(zdp)->v_type == VDIR));
2309	if (is_attr)
2310		goto slow;
2311
2312
2313	mutex_enter(&zdp->z_acl_lock);
2314
2315	if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
2316		mutex_exit(&zdp->z_acl_lock);
2317		return (0);
2318	}
2319
2320	if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) {
2321		mutex_exit(&zdp->z_acl_lock);
2322		goto slow;
2323	}
2324
2325	if (uid == zdp->z_uid) {
2326		owner = B_TRUE;
2327		if (zdp->z_mode & S_IXUSR) {
2328			mutex_exit(&zdp->z_acl_lock);
2329			return (0);
2330		} else {
2331			mutex_exit(&zdp->z_acl_lock);
2332			goto slow;
2333		}
2334	}
2335	if (groupmember(zdp->z_gid, cr)) {
2336		groupmbr = B_TRUE;
2337		if (zdp->z_mode & S_IXGRP) {
2338			mutex_exit(&zdp->z_acl_lock);
2339			return (0);
2340		} else {
2341			mutex_exit(&zdp->z_acl_lock);
2342			goto slow;
2343		}
2344	}
2345	if (!owner && !groupmbr) {
2346		if (zdp->z_mode & S_IXOTH) {
2347			mutex_exit(&zdp->z_acl_lock);
2348			return (0);
2349		}
2350	}
2351
2352	mutex_exit(&zdp->z_acl_lock);
2353
2354slow:
2355	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
2356	ZFS_ENTER(zdp->z_zfsvfs);
2357	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
2358	ZFS_EXIT(zdp->z_zfsvfs);
2359	return (error);
2360}
2361
2362/*
2363 * Determine whether Access should be granted/denied.
2364 *
2365 * The least priv subsystem is always consulted as a basic privilege
2366 * can define any form of access.
2367 */
2368int
2369zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
2370{
2371	uint32_t	working_mode;
2372	int		error;
2373	int		is_attr;
2374	boolean_t	check_privs;
2375	znode_t		*xzp;
2376	znode_t		*check_zp = zp;
2377	mode_t		needed_bits;
2378	uid_t		owner;
2379
2380	is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));
2381
2382	/*
2383	 * If attribute then validate against base file
2384	 */
2385	if (is_attr) {
2386		uint64_t	parent;
2387
2388		if ((error = sa_lookup(zp->z_sa_hdl,
2389		    SA_ZPL_PARENT(zp->z_zfsvfs), &parent,
2390		    sizeof (parent))) != 0)
2391			return (error);
2392
2393		if ((error = zfs_zget(zp->z_zfsvfs,
2394		    parent, &xzp)) != 0)	{
2395			return (error);
2396		}
2397
2398		check_zp = xzp;
2399
2400		/*
2401		 * fixup mode to map to xattr perms
2402		 */
2403
2404		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
2405			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
2406			mode |= ACE_WRITE_NAMED_ATTRS;
2407		}
2408
2409		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
2410			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
2411			mode |= ACE_READ_NAMED_ATTRS;
2412		}
2413	}
2414
2415	owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2416	/*
2417	 * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
2418	 * in needed_bits.  Map the bits mapped by working_mode (currently
2419	 * missing) in missing_bits.
2420	 * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
2421	 * needed_bits.
2422	 */
2423	needed_bits = 0;
2424
2425	working_mode = mode;
2426	if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
2427	    owner == crgetuid(cr))
2428		working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2429
2430	if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2431	    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2432		needed_bits |= VREAD;
2433	if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2434	    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2435		needed_bits |= VWRITE;
2436	if (working_mode & ACE_EXECUTE)
2437		needed_bits |= VEXEC;
2438
2439	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
2440	    &check_privs, skipaclchk, cr)) == 0) {
2441		if (is_attr)
2442			VN_RELE(ZTOV(xzp));
2443		return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
2444		    needed_bits, needed_bits));
2445	}
2446
2447	if (error && !check_privs) {
2448		if (is_attr)
2449			VN_RELE(ZTOV(xzp));
2450		return (error);
2451	}
2452
2453	if (error && (flags & V_APPEND)) {
2454		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
2455	}
2456
2457	if (error && check_privs) {
2458		mode_t		checkmode = 0;
2459
2460		/*
2461		 * First check for implicit owner permission on
2462		 * read_acl/read_attributes
2463		 */
2464
2465		error = 0;
2466		ASSERT(working_mode != 0);
2467
2468		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
2469		    owner == crgetuid(cr)))
2470			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2471
2472		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2473		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2474			checkmode |= VREAD;
2475		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2476		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2477			checkmode |= VWRITE;
2478		if (working_mode & ACE_EXECUTE)
2479			checkmode |= VEXEC;
2480
2481		error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner,
2482		    needed_bits & ~checkmode, needed_bits);
2483
2484		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
2485			error = secpolicy_vnode_chown(cr, owner);
2486		if (error == 0 && (working_mode & ACE_WRITE_ACL))
2487			error = secpolicy_vnode_setdac(cr, owner);
2488
2489		if (error == 0 && (working_mode &
2490		    (ACE_DELETE|ACE_DELETE_CHILD)))
2491			error = secpolicy_vnode_remove(cr);
2492
2493		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
2494			error = secpolicy_vnode_chown(cr, owner);
2495		}
2496		if (error == 0) {
2497			/*
2498			 * See if any bits other than those already checked
2499			 * for are still present.  If so then return EACCES
2500			 */
2501			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
2502				error = SET_ERROR(EACCES);
2503			}
2504		}
2505	} else if (error == 0) {
2506		error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
2507		    needed_bits, needed_bits);
2508	}
2509
2510
2511	if (is_attr)
2512		VN_RELE(ZTOV(xzp));
2513
2514	return (error);
2515}
2516
2517/*
2518 * Translate traditional unix VREAD/VWRITE/VEXEC mode into
2519 * native ACL format and call zfs_zaccess()
2520 */
2521int
2522zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
2523{
2524	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
2525}
2526
2527/*
2528 * Access function for secpolicy_vnode_setattr
2529 */
2530int
2531zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
2532{
2533	int v4_mode = zfs_unix_to_v4(mode >> 6);
2534
2535	return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
2536}
2537
/*
 * Tunable, see zfs_zaccess_delete().  When non-zero, ACE_WRITE_DATA on the
 * containing directory is accepted alongside ACE_DELETE_CHILD when deciding
 * whether an entry may be removed (the backward-compatibility "case 2b").
 * Enabled by default.
 */
int zfs_write_implies_delete_child = 1;
2540
2541/*
2542 * Determine whether delete access should be granted.
2543 *
 * The following chart outlines how we handle delete permissions, which is
 * how recent versions of Windows (Windows 2008) handle them.  The efficiency
 * comes from not having to check the parent ACL where the object itself grants
 * delete:
2548 *
2549 *      -------------------------------------------------------
2550 *      |   Parent Dir  |      Target Object Permissions      |
2551 *      |  permissions  |                                     |
2552 *      -------------------------------------------------------
2553 *      |               | ACL Allows | ACL Denies| Delete     |
2554 *      |               |  Delete    |  Delete   | unspecified|
2555 *      -------------------------------------------------------
2556 *      | ACL Allows    | Permit     | Deny *    | Permit     |
2557 *      | DELETE_CHILD  |            |           |            |
2558 *      -------------------------------------------------------
2559 *      | ACL Denies    | Permit     | Deny      | Deny       |
2560 *      | DELETE_CHILD  |            |           |            |
2561 *      -------------------------------------------------------
2562 *      | ACL specifies |            |           |            |
2563 *      | only allow    | Permit     | Deny *    | Permit     |
2564 *      | write and     |            |           |            |
2565 *      | execute       |            |           |            |
2566 *      -------------------------------------------------------
2567 *      | ACL denies    |            |           |            |
2568 *      | write and     | Permit     | Deny      | Deny       |
2569 *      | execute       |            |           |            |
2570 *      -------------------------------------------------------
2571 *         ^
2572 *         |
2573 *         Re. execute permission on the directory:  if that's missing,
2574 *	   the vnode lookup of the target will fail before we get here.
2575 *
2576 * Re [*] in the table above:  NFSv4 would normally Permit delete for
2577 * these two cells of the matrix.
2578 * See acl.h for notes on which ACE_... flags should be checked for which
2579 * operations.  Specifically, the NFSv4 committee recommendation is in
2580 * conflict with the Windows interpretation of DENY ACEs, where DENY ACEs
2581 * should take precedence ahead of ALLOW ACEs.
2582 *
2583 * This implementation always consults the target object's ACL first.
2584 * If a DENY ACE is present on the target object that specifies ACE_DELETE,
2585 * delete access is denied.  If an ALLOW ACE with ACE_DELETE is present on
2586 * the target object, access is allowed.  If and only if no entries with
2587 * ACE_DELETE are present in the object's ACL, check the container's ACL
2588 * for entries with ACE_DELETE_CHILD.
2589 *
2590 * A summary of the logic implemented from the table above is as follows:
2591 *
2592 * First check for DENY ACEs that apply.
2593 * If either target or container has a deny, EACCES.
2594 *
2595 * Delete access can then be summarized as follows:
2596 * 1: The object to be deleted grants ACE_DELETE, or
2597 * 2: The containing directory grants ACE_DELETE_CHILD.
2598 * In a Windows system, that would be the end of the story.
2599 * In this system, (2) has some complications...
2600 * 2a: "sticky" bit on a directory adds restrictions, and
2601 * 2b: existing ACEs from previous versions of ZFS may
2602 * not carry ACE_DELETE_CHILD where they should, so we
2603 * also allow delete when ACE_WRITE_DATA is granted.
2604 *
2605 * Note: 2b is technically a work-around for a prior bug,
2606 * which hopefully can go away some day.  For those who
2607 * no longer need the work around, and for testing, this
2608 * work-around is made conditional via the tunable:
2609 * zfs_write_implies_delete_child
2610 */
2611int
2612zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
2613{
2614	uint32_t wanted_dirperms;
2615	uint32_t dzp_working_mode = 0;
2616	uint32_t zp_working_mode = 0;
2617	int dzp_error, zp_error;
2618	boolean_t dzpcheck_privs;
2619	boolean_t zpcheck_privs;
2620
2621	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
2622		return (SET_ERROR(EPERM));
2623
2624	/*
2625	 * Case 1:
2626	 * If target object grants ACE_DELETE then we are done.  This is
2627	 * indicated by a return value of 0.  For this case we don't worry
2628	 * about the sticky bit because sticky only applies to the parent
2629	 * directory and this is the child access result.
2630	 *
2631	 * If we encounter a DENY ACE here, we're also done (EACCES).
2632	 * Note that if we hit a DENY ACE here (on the target) it should
2633	 * take precedence over a DENY ACE on the container, so that when
2634	 * we have more complete auditing support we will be able to
2635	 * report an access failure against the specific target.
2636	 * (This is part of why we're checking the target first.)
2637	 */
2638	zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
2639	    &zpcheck_privs, B_FALSE, cr);
2640	if (zp_error == EACCES) {
2641		/* We hit a DENY ACE. */
2642		if (!zpcheck_privs)
2643			return (SET_ERROR(zp_error));
2644		return (secpolicy_vnode_remove(cr));
2645
2646	}
2647	if (zp_error == 0)
2648		return (0);
2649
2650	/*
2651	 * Case 2:
2652	 * If the containing directory grants ACE_DELETE_CHILD,
2653	 * or we're in backward compatibility mode and the
2654	 * containing directory has ACE_WRITE_DATA, allow.
2655	 * Case 2b is handled with wanted_dirperms.
2656	 */
2657	wanted_dirperms = ACE_DELETE_CHILD;
2658	if (zfs_write_implies_delete_child)
2659		wanted_dirperms |= ACE_WRITE_DATA;
2660	dzp_error = zfs_zaccess_common(dzp, wanted_dirperms,
2661	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
2662	if (dzp_error == EACCES) {
2663		/* We hit a DENY ACE. */
2664		if (!dzpcheck_privs)
2665			return (SET_ERROR(dzp_error));
2666		return (secpolicy_vnode_remove(cr));
2667	}
2668
2669	/*
2670	 * Cases 2a, 2b (continued)
2671	 *
2672	 * Note: dzp_working_mode now contains any permissions
2673	 * that were NOT granted.  Therefore, if any of the
2674	 * wanted_dirperms WERE granted, we will have:
2675	 *   dzp_working_mode != wanted_dirperms
2676	 * We're really asking if ANY of those permissions
2677	 * were granted, and if so, grant delete access.
2678	 */
2679	if (dzp_working_mode != wanted_dirperms)
2680		dzp_error = 0;
2681
2682	/*
2683	 * dzp_error is 0 if the container granted us permissions to "modify".
2684	 * If we do not have permission via one or more ACEs, our current
2685	 * privileges may still permit us to modify the container.
2686	 *
2687	 * dzpcheck_privs is false when i.e. the FS is read-only.
2688	 * Otherwise, do privilege checks for the container.
2689	 */
2690	if (dzp_error != 0 && dzpcheck_privs) {
2691		uid_t owner;
2692
2693		/*
2694		 * The secpolicy call needs the requested access and
2695		 * the current access mode of the container, but it
2696		 * only knows about Unix-style modes (VEXEC, VWRITE),
2697		 * so this must condense the fine-grained ACE bits into
2698		 * Unix modes.
2699		 *
2700		 * The VEXEC flag is easy, because we know that has
2701		 * always been checked before we get here (during the
2702		 * lookup of the target vnode).  The container has not
2703		 * granted us permissions to "modify", so we do not set
2704		 * the VWRITE flag in the current access mode.
2705		 */
2706		owner = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr,
2707		    ZFS_OWNER);
2708		dzp_error = secpolicy_vnode_access2(cr, ZTOV(dzp),
2709		    owner, VEXEC, VWRITE|VEXEC);
2710	}
2711	if (dzp_error != 0) {
2712		/*
2713		 * Note: We may have dzp_error = -1 here (from
2714		 * zfs_zacess_common).  Don't return that.
2715		 */
2716		return (SET_ERROR(EACCES));
2717	}
2718
2719	/*
2720	 * At this point, we know that the directory permissions allow
2721	 * us to modify, but we still need to check for the additional
2722	 * restrictions that apply when the "sticky bit" is set.
2723	 *
2724	 * Yes, zfs_sticky_remove_access() also checks this bit, but
2725	 * checking it here and skipping the call below is nice when
2726	 * you're watching all of this with dtrace.
2727	 */
2728	if ((dzp->z_mode & S_ISVTX) == 0)
2729		return (0);
2730
2731	/*
2732	 * zfs_sticky_remove_access will succeed if:
2733	 * 1. The sticky bit is absent.
2734	 * 2. We pass the sticky bit restrictions.
2735	 * 3. We have privileges that always allow file removal.
2736	 */
2737	return (zfs_sticky_remove_access(dzp, zp, cr));
2738}
2739
2740int
2741zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
2742    znode_t *tzp, cred_t *cr)
2743{
2744	int add_perm;
2745	int error;
2746
2747	if (szp->z_pflags & ZFS_AV_QUARANTINED)
2748		return (SET_ERROR(EACCES));
2749
2750	add_perm = (ZTOV(szp)->v_type == VDIR) ?
2751	    ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
2752
2753	/*
2754	 * Rename permissions are combination of delete permission +
2755	 * add file/subdir permission.
2756	 */
2757
2758	/*
2759	 * first make sure we do the delete portion.
2760	 *
2761	 * If that succeeds then check for add_file/add_subdir permissions
2762	 */
2763
2764	if (error = zfs_zaccess_delete(sdzp, szp, cr))
2765		return (error);
2766
2767	/*
2768	 * If we have a tzp, see if we can delete it?
2769	 */
2770	if (tzp) {
2771		if (error = zfs_zaccess_delete(tdzp, tzp, cr))
2772			return (error);
2773	}
2774
2775	/*
2776	 * Now check for add permissions
2777	 */
2778	error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
2779
2780	return (error);
2781}
2782