1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
24 */
25
26/*
27 * Big Theory Statement for Extended Attribute (XATTR) directories
28 *
29 * The Solaris VFS layer presents extended file attributes using a special
30 * "XATTR" directory under files or directories that have extended file
31 * attributes.  See fsattr(5) for background.
32 *
33 * This design avoids the need for a separate set of VFS or vnode functions
34 * for operating on XATTR objects.  File system implementations that support
35 * XATTR instantiate a special XATTR directory using this module.
36 * Applications get to the XATTR directory by passing the LOOKUP_XATTR flag
37 * to fop_lookup.  Once the XATTR directory is obtained, all other file
38 * system operations on extended attributes happen via the normal vnode
39 * functions, applied to the XATTR directory or its contents.
40 *
41 * The XATTR directories returned by fop_lookup (with LOOKUP_XATTR) are
 * implemented differently, depending on whether the file system supports
43 * "extended attributes" (XATTR), "system attributes" (SYSATTR), or both.
44 *
45 * When SYSATTR=true, XATTR=true:
46 *	The XATTR directory is a "generic file system" (GFS) object
47 *	that adds the special system attribute names (SUNWattr*) to
 *	the list of XATTR files presented by the underlying FS.
49 *	In this case, many operations are "passed through" to the
50 *	lower-level FS.
51 *
52 * When SYSATTR=true, XATTR=false:
53 *	The XATTR directory is a "generic file system" (GFS) object,
54 *	presenting only the system attribute names (SUNWattr*)
55 *	In this case there's no lower-level FS, only the GFS object.
56 *
57 * When SYSATTR=false, XATTR=true:
58 *	The XATTR directory is implemented by the file system code,
59 *	and this module is not involved after xattr_dir_lookup()
60 *	returns the XATTR dir from the underlying file system.
61 *
62 * When SYSATTR=false, XATTR=false:
63 *	xattr_dir_lookup just returns EINVAL
64 *
65 * In the first two cases (where we have system attributes) this module
66 * implements what can be thought of as a "translucent" directory containing
67 * both the system attribute names (SUNWattr*) and whatever XATTR names may
68 * exist in the XATTR directory of the underlying file system, if any.
69 *
70 * This affects operations on the (GFS) XATTR directory as follows:
71 *
72 * readdir:	Merges the SUNWattr* names with any contents from the
73 *		underlying XATTR directory.
74 *
75 * rename:	If "to" or "from" is a SUNWattr name, special handling,
76 *		else pass through to the lower FS.
77 *
78 * link:	If "from" is a SUNWattr name, disallow.
79 *
80 * create:	If a SUNWattr name, disallow, else pass to lower FS.
81 * remove:	(same)
82 *
83 * open,close:	Just pass through to the XATTR dir in the lower FS.
84 *
85 * lookup:	Lookup an XATTR file in either the (GFS) XATTR directory
86 *		or the "real" XATTR directory of the underlying FS.
 *		Note for file systems that support SYSATTR but not XATTR,
88 *		only the GFS XATTR directory will exist.  When both exist,
89 *		gfs_vop_lookup uses the xattr_lookup_cb callback function
90 *		which passes the lookup call through to the "real" FS.
91 *
92 * Operations on the XATTR _files_ are simpler:
93 *
94 * If the file vnode came from lookup at the GFS level, the file is one of
 * the special SUNWattr* vnodes, and its vnode operations (xattr_file_tops)
96 * allow only what's appropriate on these "files".
97 *
98 * If the file vnode came from the underlying FS, all operations on that
99 * object are handled through the vnode operations set by that FS.
100 */
101
102#include <sys/param.h>
103#include <sys/isa_defs.h>
104#include <sys/types.h>
105#include <sys/sysmacros.h>
106#include <sys/cred.h>
107#include <sys/systm.h>
108#include <sys/errno.h>
109#include <sys/fcntl.h>
110#include <sys/pathname.h>
111#include <sys/stat.h>
112#include <sys/vfs.h>
113#include <sys/acl.h>
114#include <sys/file.h>
115#include <sys/sunddi.h>
116#include <sys/debug.h>
117#include <sys/cmn_err.h>
118#include <sys/vnode.h>
119#include <sys/mode.h>
120#include <sys/nvpair.h>
121#include <sys/attr.h>
122#include <sys/gfs.h>
123#include <sys/mutex.h>
124#include <fs/fs_subr.h>
125#include <sys/kidmap.h>
126
/*
 * Private data of a system attribute file vnode, reached through the
 * vnode's v_data pointer (see xattr_mkfile()).
 */
typedef struct {
	/*
	 * GFS file state.  NOTE(review): presumably has to remain the
	 * first member because gfs_file_create() allocates the whole
	 * structure -- confirm against the GFS code.
	 */
	gfs_file_t	xattr_gfs_private;
	/* Which attribute view this file serves (read-only or read-write) */
	xattr_view_t	xattr_view;
} xattr_file_t;
131
/*
 * Private data of a GFS XATTR directory vnode, reached through the
 * vnode's v_data pointer.
 */
typedef struct {
	/*
	 * GFS directory state.  NOTE(review): presumably has to remain
	 * the first member -- confirm against the GFS code.
	 */
	gfs_dir_t	xattr_gfs_private;
	/*
	 * Cached "real" XATTR directory of the underlying file system,
	 * or NULL if it has not been looked up yet.  Read and written
	 * under the GFS directory's v_lock in xattr_dir_realdir().
	 */
	vnode_t		*xattr_realvp;
} xattr_dir_t;
136
137/* ARGSUSED */
138static int
139xattr_file_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
140{
141	xattr_file_t *np = (*vpp)->v_data;
142
143	if ((np->xattr_view == XATTR_VIEW_READONLY) && (flags & FWRITE))
144		return (EACCES);
145
146	return (0);
147}
148
149/* ARGSUSED */
150static int
151xattr_file_access(vnode_t *vp, int mode, int flags, cred_t *cr,
152    caller_context_t *ct)
153{
154	xattr_file_t *np = vp->v_data;
155
156	if ((np->xattr_view == XATTR_VIEW_READONLY) && (mode & VWRITE))
157		return (EACCES);
158
159	return (0);
160}
161
162/* ARGSUSED */
163static int
164xattr_file_close(vnode_t *vp, int flags, int count, offset_t off,
165    cred_t *cr, caller_context_t *ct)
166{
167	cleanlocks(vp, ddi_get_pid(), 0);
168	cleanshares(vp, ddi_get_pid());
169	return (0);
170}
171
172static int
173xattr_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
174{
175	xattr_fid_t	*xfidp;
176	vnode_t		*pvp, *savevp;
177	int		error;
178	uint16_t	orig_len;
179
180	if (fidp->fid_len < XATTR_FIDSZ) {
181		fidp->fid_len = XATTR_FIDSZ;
182		return (ENOSPC);
183	}
184
185	savevp = pvp = gfs_file_parent(vp);
186	mutex_enter(&savevp->v_lock);
187	if (pvp->v_flag & V_XATTRDIR) {
188		pvp = gfs_file_parent(pvp);
189	}
190	mutex_exit(&savevp->v_lock);
191
192	xfidp = (xattr_fid_t *)fidp;
193	orig_len = fidp->fid_len;
194	fidp->fid_len = sizeof (xfidp->parent_fid);
195
196	error = VOP_FID(pvp, fidp, ct);
197	if (error) {
198		fidp->fid_len = orig_len;
199		return (error);
200	}
201
202	xfidp->parent_len = fidp->fid_len;
203	fidp->fid_len = XATTR_FIDSZ;
204	xfidp->dir_offset = gfs_file_inode(vp);
205
206	return (0);
207}
208
/*
 * Fill nvlp with the system attributes that belong to xattr_view.
 *
 * vp is the GFS system attribute file; the attributes themselves are
 * fetched with VOP_GETATTR() from its grandparent, the real file system
 * object.  Works in three steps:
 *   1. Request every optional attribute whose view matches xattr_view
 *      (F_FSID is computed from the vfs and added to nvlp right away).
 *   2. Call VOP_GETATTR() and add every attribute that was returned.
 *   3. If the uid/gid is above MAXUID, try to add the owner/group SID.
 *
 * Returns 0 on success, EINVAL if the xvattr has no optional attribute
 * area, ENOMEM if a SID nvlist cannot be allocated, or the error from
 * VOP_GETATTR().  On a non-zero return nvlp may be partially filled.
 */
/* ARGSUSED */
static int
xattr_fill_nvlist(vnode_t *vp, xattr_view_t xattr_view, nvlist_t *nvlp,
    cred_t *cr, caller_context_t *ct)
{
	int error;
	f_attr_t attr;
	uint64_t fsid;
	xvattr_t xvattr;
	xoptattr_t *xoap;	/* Pointer to optional attributes */
	vnode_t *ppvp;
	const char *domain;
	uint32_t rid;

	xva_init(&xvattr);

	if ((xoap = xva_getxoptattr(&xvattr)) == NULL)
		return (EINVAL);

	/*
	 * For detecting ephemeral uid/gid
	 */
	xvattr.xva_vattr.va_mask |= (AT_UID|AT_GID);

	/*
	 * We need to access the real fs object.
	 * vp points to a GFS file; ppvp points to the real object.
	 */
	ppvp = gfs_file_parent(gfs_file_parent(vp));

	/*
	 * Iterate through the attrs associated with this view
	 */

	for (attr = 0; attr < F_ATTR_ALL; attr++) {
		/* Skip attributes that belong to the other view */
		if (xattr_view != attr_to_xattr_view(attr)) {
			continue;
		}

		switch (attr) {
		case F_SYSTEM:
			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
			break;
		case F_READONLY:
			XVA_SET_REQ(&xvattr, XAT_READONLY);
			break;
		case F_HIDDEN:
			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
			break;
		case F_ARCHIVE:
			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
			break;
		case F_IMMUTABLE:
			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
			break;
		case F_APPENDONLY:
			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
			break;
		case F_NOUNLINK:
			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
			break;
		case F_OPAQUE:
			XVA_SET_REQ(&xvattr, XAT_OPAQUE);
			break;
		case F_NODUMP:
			XVA_SET_REQ(&xvattr, XAT_NODUMP);
			break;
		case F_AV_QUARANTINED:
			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
			break;
		case F_AV_MODIFIED:
			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
			break;
		case F_AV_SCANSTAMP:
			/* The scanstamp is only requested for regular files */
			if (ppvp->v_type == VREG)
				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
			break;
		case F_CRTIME:
			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
			break;
		case F_FSID:
			/*
			 * Not an xvattr attribute: build the 64-bit value
			 * from the two vfs_fsid words and add it directly.
			 */
			fsid = (((uint64_t)vp->v_vfsp->vfs_fsid.val[0] << 32) |
			    (uint64_t)(vp->v_vfsp->vfs_fsid.val[1] &
			    0xffffffff));
			VERIFY(nvlist_add_uint64(nvlp, attr_to_name(attr),
			    fsid) == 0);
			break;
		case F_REPARSE:
			XVA_SET_REQ(&xvattr, XAT_REPARSE);
			break;
		case F_GEN:
			XVA_SET_REQ(&xvattr, XAT_GEN);
			break;
		case F_OFFLINE:
			XVA_SET_REQ(&xvattr, XAT_OFFLINE);
			break;
		case F_SPARSE:
			XVA_SET_REQ(&xvattr, XAT_SPARSE);
			break;
		default:
			break;
		}
	}

	error = VOP_GETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
	if (error)
		return (error);

	/*
	 * Process all the optional attributes together here.  Notice that
	 * xoap was set when the optional attribute bits were set above.
	 * Only attributes that the file system marked as returned
	 * (XVA_ISSET_RTN) are added to the nvlist.
	 */
	if ((xvattr.xva_vattr.va_mask & AT_XVATTR) && xoap) {
		if (XVA_ISSET_RTN(&xvattr, XAT_READONLY)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_READONLY),
			    xoap->xoa_readonly) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_HIDDEN)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_HIDDEN),
			    xoap->xoa_hidden) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_SYSTEM)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_SYSTEM),
			    xoap->xoa_system) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_ARCHIVE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_ARCHIVE),
			    xoap->xoa_archive) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_IMMUTABLE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_IMMUTABLE),
			    xoap->xoa_immutable) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_NOUNLINK)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_NOUNLINK),
			    xoap->xoa_nounlink) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_APPENDONLY)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_APPENDONLY),
			    xoap->xoa_appendonly) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_NODUMP)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_NODUMP),
			    xoap->xoa_nodump) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_OPAQUE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_OPAQUE),
			    xoap->xoa_opaque) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_QUARANTINED)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_AV_QUARANTINED),
			    xoap->xoa_av_quarantined) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_MODIFIED)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_AV_MODIFIED),
			    xoap->xoa_av_modified) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_SCANSTAMP)) {
			/* The whole scanstamp array is exported as bytes */
			VERIFY(nvlist_add_uint8_array(nvlp,
			    attr_to_name(F_AV_SCANSTAMP),
			    xoap->xoa_av_scanstamp,
			    sizeof (xoap->xoa_av_scanstamp)) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_CREATETIME)) {
			/* The create time is exported as an array of uint64 */
			VERIFY(nvlist_add_uint64_array(nvlp,
			    attr_to_name(F_CRTIME),
			    (uint64_t *)&(xoap->xoa_createtime),
			    sizeof (xoap->xoa_createtime) /
			    sizeof (uint64_t)) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_REPARSE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_REPARSE),
			    xoap->xoa_reparse) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_GEN)) {
			VERIFY(nvlist_add_uint64(nvlp,
			    attr_to_name(F_GEN),
			    xoap->xoa_generation) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_OFFLINE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_OFFLINE),
			    xoap->xoa_offline) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_SPARSE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_SPARSE),
			    xoap->xoa_sparse) == 0);
		}
	}
	/*
	 * Check for optional ownersid/groupsid.  An id above MAXUID is
	 * looked up through kidmap; the SID is added as a nested nvlist
	 * (domain + rid) only when that lookup succeeds.
	 */

	if (xvattr.xva_vattr.va_uid > MAXUID) {
		nvlist_t *nvl_sid;

		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
			return (ENOMEM);

		if (kidmap_getsidbyuid(crgetzone(cr), xvattr.xva_vattr.va_uid,
		    &domain, &rid) == 0) {
			VERIFY(nvlist_add_string(nvl_sid,
			    SID_DOMAIN, domain) == 0);
			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_OWNERSID),
			    nvl_sid) == 0);
		}
		/* Freed unconditionally, including after it was added */
		nvlist_free(nvl_sid);
	}
	if (xvattr.xva_vattr.va_gid > MAXUID) {
		nvlist_t *nvl_sid;

		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
			return (ENOMEM);

		if (kidmap_getsidbygid(crgetzone(cr), xvattr.xva_vattr.va_gid,
		    &domain, &rid) == 0) {
			VERIFY(nvlist_add_string(nvl_sid,
			    SID_DOMAIN, domain) == 0);
			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_GROUPSID),
			    nvl_sid) == 0);
		}
		/* Freed unconditionally, including after it was added */
		nvlist_free(nvl_sid);
	}

	return (0);
}
450
451/*
452 * The size of a sysattr file is the size of the nvlist that will be
453 * returned by xattr_file_read().  A call to xattr_file_write() could
454 * change the size of that nvlist.  That size is not stored persistently
455 * so xattr_fill_nvlist() calls VOP_GETATTR so that it can be calculated.
456 */
457static int
458xattr_file_size(vnode_t *vp, xattr_view_t xattr_view, size_t *size,
459    cred_t *cr, caller_context_t *ct)
460{
461	nvlist_t *nvl;
462
463	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
464		return (ENOMEM);
465	}
466
467	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
468		nvlist_free(nvl);
469		return (EFAULT);
470	}
471
472	VERIFY(nvlist_size(nvl, size, NV_ENCODE_XDR) == 0);
473	nvlist_free(nvl);
474	return (0);
475}
476
477/* ARGSUSED */
478static int
479xattr_file_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
480    caller_context_t *ct)
481{
482	xattr_file_t *np = vp->v_data;
483	timestruc_t now;
484	size_t size;
485	int error;
486	vnode_t *pvp;
487	vattr_t pvattr;
488
489	vap->va_type = VREG;
490	vap->va_mode = MAKEIMODE(vap->va_type,
491	    (np->xattr_view == XATTR_VIEW_READONLY ? 0444 : 0644));
492	vap->va_nodeid = gfs_file_inode(vp);
493	vap->va_nlink = 1;
494	pvp = gfs_file_parent(vp);
495	(void) memset(&pvattr, 0, sizeof (pvattr));
496	pvattr.va_mask = AT_CTIME|AT_MTIME;
497	error = VOP_GETATTR(pvp, &pvattr, flags, cr, ct);
498	if (error) {
499		return (error);
500	}
501	vap->va_ctime = pvattr.va_ctime;
502	vap->va_mtime = pvattr.va_mtime;
503	gethrestime(&now);
504	vap->va_atime = now;
505	vap->va_uid = 0;
506	vap->va_gid = 0;
507	vap->va_rdev = 0;
508	vap->va_blksize = DEV_BSIZE;
509	vap->va_seq = 0;
510	vap->va_fsid = vp->v_vfsp->vfs_dev;
511	error = xattr_file_size(vp, np->xattr_view, &size, cr, ct);
512	vap->va_size = size;
513	vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
514	return (error);
515}
516
517/* ARGSUSED */
518static int
519xattr_file_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
520    caller_context_t *ct)
521{
522	xattr_file_t *np = vp->v_data;
523	xattr_view_t xattr_view = np->xattr_view;
524	char *buf;
525	size_t filesize;
526	nvlist_t *nvl;
527	int error;
528
529	/*
530	 * Validate file offset and fasttrack empty reads
531	 */
532	if (uiop->uio_loffset < (offset_t)0)
533		return (EINVAL);
534
535	if (uiop->uio_resid == 0)
536		return (0);
537
538	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP))
539		return (ENOMEM);
540
541	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
542		nvlist_free(nvl);
543		return (EFAULT);
544	}
545
546	VERIFY(nvlist_size(nvl, &filesize, NV_ENCODE_XDR) == 0);
547
548	if (uiop->uio_loffset >= filesize) {
549		nvlist_free(nvl);
550		return (0);
551	}
552
553	buf = kmem_alloc(filesize, KM_SLEEP);
554	VERIFY(nvlist_pack(nvl, &buf, &filesize, NV_ENCODE_XDR,
555	    KM_SLEEP) == 0);
556
557	error = uiomove((caddr_t)buf, filesize, UIO_READ, uiop);
558	kmem_free(buf, filesize);
559	nvlist_free(nvl);
560	return (error);
561}
562
563/* ARGSUSED */
564static int
565xattr_file_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
566    caller_context_t *ct)
567{
568	int error = 0;
569	char *buf;
570	char *domain;
571	uint32_t rid;
572	ssize_t size = uiop->uio_resid;
573	nvlist_t *nvp;
574	nvpair_t *pair = NULL;
575	vnode_t *ppvp;
576	xvattr_t xvattr;
577	xoptattr_t *xoap = NULL;	/* Pointer to optional attributes */
578
579	if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0)
580		return (EINVAL);
581
582	/*
583	 * Validate file offset and size.
584	 */
585	if (uiop->uio_loffset < (offset_t)0)
586		return (EINVAL);
587
588	if (size == 0)
589		return (EINVAL);
590
591	xva_init(&xvattr);
592
593	if ((xoap = xva_getxoptattr(&xvattr)) == NULL) {
594		return (EINVAL);
595	}
596
597	/*
598	 * Copy and unpack the nvlist
599	 */
600	buf = kmem_alloc(size, KM_SLEEP);
601	if (uiomove((caddr_t)buf, size, UIO_WRITE, uiop)) {
602		return (EFAULT);
603	}
604
605	if (nvlist_unpack(buf, size, &nvp, KM_SLEEP) != 0) {
606		kmem_free(buf, size);
607		uiop->uio_resid = size;
608		return (EINVAL);
609	}
610	kmem_free(buf, size);
611
612	/*
613	 * Fasttrack empty writes (nvlist with no nvpairs)
614	 */
615	if (nvlist_next_nvpair(nvp, NULL) == 0)
616		return (0);
617
618	ppvp = gfs_file_parent(gfs_file_parent(vp));
619
620	while (pair = nvlist_next_nvpair(nvp, pair)) {
621		data_type_t type;
622		f_attr_t attr;
623		boolean_t value;
624		uint64_t *time, *times;
625		uint_t elem, nelems;
626		nvlist_t *nvp_sid;
627		uint8_t *scanstamp;
628
629		/*
630		 * Validate the name and type of each attribute.
631		 * Log any unknown names and continue.  This will
632		 * help if additional attributes are added later.
633		 */
634		type = nvpair_type(pair);
635		if ((attr = name_to_attr(nvpair_name(pair))) == F_ATTR_INVAL) {
636			cmn_err(CE_WARN, "Unknown attribute %s",
637			    nvpair_name(pair));
638			continue;
639		}
640
641		/*
642		 * Verify nvlist type matches required type and view is OK
643		 */
644
645		if (type != attr_to_data_type(attr) ||
646		    (attr_to_xattr_view(attr) == XATTR_VIEW_READONLY)) {
647			nvlist_free(nvp);
648			return (EINVAL);
649		}
650
651		/*
652		 * For OWNERSID/GROUPSID make sure the target
653		 * file system support ephemeral ID's
654		 */
655		if ((attr == F_OWNERSID || attr == F_GROUPSID) &&
656		    (!(vp->v_vfsp->vfs_flag & VFS_XID))) {
657			nvlist_free(nvp);
658			return (EINVAL);
659		}
660
661		/*
662		 * Retrieve data from nvpair
663		 */
664		switch (type) {
665		case DATA_TYPE_BOOLEAN_VALUE:
666			if (nvpair_value_boolean_value(pair, &value)) {
667				nvlist_free(nvp);
668				return (EINVAL);
669			}
670			break;
671		case DATA_TYPE_UINT64_ARRAY:
672			if (nvpair_value_uint64_array(pair, &times, &nelems)) {
673				nvlist_free(nvp);
674				return (EINVAL);
675			}
676			break;
677		case DATA_TYPE_NVLIST:
678			if (nvpair_value_nvlist(pair, &nvp_sid)) {
679				nvlist_free(nvp);
680				return (EINVAL);
681			}
682			break;
683		case DATA_TYPE_UINT8_ARRAY:
684			if (nvpair_value_uint8_array(pair,
685			    &scanstamp, &nelems)) {
686				nvlist_free(nvp);
687				return (EINVAL);
688			}
689			break;
690		default:
691			nvlist_free(nvp);
692			return (EINVAL);
693		}
694
695		switch (attr) {
696		/*
697		 * If we have several similar optional attributes to
698		 * process then we should do it all together here so that
699		 * xoap and the requested bitmap can be set in one place.
700		 */
701		case F_READONLY:
702			XVA_SET_REQ(&xvattr, XAT_READONLY);
703			xoap->xoa_readonly = value;
704			break;
705		case F_HIDDEN:
706			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
707			xoap->xoa_hidden = value;
708			break;
709		case F_SYSTEM:
710			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
711			xoap->xoa_system = value;
712			break;
713		case F_ARCHIVE:
714			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
715			xoap->xoa_archive = value;
716			break;
717		case F_IMMUTABLE:
718			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
719			xoap->xoa_immutable = value;
720			break;
721		case F_NOUNLINK:
722			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
723			xoap->xoa_nounlink = value;
724			break;
725		case F_APPENDONLY:
726			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
727			xoap->xoa_appendonly = value;
728			break;
729		case F_NODUMP:
730			XVA_SET_REQ(&xvattr, XAT_NODUMP);
731			xoap->xoa_nodump = value;
732			break;
733		case F_AV_QUARANTINED:
734			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
735			xoap->xoa_av_quarantined = value;
736			break;
737		case F_AV_MODIFIED:
738			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
739			xoap->xoa_av_modified = value;
740			break;
741		case F_CRTIME:
742			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
743			time = (uint64_t *)&(xoap->xoa_createtime);
744			for (elem = 0; elem < nelems; elem++)
745				*time++ = times[elem];
746			break;
747		case F_OWNERSID:
748		case F_GROUPSID:
749			if (nvlist_lookup_string(nvp_sid, SID_DOMAIN,
750			    &domain) || nvlist_lookup_uint32(nvp_sid, SID_RID,
751			    &rid)) {
752				nvlist_free(nvp);
753				return (EINVAL);
754			}
755
756			/*
757			 * Now map domain+rid to ephemeral id's
758			 *
759			 * If mapping fails, then the uid/gid will
760			 * be set to UID_NOBODY by Winchester.
761			 */
762
763			if (attr == F_OWNERSID) {
764				(void) kidmap_getuidbysid(crgetzone(cr), domain,
765				    rid, &xvattr.xva_vattr.va_uid);
766				xvattr.xva_vattr.va_mask |= AT_UID;
767			} else {
768				(void) kidmap_getgidbysid(crgetzone(cr), domain,
769				    rid, &xvattr.xva_vattr.va_gid);
770				xvattr.xva_vattr.va_mask |= AT_GID;
771			}
772			break;
773		case F_AV_SCANSTAMP:
774			if (ppvp->v_type == VREG) {
775				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
776				(void) memcpy(xoap->xoa_av_scanstamp,
777				    scanstamp, nelems);
778			} else {
779				nvlist_free(nvp);
780				return (EINVAL);
781			}
782			break;
783		case F_REPARSE:
784			XVA_SET_REQ(&xvattr, XAT_REPARSE);
785			xoap->xoa_reparse = value;
786			break;
787		case F_OFFLINE:
788			XVA_SET_REQ(&xvattr, XAT_OFFLINE);
789			xoap->xoa_offline = value;
790			break;
791		case F_SPARSE:
792			XVA_SET_REQ(&xvattr, XAT_SPARSE);
793			xoap->xoa_sparse = value;
794			break;
795		default:
796			break;
797		}
798	}
799
800	ppvp = gfs_file_parent(gfs_file_parent(vp));
801	error = VOP_SETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
802	if (error)
803		uiop->uio_resid = size;
804
805	nvlist_free(nvp);
806	return (error);
807}
808
809static int
810xattr_file_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
811    caller_context_t *ct)
812{
813	switch (cmd) {
814	case _PC_XATTR_EXISTS:
815	case _PC_SATTR_ENABLED:
816	case _PC_SATTR_EXISTS:
817		*valp = 0;
818		return (0);
819	default:
820		return (fs_pathconf(vp, cmd, valp, cr, ct));
821	}
822}
823
/*
 * Vnode operations used for system attribute files; handed to
 * gfs_file_create() by xattr_mkfile().
 * NOTE(review): presumably built from xattr_file_tops during module
 * initialization, which is not part of this excerpt -- confirm.
 */
vnodeops_t *xattr_file_ops;
825
/*
 * Operation template for system attribute files.  The xattr_file_*
 * and xattr_common_fid entries are implemented in this file; the
 * remaining entries point at fs_* / gfs_* routines defined elsewhere.
 * The table is terminated by a NULL name.
 */
static const fs_operation_def_t xattr_file_tops[] = {
	{ VOPNAME_OPEN,		{ .vop_open = xattr_file_open }		},
	{ VOPNAME_CLOSE,	{ .vop_close = xattr_file_close }	},
	{ VOPNAME_READ,		{ .vop_read = xattr_file_read }		},
	{ VOPNAME_WRITE,	{ .vop_write = xattr_file_write }	},
	{ VOPNAME_IOCTL,	{ .error = fs_ioctl }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_file_getattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = xattr_file_access }	},
	{ VOPNAME_READDIR,	{ .error = fs_notdir }			},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_file_pathconf }	},
	{ VOPNAME_PUTPAGE,	{ .error = fs_putpage }			},
	{ VOPNAME_FSYNC,	{ .error = fs_fsync }			},
	{ NULL }	/* end of table */
};
843
844vnode_t *
845xattr_mkfile(vnode_t *pvp, xattr_view_t xattr_view)
846{
847	vnode_t *vp;
848	xattr_file_t *np;
849
850	vp = gfs_file_create(sizeof (xattr_file_t), pvp, xattr_file_ops);
851	np = vp->v_data;
852	np->xattr_view = xattr_view;
853	vp->v_flag |= V_SYSATTR;
854	return (vp);
855}
856
857vnode_t *
858xattr_mkfile_ro(vnode_t *pvp)
859{
860	return (xattr_mkfile(pvp, XATTR_VIEW_READONLY));
861}
862
863vnode_t *
864xattr_mkfile_rw(vnode_t *pvp)
865{
866	return (xattr_mkfile(pvp, XATTR_VIEW_READWRITE));
867}
868
/*
 * Vnode operations for the GFS XATTR directory.
 * NOTE(review): where this is populated and used is not part of this
 * excerpt -- confirm against the rest of the file.
 */
vnodeops_t *xattr_dir_ops;
870
/*
 * Static entries of the GFS XATTR directory: one system attribute file
 * per view, each with the function that creates its vnode.  The table
 * is terminated by a NULL name.
 */
static gfs_dirent_t xattr_dirents[] = {
	{ VIEW_READONLY, xattr_mkfile_ro, GFS_CACHE_VNODE, },
	{ VIEW_READWRITE, xattr_mkfile_rw, GFS_CACHE_VNODE, },
	{ NULL },
};

/* Number of real entries in xattr_dirents (the terminator is excluded) */
#define	XATTRDIR_NENTS	((sizeof (xattr_dirents) / sizeof (gfs_dirent_t)) - 1)
878
879static int
880is_sattr_name(char *s)
881{
882	int i;
883
884	for (i = 0; i < XATTRDIR_NENTS; ++i) {
885		if (strcmp(s, xattr_dirents[i].gfse_name) == 0) {
886			return (1);
887		}
888	}
889	return (0);
890}
891
892/*
893 * Given the name of an extended attribute file, determine if there is a
894 * normalization conflict with a sysattr view name.
895 */
896int
897xattr_sysattr_casechk(char *s)
898{
899	int i;
900
901	for (i = 0; i < XATTRDIR_NENTS; ++i) {
902		if (strcasecmp(s, xattr_dirents[i].gfse_name) == 0)
903			return (1);
904	}
905	return (0);
906}
907
908static int
909xattr_copy(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
910    cred_t *cr, caller_context_t *ct)
911{
912	xvattr_t xvattr;
913	vnode_t *pdvp;
914	int error;
915
916	/*
917	 * Only copy system attrs if the views are the same
918	 */
919	if (strcmp(snm, tnm) != 0)
920		return (EINVAL);
921
922	xva_init(&xvattr);
923
924	XVA_SET_REQ(&xvattr, XAT_SYSTEM);
925	XVA_SET_REQ(&xvattr, XAT_READONLY);
926	XVA_SET_REQ(&xvattr, XAT_HIDDEN);
927	XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
928	XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
929	XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
930	XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
931	XVA_SET_REQ(&xvattr, XAT_NODUMP);
932	XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
933	XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
934	XVA_SET_REQ(&xvattr, XAT_CREATETIME);
935	XVA_SET_REQ(&xvattr, XAT_REPARSE);
936	XVA_SET_REQ(&xvattr, XAT_OFFLINE);
937	XVA_SET_REQ(&xvattr, XAT_SPARSE);
938
939	pdvp = gfs_file_parent(sdvp);
940	error = VOP_GETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
941	if (error)
942		return (error);
943
944	pdvp = gfs_file_parent(tdvp);
945	error = VOP_SETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
946	return (error);
947}
948
/*
 * Get the "real" XATTR directory associated with the GFS XATTR directory.
 * Note: This does NOT take any additional hold on the returned real_vp,
 * because when this lookup succeeds we save the result in xattr_realvp
 * and keep that hold until the GFS XATTR directory goes inactive.
 *
 * gfs_dvp is the GFS XATTR directory; flags are passed on to VOP_LOOKUP
 * (with LOOKUP_HAVE_SYSATTR_DIR added).  On success returns 0 and stores
 * the real directory in *ret_vpp.  On failure returns the error from
 * pn_get() or VOP_LOOKUP() and leaves *ret_vpp set to NULL.
 */
static int
xattr_dir_realdir(vnode_t *gfs_dvp, vnode_t **ret_vpp, int flags,
    cred_t *cr, caller_context_t *ct)
{
	struct pathname pn;
	char *nm = "";
	xattr_dir_t *xattr_dir;
	vnode_t *realvp;
	int error;

	*ret_vpp = NULL;

	/*
	 * Usually, we've already found the underlying XATTR directory
	 * during some previous lookup and stored it in xattr_realvp.
	 */
	mutex_enter(&gfs_dvp->v_lock);
	xattr_dir = gfs_dvp->v_data;
	realvp = xattr_dir->xattr_realvp;
	mutex_exit(&gfs_dvp->v_lock);
	if (realvp != NULL) {
		*ret_vpp = realvp;
		return (0);
	}

	/*
	 * Lookup the XATTR dir in the underlying FS, relative to our
	 * "parent", which is the real object for which this GFS XATTR
	 * directory was created.  Set the LOOKUP_HAVE_SYSATTR_DIR flag
	 * so that we don't get into an infinite loop with fop_lookup
	 * calling back to xattr_dir_lookup.
	 *
	 * The lookup is done with v_lock dropped, which is why the
	 * cached pointer has to be re-checked afterwards.
	 */
	error = pn_get(nm, UIO_SYSSPACE, &pn);
	if (error != 0)
		return (error);
	error = VOP_LOOKUP(gfs_file_parent(gfs_dvp), nm, &realvp, &pn,
	    flags | LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, ct, NULL, NULL);
	pn_free(&pn);
	if (error != 0)
		return (error);

	/*
	 * Have the real XATTR directory.  Save it -- but first
	 * check whether we lost a race doing the lookup.
	 */
	mutex_enter(&gfs_dvp->v_lock);
	xattr_dir = gfs_dvp->v_data;
	if (xattr_dir->xattr_realvp == NULL) {
		/*
		 * Note that the hold taken by the VOP_LOOKUP above is
		 * retained from here until xattr_dir_inactive.
		 */
		xattr_dir->xattr_realvp = realvp;
	} else {
		/*
		 * We lost the race.  Drop the hold from our own lookup
		 * and use the vnode that was saved in the meantime.
		 */
		VN_RELE(realvp);
		realvp = xattr_dir->xattr_realvp;
	}
	mutex_exit(&gfs_dvp->v_lock);

	*ret_vpp = realvp;
	return (0);
}
1018
1019/* ARGSUSED */
1020static int
1021xattr_dir_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
1022{
1023	vnode_t *realvp;
1024	int error;
1025
1026	if (flags & FWRITE) {
1027		return (EACCES);
1028	}
1029
1030	/*
1031	 * If there is a real extended attribute directory,
1032	 * let the underlying FS see the VOP_OPEN call;
1033	 * otherwise just return zero.
1034	 */
1035	error = xattr_dir_realdir(*vpp, &realvp, LOOKUP_XATTR, cr, ct);
1036	if (error == 0) {
1037		error = VOP_OPEN(&realvp, flags, cr, ct);
1038	} else {
1039		error = 0;
1040	}
1041
1042	return (error);
1043}
1044
1045/* ARGSUSED */
1046static int
1047xattr_dir_close(vnode_t *vp, int flags, int count, offset_t off, cred_t *cr,
1048    caller_context_t *ct)
1049{
1050	vnode_t *realvp;
1051	int error;
1052
1053	/*
1054	 * If there is a real extended attribute directory,
1055	 * let the underlying FS see the VOP_CLOSE call;
1056	 * otherwise just return zero.
1057	 */
1058	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1059	if (error == 0) {
1060		error = VOP_CLOSE(realvp, flags, count, off, cr, ct);
1061	} else {
1062		error = 0;
1063	}
1064
1065	return (error);
1066}
1067
1068/*
1069 * Retrieve the attributes on an xattr directory.  If there is a "real"
1070 * xattr directory, use that.  Otherwise, get the attributes (represented
1071 * by PARENT_ATTRMASK) from the "parent" node and fill in the rest.  Note
1072 * that VOP_GETATTR() could turn off bits in the va_mask.
1073 */
1074
1075#define	PARENT_ATTRMASK	(AT_UID|AT_GID|AT_RDEV|AT_CTIME|AT_MTIME)
1076
1077/* ARGSUSED */
1078static int
1079xattr_dir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1080    caller_context_t *ct)
1081{
1082	timestruc_t now;
1083	vnode_t *pvp;
1084	int error;
1085
1086	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, ct);
1087	if (error == 0) {
1088		error = VOP_GETATTR(pvp, vap, 0, cr, ct);
1089		if (error) {
1090			return (error);
1091		}
1092		vap->va_nlink += XATTRDIR_NENTS;
1093		vap->va_size += XATTRDIR_NENTS;
1094		return (0);
1095	}
1096
1097	/*
1098	 * There is no real xattr directory.  Cobble together
1099	 * an entry using info from the parent object (if needed)
1100	 * plus information common to all xattrs.
1101	 */
1102	if (vap->va_mask & PARENT_ATTRMASK) {
1103		vattr_t pvattr;
1104		uint_t  off_bits;
1105
1106		pvp = gfs_file_parent(vp);
1107		(void) memset(&pvattr, 0, sizeof (pvattr));
1108		pvattr.va_mask = PARENT_ATTRMASK;
1109		error = VOP_GETATTR(pvp, &pvattr, 0, cr, ct);
1110		if (error) {
1111			return (error);
1112		}
1113
1114		/*
1115		 * VOP_GETATTR() might have turned off some bits in
1116		 * pvattr.va_mask.  This means that the underlying
1117		 * file system couldn't process those attributes.
1118		 * We need to make sure those bits get turned off
1119		 * in the vattr_t structure that gets passed back
1120		 * to the caller.  Figure out which bits were turned
1121		 * off (if any) then set pvattr.va_mask before it
1122		 * gets copied to the vattr_t that the caller sees.
1123		 */
1124		off_bits = (pvattr.va_mask ^ PARENT_ATTRMASK) & PARENT_ATTRMASK;
1125		pvattr.va_mask = vap->va_mask & ~off_bits;
1126		*vap = pvattr;
1127	}
1128
1129	vap->va_type = VDIR;
1130	vap->va_mode = MAKEIMODE(vap->va_type, S_ISVTX | 0777);
1131	vap->va_fsid = vp->v_vfsp->vfs_dev;
1132	vap->va_nodeid = gfs_file_inode(vp);
1133	vap->va_nlink = XATTRDIR_NENTS+2;
1134	vap->va_size = vap->va_nlink;
1135	gethrestime(&now);
1136	vap->va_atime = now;
1137	vap->va_blksize = 0;
1138	vap->va_nblocks = 0;
1139	vap->va_seq = 0;
1140	return (0);
1141}
1142
1143static int
1144xattr_dir_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1145    caller_context_t *ct)
1146{
1147	vnode_t *realvp;
1148	int error;
1149
1150	/*
1151	 * If there is a real xattr directory, do the setattr there.
1152	 * Otherwise, just return success.  The GFS directory is transient,
1153	 * and any setattr changes can disappear anyway.
1154	 */
1155	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1156	if (error == 0) {
1157		error = VOP_SETATTR(realvp, vap, flags, cr, ct);
1158	}
1159	if (error == ENOENT) {
1160		error = 0;
1161	}
1162	return (error);
1163}
1164
1165/* ARGSUSED */
1166static int
1167xattr_dir_access(vnode_t *vp, int mode, int flags, cred_t *cr,
1168    caller_context_t *ct)
1169{
1170	int error;
1171	vnode_t *realvp = NULL;
1172
1173	if (mode & VWRITE) {
1174		return (EACCES);
1175	}
1176
1177	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1178	if ((error == ENOENT || error == EINVAL)) {
1179		/*
1180		 * These errors mean there's no "real" xattr dir.
1181		 * The GFS xattr dir always allows access.
1182		 */
1183		return (0);
1184	}
1185	if (error != 0) {
1186		/*
1187		 * The "real" xattr dir was not accessible.
1188		 */
1189		return (error);
1190	}
1191	/*
1192	 * We got the "real" xattr dir.
1193	 * Pass through the access call.
1194	 */
1195	error = VOP_ACCESS(realvp, mode, flags, cr, ct);
1196
1197	return (error);
1198}
1199
1200static int
1201xattr_dir_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
1202    int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
1203    vsecattr_t *vsecp)
1204{
1205	vnode_t *pvp;
1206	int error;
1207
1208	*vpp = NULL;
1209
1210	/*
1211	 * Don't allow creation of extended attributes with sysattr names.
1212	 */
1213	if (is_sattr_name(name)) {
1214		return (gfs_dir_lookup(dvp, name, vpp, cr, 0, NULL, NULL));
1215	}
1216
1217	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
1218	    cr, ct);
1219	if (error == 0) {
1220		error = VOP_CREATE(pvp, name, vap, excl, mode, vpp, cr, flag,
1221		    ct, vsecp);
1222	}
1223	return (error);
1224}
1225
1226static int
1227xattr_dir_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1228    int flags)
1229{
1230	vnode_t *pvp;
1231	int error;
1232
1233	if (is_sattr_name(name)) {
1234		return (EACCES);
1235	}
1236
1237	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1238	if (error == 0) {
1239		error = VOP_REMOVE(pvp, name, cr, ct, flags);
1240	}
1241	return (error);
1242}
1243
1244static int
1245xattr_dir_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
1246    caller_context_t *ct, int flags)
1247{
1248	vnode_t *pvp;
1249	int error;
1250
1251	if (svp->v_flag & V_SYSATTR) {
1252		return (EINVAL);
1253	}
1254
1255	error = xattr_dir_realdir(tdvp, &pvp, LOOKUP_XATTR, cr, ct);
1256	if (error == 0) {
1257		error = VOP_LINK(pvp, svp, name, cr, ct, flags);
1258	}
1259	return (error);
1260}
1261
1262static int
1263xattr_dir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
1264    cred_t *cr, caller_context_t *ct, int flags)
1265{
1266	vnode_t *spvp, *tpvp;
1267	int error;
1268
1269	if (is_sattr_name(snm) || is_sattr_name(tnm))
1270		return (xattr_copy(sdvp, snm, tdvp, tnm, cr, ct));
1271	/*
1272	 * We know that sdvp is a GFS dir, or we wouldn't be here.
1273	 * Get the real unnamed directory.
1274	 */
1275	error = xattr_dir_realdir(sdvp, &spvp, LOOKUP_XATTR, cr, ct);
1276	if (error) {
1277		return (error);
1278	}
1279
1280	if (sdvp == tdvp) {
1281		/*
1282		 * If the source and target are the same GFS directory, the
1283		 * underlying unnamed source and target dir will be the same.
1284		 */
1285		tpvp = spvp;
1286	} else if (tdvp->v_flag & V_SYSATTR) {
1287		/*
1288		 * If the target dir is a different GFS directory,
1289		 * find its underlying unnamed dir.
1290		 */
1291		error = xattr_dir_realdir(tdvp, &tpvp, LOOKUP_XATTR, cr, ct);
1292		if (error) {
1293			return (error);
1294		}
1295	} else {
1296		/*
1297		 * Target dir is outside of GFS, pass it on through.
1298		 */
1299		tpvp = tdvp;
1300	}
1301
1302	error = VOP_RENAME(spvp, snm, tpvp, tnm, cr, ct, flags);
1303
1304	return (error);
1305}
1306
1307/*
1308 * readdir_xattr_casecmp: given a system attribute name, see if there
1309 * is a real xattr with the same normalized name.
1310 */
1311static int
1312readdir_xattr_casecmp(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
1313    int *eflags)
1314{
1315	int error;
1316	vnode_t *vp;
1317	struct pathname pn;
1318
1319	*eflags = 0;
1320
1321	error = pn_get(nm, UIO_SYSSPACE, &pn);
1322	if (error == 0) {
1323		error = VOP_LOOKUP(dvp, nm, &vp, &pn,
1324		    FIGNORECASE, rootvp, cr, ct, NULL, NULL);
1325		if (error == 0) {
1326			*eflags = ED_CASE_CONFLICT;
1327			VN_RELE(vp);
1328		} else if (error == ENOENT) {
1329			error = 0;
1330		}
1331		pn_free(&pn);
1332	}
1333
1334	return (error);
1335}
1336
/*
 * Read entries from a GFS xattr directory.
 *
 * The listing is produced in two phases:
 *
 *  1. Only when the caller's offset (uiop->uio_loffset) is zero, the
 *     static GFS entries are emitted through the gfs_readdir_* helpers.
 *     All of them must fit in this first call, otherwise EINVAL is
 *     returned.
 *  2. If a real xattr directory exists, the remainder of the listing
 *     comes from VOP_READDIR on that directory.
 *
 * eofp may be NULL, in which case a local variable stands in for it.
 * Returns 0 on success or an errno value.
 */
static int
xattr_dir_readdir(vnode_t *dvp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
	vnode_t *pvp;
	int error;
	int local_eof;
	int reset_off = 0;	/* set once the static entries were emitted */
	int has_xattrs = 0;	/* set when a real xattr dir was found */

	if (eofp == NULL) {
		eofp = &local_eof;
	}
	*eofp = 0;

	/*
	 * See if there is a real extended attribute directory.
	 */
	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
	if (error == 0) {
		has_xattrs = 1;
	}

	/*
	 * Start by reading up the static entries.
	 */
	if (uiop->uio_loffset == 0) {
		ino64_t pino, ino;
		offset_t off;
		gfs_dir_t *dp = dvp->v_data;
		gfs_readdir_state_t gstate;

		if (has_xattrs) {
			/*
			 * If there is a real xattr dir, skip . and ..
			 * in the GFS dir.  We'll pick them up below
			 * when we call into the underlying fs.
			 */
			uiop->uio_loffset = GFS_STATIC_ENTRY_OFFSET;
		}
		error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
		if (error == 0) {
			error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1,
			    uiop, pino, ino, flags);
		}
		if (error) {
			return (error);
		}

		/*
		 * Walk the offsets handed back by gfs_readdir_pred until
		 * it reports an error or we run past the static entries.
		 */
		while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
		    !*eofp) {
			if (off >= 0 && off < dp->gfsd_nstatic) {
				int eflags;

				/*
				 * Check to see if this sysattr set name has a
				 * case-insensitive conflict with a real xattr
				 * name.
				 */
				eflags = 0;
				if ((flags & V_RDDIR_ENTFLAGS) && has_xattrs) {
					error = readdir_xattr_casecmp(pvp,
					    dp->gfsd_static[off].gfse_name,
					    cr, ct, &eflags);
					if (error)
						break;
				}
				ino = dp->gfsd_inode(dvp, off);

				error = gfs_readdir_emit(&gstate, uiop, off,
				    ino, dp->gfsd_static[off].gfse_name,
				    eflags);
				if (error)
					break;
			} else {
				/* Offset is beyond the static entries. */
				*eofp = 1;
			}
		}

		error = gfs_readdir_fini(&gstate, error, eofp, *eofp);
		if (error) {
			return (error);
		}

		/*
		 * We must read all of the static entries in the first
		 * call.  Otherwise we won't know if uio_loffset in a
		 * subsequent call refers to the static entries or to those
		 * in an underlying fs.
		 */
		if (*eofp == 0)
			return (EINVAL);
		reset_off = 1;
	}

	/* Without a real xattr dir the static entries are all there is. */
	if (!has_xattrs) {
		*eofp = 1;
		return (0);
	}

	/*
	 * Continue with the real xattr directory.  If the static entries
	 * were just emitted, the underlying directory is read from its
	 * own offset zero.
	 */
	*eofp = 0;
	if (reset_off) {
		uiop->uio_loffset = 0;
	}
	(void) VOP_RWLOCK(pvp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(pvp, uiop, cr, eofp, ct, flags);
	VOP_RWUNLOCK(pvp, V_WRITELOCK_FALSE, NULL);

	return (error);
}
1447
1448/*
1449 * Last reference on a (GFS) XATTR directory.
1450 *
1451 * If there's a real XATTR directory in the underlying FS, we will have
1452 * taken a hold on that directory in xattr_dir_realdir.  Now that the
1453 * last hold on the GFS directory is gone, it's time to release that
1454 * hold on the underlying XATTR directory.
1455 */
1456/* ARGSUSED */
1457static void
1458xattr_dir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1459{
1460	xattr_dir_t *dp;
1461
1462	dp = gfs_dir_inactive(vp);	/* will track v_count */
1463	if (dp != NULL) {
1464		/* vp was freed */
1465		if (dp->xattr_realvp != NULL)
1466			VN_RELE(dp->xattr_realvp);
1467
1468		kmem_free(dp, ((gfs_file_t *)dp)->gfs_size);
1469	}
1470}
1471
1472static int
1473xattr_dir_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
1474    caller_context_t *ct)
1475{
1476	switch (cmd) {
1477	case _PC_XATTR_EXISTS:
1478	case _PC_SATTR_ENABLED:
1479	case _PC_SATTR_EXISTS:
1480		*valp = 0;
1481		return (0);
1482	default:
1483		return (fs_pathconf(vp, cmd, valp, cr, ct));
1484	}
1485}
1486
1487/* ARGSUSED */
1488static int
1489xattr_dir_realvp(vnode_t *vp, vnode_t **realvp, caller_context_t *ct)
1490{
1491	int error;
1492
1493	error = xattr_dir_realdir(vp, realvp, LOOKUP_XATTR, kcred, NULL);
1494	return (error);
1495
1496}
1497
/*
 * Vnode operations template for the GFS xattr directory.  Most entries
 * point at the xattr_dir_* handlers in this file; lookup is served by
 * gfs_vop_lookup, seek by fs_seek, and ioctl and mkdir are wired to
 * fs_inval.  The table ends with an all-NULL entry.
 */
static const fs_operation_def_t xattr_dir_tops[] = {
	{ VOPNAME_OPEN,		{ .vop_open = xattr_dir_open }		},
	{ VOPNAME_CLOSE,	{ .vop_close = xattr_dir_close }	},
	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_dir_getattr }	},
	{ VOPNAME_SETATTR,	{ .vop_setattr = xattr_dir_setattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = xattr_dir_access }	},
	{ VOPNAME_READDIR,	{ .vop_readdir = xattr_dir_readdir }	},
	{ VOPNAME_LOOKUP,	{ .vop_lookup = gfs_vop_lookup }	},
	{ VOPNAME_CREATE,	{ .vop_create = xattr_dir_create }	},
	{ VOPNAME_REMOVE,	{ .vop_remove = xattr_dir_remove }	},
	{ VOPNAME_LINK,		{ .vop_link = xattr_dir_link }		},
	{ VOPNAME_RENAME,	{ .vop_rename = xattr_dir_rename }	},
	{ VOPNAME_MKDIR,	{ .error = fs_inval }			},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = xattr_dir_inactive }	},
	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_dir_pathconf }	},
	{ VOPNAME_REALVP,	{ .vop_realvp = xattr_dir_realvp } },
	{ NULL, NULL }
};
1519
/*
 * Table handed to gfs_make_opsvec() by xattr_init().  Each row pairs a
 * descriptive name and an operations template with the address of an
 * ops-vector variable (presumably filled in by gfs_make_opsvec -- confirm
 * against gfs.c).  The table ends with an all-NULL row.
 */
static gfs_opsvec_t xattr_opsvec[] = {
	{ "xattr dir", xattr_dir_tops, &xattr_dir_ops },
	{ "system attributes", xattr_file_tops, &xattr_file_ops },
	{ NULL, NULL, NULL }
};
1525
1526/*
1527 * Callback supporting lookup in a GFS XATTR directory.
1528 */
1529static int
1530xattr_lookup_cb(vnode_t *vp, const char *nm, vnode_t **vpp, ino64_t *inop,
1531    cred_t *cr, int flags, int *deflags, pathname_t *rpnp)
1532{
1533	vnode_t *pvp;
1534	struct pathname pn;
1535	int error;
1536
1537	*vpp = NULL;
1538	*inop = 0;
1539
1540	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, NULL);
1541
1542	/*
1543	 * Return ENOENT for EACCES requests during lookup.  Once an
1544	 * attribute create is attempted EACCES will be returned.
1545	 */
1546	if (error) {
1547		if (error == EACCES)
1548			return (ENOENT);
1549		return (error);
1550	}
1551
1552	error = pn_get((char *)nm, UIO_SYSSPACE, &pn);
1553	if (error == 0) {
1554		error = VOP_LOOKUP(pvp, (char *)nm, vpp, &pn, flags, rootvp,
1555		    cr, NULL, deflags, rpnp);
1556		pn_free(&pn);
1557	}
1558
1559	return (error);
1560}
1561
1562/* ARGSUSED */
1563static ino64_t
1564xattrdir_do_ino(vnode_t *vp, int index)
1565{
1566	/*
1567	 * We use index 0 for the directory fid.  Start
1568	 * the file numbering at 1.
1569	 */
1570	return ((ino64_t)index+1);
1571}
1572
/*
 * One-time initialization for this module: hand the xattr_opsvec table
 * to gfs_make_opsvec().  VERIFY enforces that the call returns 0.
 */
void
xattr_init(void)
{
	VERIFY(gfs_make_opsvec(xattr_opsvec) == 0);
}
1578
/*
 * Get the XATTR dir for some file or directory.
 * See vnode.c: fop_lookup()
 *
 * Note this only gets the GFS XATTR directory.  We'll get the
 * real XATTR directory later, in xattr_dir_realdir.
 *
 * dvp must be a regular file or a directory and must not itself be in
 * sysattr space; otherwise EINVAL is returned.  EINVAL is also returned
 * when neither xattrs nor sysattrs are allowed on dvp.  When only xattrs
 * are allowed, the lookup is passed to the underlying file system and
 * its result is returned.  Otherwise the GFS directory cached in
 * dvp->v_xattrdir is returned with a hold, being created first if there
 * is none yet.  *vpp is set to NULL on entry.
 */
int
xattr_dir_lookup(vnode_t *dvp, vnode_t **vpp, int flags, cred_t *cr)
{
	int error = 0;

	*vpp = NULL;

	if (dvp->v_type != VDIR && dvp->v_type != VREG)
		return (EINVAL);

	mutex_enter(&dvp->v_lock);

	/*
	 * If we're already in sysattr space, don't allow creation
	 * of another level of sysattrs.
	 */
	if (dvp->v_flag & V_SYSATTR) {
		mutex_exit(&dvp->v_lock);
		return (EINVAL);
	}

	if (dvp->v_xattrdir != NULL) {
		/* Fast path: the GFS xattr dir already exists. */
		*vpp = dvp->v_xattrdir;
		VN_HOLD(*vpp);
	} else {
		ulong_t val;
		int xattrs_allowed = dvp->v_vfsp->vfs_flag & VFS_XATTR;
		int sysattrs_allowed = 1;

		/*
		 * We have to drop the lock on dvp.  gfs_dir_create will
		 * grab it for a VN_HOLD.
		 */
		mutex_exit(&dvp->v_lock);

		/*
		 * If dvp allows xattr creation, but not sysattr
		 * creation, return the real xattr dir vp. We can't
		 * use the vfs feature mask here because _PC_SATTR_ENABLED
		 * has vnode-level granularity (e.g. .zfs).
		 */
		error = VOP_PATHCONF(dvp, _PC_SATTR_ENABLED, &val, cr, NULL);
		if (error != 0 || val == 0)
			sysattrs_allowed = 0;

		if (!xattrs_allowed && !sysattrs_allowed)
			return (EINVAL);

		if (!sysattrs_allowed) {
			struct pathname pn;
			char *nm = "";

			/*
			 * Look up the empty name in dvp with
			 * LOOKUP_HAVE_SYSATTR_DIR added to the caller's
			 * flags, and return whatever that yields.
			 */
			error = pn_get(nm, UIO_SYSSPACE, &pn);
			if (error)
				return (error);
			error = VOP_LOOKUP(dvp, nm, vpp, &pn,
			    flags|LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, NULL,
			    NULL, NULL);
			pn_free(&pn);
			return (error);
		}

		/*
		 * Note that we act as if we were given CREATE_XATTR_DIR,
		 * but only for creation of the GFS directory.
		 */
		*vpp = gfs_dir_create(
		    sizeof (xattr_dir_t), dvp, xattr_dir_ops, xattr_dirents,
		    xattrdir_do_ino, MAXNAMELEN, NULL, xattr_lookup_cb);
		/*
		 * The lock was dropped above, so another thread may have
		 * installed v_xattrdir in the meantime; re-check under it.
		 */
		mutex_enter(&dvp->v_lock);
		if (dvp->v_xattrdir != NULL) {
			/*
			 * We lost the race to create the xattr dir.
			 * Destroy this one, use the winner.  We can't
			 * just call VN_RELE(*vpp), because the vnode
			 * is only partially initialized.
			 */
			gfs_dir_t *dp = (*vpp)->v_data;

			ASSERT((*vpp)->v_count == 1);
			vn_free(*vpp);
			VN_RELE_LOCKED(dvp);

			/* Tear down the GFS state by hand. */
			mutex_destroy(&dp->gfsd_lock);
			kmem_free(dp->gfsd_static,
			    dp->gfsd_nstatic * sizeof (gfs_dirent_t));
			kmem_free(dp, dp->gfsd_file.gfs_size);

			/* dvp was held by winner in gfs_dir_create */
			*vpp = dvp->v_xattrdir;
			VN_HOLD(*vpp);
		} else {
			/* winner */
			(*vpp)->v_flag |= (V_XATTRDIR|V_SYSATTR);
			dvp->v_xattrdir = *vpp;
		}
	}
	mutex_exit(&dvp->v_lock);

	return (error);
}
1687
1688int
1689xattr_dir_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1690{
1691	int error;
1692	vnode_t *pvp, *dvp;
1693	xattr_fid_t *xfidp;
1694	struct pathname pn;
1695	char *nm;
1696	uint16_t orig_len;
1697
1698	*vpp = NULL;
1699
1700	if (fidp->fid_len < XATTR_FIDSZ)
1701		return (EINVAL);
1702
1703	xfidp = (xattr_fid_t *)fidp;
1704	orig_len = fidp->fid_len;
1705	fidp->fid_len = xfidp->parent_len;
1706
1707	error = VFS_VGET(vfsp, &pvp, fidp);
1708	fidp->fid_len = orig_len;
1709	if (error)
1710		return (error);
1711
1712	/*
1713	 * Start by getting the GFS sysattr directory.	We might need
1714	 * to recreate it during the VOP_LOOKUP.
1715	 */
1716	nm = "";
1717	error = pn_get(nm, UIO_SYSSPACE, &pn);
1718	if (error) {
1719		VN_RELE(pvp);
1720		return (EINVAL);
1721	}
1722
1723	error = VOP_LOOKUP(pvp, nm, &dvp, &pn, LOOKUP_XATTR|CREATE_XATTR_DIR,
1724	    rootvp, CRED(), NULL, NULL, NULL);
1725	pn_free(&pn);
1726	VN_RELE(pvp);
1727	if (error)
1728		return (error);
1729
1730	if (xfidp->dir_offset == 0) {
1731		/*
1732		 * If we were looking for the directory, we're done.
1733		 */
1734		*vpp = dvp;
1735		return (0);
1736	}
1737
1738	if (xfidp->dir_offset > XATTRDIR_NENTS) {
1739		VN_RELE(dvp);
1740		return (EINVAL);
1741	}
1742
1743	nm = xattr_dirents[xfidp->dir_offset - 1].gfse_name;
1744
1745	error = pn_get(nm, UIO_SYSSPACE, &pn);
1746	if (error) {
1747		VN_RELE(dvp);
1748		return (EINVAL);
1749	}
1750
1751	error = VOP_LOOKUP(dvp, nm, vpp, &pn, 0, rootvp, CRED(), NULL,
1752	    NULL, NULL);
1753
1754	pn_free(&pn);
1755	VN_RELE(dvp);
1756
1757	return (error);
1758}
1759