1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Copyright 2018 Nexenta Systems, Inc.
29 */
30
31#include <fs/fs_subr.h>
32
33#include <sys/errno.h>
34#include <sys/file.h>
35#include <sys/kmem.h>
36#include <sys/kobj.h>
37#include <sys/cmn_err.h>
38#include <sys/stat.h>
39#include <sys/systm.h>
40#include <sys/sysmacros.h>
41#include <sys/atomic.h>
42#include <sys/vfs.h>
43#include <sys/vfs_opreg.h>
44
45#include <sharefs/sharefs.h>
46
47/*
48 * sharefs_snap_create: create a large character buffer with
49 * the shares enumerated.
50 */
51static int
52sharefs_snap_create(sharetab_globals_t *sg, shnode_t *sft)
53{
54	sharetab_t		*sht;
55	share_t			*sh;
56	size_t			sWritten = 0;
57	int			iCount = 0;
58	char			*buf;
59
60	rw_enter(&sg->sharefs_lock, RW_WRITER);
61	rw_enter(&sg->sharetab_lock, RW_READER);
62
63	if (sft->sharefs_snap) {
64		/*
65		 * Nothing has changed, so no need to grab a new copy!
66		 */
67		if (sft->sharefs_generation == sg->sharetab_generation) {
68			rw_exit(&sg->sharetab_lock);
69			rw_exit(&sg->sharefs_lock);
70			return (0);
71		}
72
73		ASSERT(sft->sharefs_size != 0);
74		kmem_free(sft->sharefs_snap, sft->sharefs_size + 1);
75		sft->sharefs_snap = NULL;
76	}
77
78	sft->sharefs_size = sg->sharetab_size;
79	sft->sharefs_count = sg->sharetab_count;
80
81	if (sft->sharefs_size == 0) {
82		rw_exit(&sg->sharetab_lock);
83		rw_exit(&sg->sharefs_lock);
84		return (0);
85	}
86
87	sft->sharefs_snap = kmem_zalloc(sft->sharefs_size + 1, KM_SLEEP);
88
89	buf = sft->sharefs_snap;
90
91	/*
92	 * Walk the Sharetab, dumping each entry.
93	 */
94	for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) {
95		int	i;
96
97		for (i = 0; i < SHARETAB_HASHES; i++) {
98			for (sh = sht->s_buckets[i].ssh_sh;
99			    sh != NULL;
100			    sh = sh->sh_next) {
101				int	n;
102
103				if ((sWritten + sh->sh_size) >
104				    sft->sharefs_size) {
105					goto error_fault;
106				}
107
108				/*
109				 * Note that sh->sh_size accounts
110				 * for the field seperators.
111				 * We need to add one for the EOL
112				 * marker. And we should note that
113				 * the space is accounted for in
114				 * each share by the EOS marker.
115				 */
116				n = snprintf(&buf[sWritten],
117				    sh->sh_size + 1,
118				    "%s\t%s\t%s\t%s\t%s\n",
119				    sh->sh_path,
120				    sh->sh_res,
121				    sh->sh_fstype,
122				    sh->sh_opts,
123				    sh->sh_descr);
124
125				if (n != sh->sh_size) {
126					goto error_fault;
127				}
128
129				sWritten += n;
130				iCount++;
131			}
132		}
133	}
134
135	/*
136	 * We want to record the generation number and
137	 * mtime inside this snapshot.
138	 */
139	gethrestime(&sg->sharetab_snap_time);
140	sft->sharefs_snap_time = sg->sharetab_snap_time;
141	sft->sharefs_generation = sg->sharetab_generation;
142
143	ASSERT(iCount == sft->sharefs_count);
144
145	rw_exit(&sg->sharetab_lock);
146	rw_exit(&sg->sharefs_lock);
147	return (0);
148
149error_fault:
150
151	kmem_free(sft->sharefs_snap, sft->sharefs_size + 1);
152	sft->sharefs_size = 0;
153	sft->sharefs_count = 0;
154	sft->sharefs_snap = NULL;
155	rw_exit(&sg->sharetab_lock);
156	rw_exit(&sg->sharefs_lock);
157
158	return (EFAULT);
159}
160
161/* ARGSUSED */
162static int
163sharefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
164    caller_context_t *ct)
165{
166	timestruc_t	now;
167	shnode_t	*sft = VTOSH(vp);
168	sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
169
170	vap->va_type = VREG;
171	vap->va_mode = S_IRUSR | S_IRGRP | S_IROTH;
172	vap->va_nodeid = SHAREFS_INO_FILE;
173	vap->va_nlink = 1;
174
175	rw_enter(&sg->sharefs_lock, RW_READER);
176
177	/*
178	 * If we get asked about a snapped vnode, then
179	 * we must report the data in that vnode.
180	 *
181	 * Else we report what is currently in the
182	 * sharetab.
183	 */
184	if (sft->sharefs_real_vp) {
185		rw_enter(&sg->sharetab_lock, RW_READER);
186		vap->va_size = sg->sharetab_size;
187		vap->va_mtime = sg->sharetab_mtime;
188		rw_exit(&sg->sharetab_lock);
189	} else {
190		vap->va_size = sft->sharefs_size;
191		vap->va_mtime = sft->sharefs_snap_time;
192	}
193	rw_exit(&sg->sharefs_lock);
194
195	gethrestime(&now);
196	vap->va_atime = vap->va_ctime = now;
197
198	vap->va_uid = 0;
199	vap->va_gid = 0;
200	vap->va_rdev = 0;
201	vap->va_blksize = DEV_BSIZE;
202	vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
203	vap->va_seq = 0;
204	vap->va_fsid = vp->v_vfsp->vfs_dev;
205
206	return (0);
207}
208
209/* ARGSUSED */
210static int
211sharefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
212    caller_context_t *ct)
213{
214	if (mode & (VWRITE|VEXEC))
215		return (EROFS);
216
217	return (0);
218}
219
220/* ARGSUSED */
221int
222sharefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
223{
224	vnode_t		*vp;
225	vnode_t		*ovp = *vpp;
226	shnode_t	*sft;
227	int		error = 0;
228
229	if (flag & FWRITE)
230		return (EINVAL);
231
232	/*
233	 * Create a new sharefs vnode for each operation. In order to
234	 * avoid locks, we create a snapshot which can not change during
235	 * reads.
236	 */
237	vp = gfs_file_create(sizeof (shnode_t), NULL, sharefs_ops_data);
238
239	((gfs_file_t *)vp->v_data)->gfs_ino = SHAREFS_INO_FILE;
240
241	/*
242	 * Hold the parent!
243	 */
244	VFS_HOLD(ovp->v_vfsp);
245
246	VN_SET_VFS_TYPE_DEV(vp, ovp->v_vfsp, VREG, 0);
247
248	vp->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
249
250	*vpp = vp;
251	VN_RELE(ovp);
252
253	sft = VTOSH(vp);
254
255	/*
256	 * No need for the lock, no other thread can be accessing
257	 * this data structure.
258	 */
259	atomic_inc_32(&sft->sharefs_refs);
260	sft->sharefs_real_vp = 0;
261
262	/*
263	 * Since the sharetab could easily change on us whilst we
264	 * are dumping an extremely huge sharetab, we make a copy
265	 * of it here and use it to dump instead.
266	 */
267	error = sharefs_snap_create(sharetab_get_globals(vp->v_vfsp->vfs_zone),
268	    sft);
269
270	return (error);
271}
272
273/* ARGSUSED */
274int
275sharefs_close(vnode_t *vp, int flag, int count,
276    offset_t off, cred_t *cr, caller_context_t *ct)
277{
278	shnode_t	*sft = VTOSH(vp);
279	sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
280
281	if (count > 1)
282		return (0);
283
284	rw_enter(&sg->sharefs_lock, RW_WRITER);
285	if (vp->v_count == 1) {
286		if (sft->sharefs_snap != NULL) {
287			kmem_free(sft->sharefs_snap, sft->sharefs_size + 1);
288			sft->sharefs_size = 0;
289			sft->sharefs_snap = NULL;
290			sft->sharefs_generation = 0;
291		}
292	}
293	atomic_dec_32(&sft->sharefs_refs);
294	rw_exit(&sg->sharefs_lock);
295
296	return (0);
297}
298
299/* ARGSUSED */
300static int
301sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr,
302    caller_context_t *ct)
303{
304	shnode_t	*sft = VTOSH(vp);
305	off_t		off = uio->uio_offset;
306	size_t		len = uio->uio_resid;
307	int		error = 0;
308	sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
309
310	rw_enter(&sg->sharefs_lock, RW_READER);
311
312	/*
313	 * First check to see if we need to grab a new snapshot.
314	 */
315	if (off == (off_t)0) {
316		rw_exit(&sg->sharefs_lock);
317		error = sharefs_snap_create(sg, sft);
318		if (error) {
319			return (EFAULT);
320		}
321		rw_enter(&sg->sharefs_lock, RW_READER);
322	}
323
324	/* LINTED */
325	if (len <= 0 || off >= sft->sharefs_size) {
326		rw_exit(&sg->sharefs_lock);
327		return (error);
328	}
329
330	if ((size_t)(off + len) > sft->sharefs_size)
331		len = sft->sharefs_size - off;
332
333	if (off < 0 || len > sft->sharefs_size) {
334		rw_exit(&sg->sharefs_lock);
335		return (EFAULT);
336	}
337
338	if (len != 0) {
339		error = uiomove(sft->sharefs_snap + off,
340		    len, UIO_READ, uio);
341	}
342
343	rw_exit(&sg->sharefs_lock);
344	return (error);
345}
346
347/* ARGSUSED */
348static void
349sharefs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *tx)
350{
351	gfs_file_t	*fp = vp->v_data;
352	shnode_t	*sft;
353	sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
354
355	sft = (shnode_t *)gfs_file_inactive(vp);
356	if (sft) {
357		rw_enter(&sg->sharefs_lock, RW_WRITER);
358		if (sft->sharefs_snap != NULL) {
359			kmem_free(sft->sharefs_snap, sft->sharefs_size + 1);
360		}
361
362		kmem_free(sft, fp->gfs_size);
363		rw_exit(&sg->sharefs_lock);
364	}
365}
366
367vnode_t *
368sharefs_create_root_file(vfs_t *vfsp)
369{
370	vnode_t		*vp;
371	shnode_t	*sft;
372
373	vp = gfs_root_create_file(sizeof (shnode_t),
374	    vfsp, sharefs_ops_data, SHAREFS_INO_FILE);
375
376	sft = VTOSH(vp);
377
378	sft->sharefs_real_vp = 1;
379
380	return (vp);
381}
382
383const fs_operation_def_t sharefs_tops_data[] = {
384	{ VOPNAME_OPEN,		{ .vop_open = sharefs_open } },
385	{ VOPNAME_CLOSE,	{ .vop_close = sharefs_close } },
386	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
387	{ VOPNAME_GETATTR,	{ .vop_getattr = sharefs_getattr } },
388	{ VOPNAME_ACCESS,	{ .vop_access = sharefs_access } },
389	{ VOPNAME_INACTIVE,	{ .vop_inactive = sharefs_inactive } },
390	{ VOPNAME_READ,		{ .vop_read = sharefs_read } },
391	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
392	{ NULL }
393};
394