/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
11 
/*
 * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
 */
15 
/*
 * bootfs vnode operations
 */
19 
20 #include <sys/types.h>
21 #include <sys/uio.h>
22 #include <sys/sunddi.h>
23 #include <sys/errno.h>
24 #include <sys/vfs_opreg.h>
25 #include <sys/vnode.h>
26 #include <sys/mman.h>
27 #include <fs/fs_subr.h>
28 #include <sys/policy.h>
29 #include <sys/sysmacros.h>
30 #include <sys/dirent.h>
31 #include <sys/uio.h>
32 #include <vm/pvn.h>
33 #include <vm/hat.h>
34 #include <vm/seg_map.h>
35 #include <vm/seg_vn.h>
36 #include <sys/vmsystm.h>
37 
38 #include <sys/fs/bootfs_impl.h>
39 
/*
 * Pointer to the bootfs vnode operations vector. This file only defines the
 * pointer; nothing here assigns it.
 */
struct vnodeops *bootfs_vnodeops;
41 
42 /*ARGSUSED*/
43 static int
bootfs_open(vnode_t ** vpp,int flag,cred_t * cr,caller_context_t * ct)44 bootfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
45 {
46 	return (0);
47 }
48 
49 /*ARGSUSED*/
50 static int
bootfs_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr,caller_context_t * ct)51 bootfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
52     caller_context_t *ct)
53 {
54 	return (0);
55 }
56 
57 /*ARGSUSED*/
58 static int
bootfs_read(vnode_t * vp,struct uio * uiop,int ioflag,cred_t * cr,caller_context_t * ct)59 bootfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
60     caller_context_t *ct)
61 {
62 	int err;
63 	ssize_t sres = uiop->uio_resid;
64 	bootfs_node_t *bnp = vp->v_data;
65 
66 	if (vp->v_type == VDIR)
67 		return (EISDIR);
68 
69 	if (vp->v_type != VREG)
70 		return (EINVAL);
71 
72 	if (uiop->uio_loffset < 0)
73 		return (EINVAL);
74 
75 	if (uiop->uio_loffset >= bnp->bvn_size)
76 		return (0);
77 
78 	err = 0;
79 	while (uiop->uio_resid != 0) {
80 		caddr_t base;
81 		long offset, frem;
82 		ulong_t poff, segoff;
83 		size_t bytes;
84 		int relerr;
85 
86 		offset = uiop->uio_loffset;
87 		poff = offset & PAGEOFFSET;
88 		bytes = MIN(PAGESIZE - poff, uiop->uio_resid);
89 
90 		frem = bnp->bvn_size - offset;
91 		if (frem <= 0) {
92 			err = 0;
93 			break;
94 		}
95 
96 		/* Don't read past EOF */
97 		bytes = MIN(bytes, frem);
98 
99 		/*
100 		 * Segmaps are likely larger than our page size, so make sure we
101 		 * have the proper offfset into the resulting segmap data.
102 		 */
103 		segoff = (offset & PAGEMASK) & MAXBOFFSET;
104 
105 		base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, bytes,
106 		    1, S_READ);
107 
108 		err = uiomove(base + segoff + poff, bytes, UIO_READ, uiop);
109 		relerr = segmap_release(segkmap, base, 0);
110 
111 		if (err == 0)
112 			err = relerr;
113 
114 		if (err != 0)
115 			break;
116 	}
117 
118 	/* Even if we had an error in a partial read, return success */
119 	if (uiop->uio_resid > sres)
120 		err = 0;
121 
122 	gethrestime(&bnp->bvn_attr.va_atime);
123 
124 	return (err);
125 }
126 
127 /*ARGSUSED*/
128 static int
bootfs_ioctl(vnode_t * vp,int cmd,intptr_t data,int flag,cred_t * cr,int * rvalp,caller_context_t * ct)129 bootfs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag,
130     cred_t *cr, int *rvalp, caller_context_t *ct)
131 {
132 	return (ENOTTY);
133 }
134 
135 /*ARGSUSED*/
136 static int
bootfs_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr,caller_context_t * ct)137 bootfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
138     caller_context_t *ct)
139 {
140 	uint32_t mask;
141 	bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
142 
143 	mask = vap->va_mask;
144 	bcopy(&bpn->bvn_attr, vap, sizeof (vattr_t));
145 	vap->va_mask = mask;
146 	return (0);
147 }
148 
149 /*ARGSUSED*/
150 static int
bootfs_access(vnode_t * vp,int mode,int flags,cred_t * cr,caller_context_t * ct)151 bootfs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
152     caller_context_t *ct)
153 {
154 	int shift = 0;
155 	bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
156 
157 	if (crgetuid(cr) != bpn->bvn_attr.va_uid) {
158 		shift += 3;
159 		if (groupmember(bpn->bvn_attr.va_gid, cr) == 0)
160 			shift += 3;
161 	}
162 
163 	return (secpolicy_vnode_access2(cr, vp, bpn->bvn_attr.va_uid,
164 	    bpn->bvn_attr.va_mode << shift, mode));
165 }
166 
167 /*ARGSUSED*/
168 static int
bootfs_lookup(vnode_t * dvp,char * nm,vnode_t ** vpp,struct pathname * pnp,int flags,vnode_t * rdir,cred_t * cr,caller_context_t * ct,int * direntflags,pathname_t * realpnp)169 bootfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
170     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
171     int *direntflags, pathname_t *realpnp)
172 {
173 	avl_index_t where;
174 	bootfs_node_t sn, *bnp;
175 	bootfs_node_t *bpp = (bootfs_node_t *)dvp->v_data;
176 
177 	if (flags & LOOKUP_XATTR)
178 		return (EINVAL);
179 
180 	if (bpp->bvn_attr.va_type != VDIR)
181 		return (ENOTDIR);
182 
183 	if (*nm == '\0' || strcmp(nm, ".") == 0) {
184 		VN_HOLD(dvp);
185 		*vpp = dvp;
186 		return (0);
187 	}
188 
189 	if (strcmp(nm, "..") == 0) {
190 		VN_HOLD(bpp->bvn_parent->bvn_vnp);
191 		*vpp = bpp->bvn_parent->bvn_vnp;
192 		return (0);
193 	}
194 
195 	sn.bvn_name = nm;
196 	bnp = avl_find(&bpp->bvn_dir, &sn, &where);
197 	if (bnp == NULL)
198 		return (ENOENT);
199 
200 	VN_HOLD(bnp->bvn_vnp);
201 	*vpp = bnp->bvn_vnp;
202 	return (0);
203 }
204 
205 /*ARGSUSED*/
206 static int
bootfs_readdir(vnode_t * vp,struct uio * uiop,cred_t * cr,int * eofp,caller_context_t * ct,int flags)207 bootfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
208     caller_context_t *ct, int flags)
209 {
210 	bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
211 	dirent64_t *dp;
212 	void *buf;
213 	ulong_t bsize, brem;
214 	offset_t coff, roff;
215 	int dlen, ret;
216 	bootfs_node_t *dnp;
217 	boolean_t first = B_TRUE;
218 
219 	if (uiop->uio_loffset >= MAXOFF_T) {
220 		if (eofp != NULL)
221 			*eofp = 1;
222 		return (0);
223 	}
224 
225 	if (uiop->uio_iovcnt != 1)
226 		return (EINVAL);
227 
228 	if (!(uiop->uio_iov->iov_len > 0))
229 		return (EINVAL);
230 
231 	if (vp->v_type != VDIR)
232 		return (ENOTDIR);
233 
234 	roff = uiop->uio_loffset;
235 	coff = 0;
236 	brem = bsize = uiop->uio_iov->iov_len;
237 	buf = kmem_alloc(bsize, KM_SLEEP);
238 	dp = buf;
239 
240 	/*
241 	 * Recall that offsets here are done based on the name of the dirent
242 	 * excluding the null terminator. Therefore `.` is always at 0, `..` is
243 	 * always at 1, and then the first real dirent is at 3. This offset is
244 	 * what's actually stored when we update the offset in the structure.
245 	 */
246 	if (roff == 0) {
247 		dlen = DIRENT64_RECLEN(1);
248 		if (first == B_TRUE) {
249 			if (dlen > brem) {
250 				kmem_free(buf, bsize);
251 				return (EINVAL);
252 			}
253 			first = B_FALSE;
254 		}
255 		dp->d_ino = (ino64_t)bnp->bvn_attr.va_nodeid;
256 		dp->d_off = 0;
257 		dp->d_reclen = (ushort_t)dlen;
258 		(void) strncpy(dp->d_name, ".", DIRENT64_NAMELEN(dlen));
259 		dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
260 		brem -= dlen;
261 	}
262 
263 	if (roff <= 1) {
264 		dlen = DIRENT64_RECLEN(2);
265 		if (first == B_TRUE) {
266 			if (dlen > brem) {
267 				kmem_free(buf, bsize);
268 				return (EINVAL);
269 			}
270 			first = B_FALSE;
271 		}
272 		dp->d_ino = (ino64_t)bnp->bvn_parent->bvn_attr.va_nodeid;
273 		dp->d_off = 1;
274 		dp->d_reclen = (ushort_t)dlen;
275 		(void) strncpy(dp->d_name, "..", DIRENT64_NAMELEN(dlen));
276 		dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
277 		brem -= dlen;
278 	}
279 
280 	coff = 3;
281 	for (dnp = avl_first(&bnp->bvn_dir); dnp != NULL;
282 	    dnp = AVL_NEXT(&bnp->bvn_dir, dnp)) {
283 		size_t nlen = strlen(dnp->bvn_name);
284 
285 		if (roff > coff) {
286 			coff += nlen;
287 			continue;
288 		}
289 
290 		dlen = DIRENT64_RECLEN(nlen);
291 		if (dlen > brem) {
292 			if (first == B_TRUE) {
293 				kmem_free(buf, bsize);
294 				return (EINVAL);
295 			}
296 			break;
297 		}
298 		first = B_FALSE;
299 
300 		dp->d_ino = (ino64_t)dnp->bvn_attr.va_nodeid;
301 		dp->d_off = coff;
302 		dp->d_reclen = (ushort_t)dlen;
303 		(void) strncpy(dp->d_name, dnp->bvn_name,
304 		    DIRENT64_NAMELEN(dlen));
305 		dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
306 		brem -= dlen;
307 		coff += nlen;
308 	}
309 
310 	ret = uiomove(buf, (bsize - brem), UIO_READ, uiop);
311 
312 	if (ret == 0) {
313 		if (dnp == NULL) {
314 			coff++;
315 			if (eofp != NULL)
316 				*eofp = 1;
317 		} else if (eofp != NULL) {
318 			*eofp = 0;
319 		}
320 		uiop->uio_loffset = coff;
321 	}
322 	gethrestime(&bnp->bvn_attr.va_atime);
323 	kmem_free(buf, bsize);
324 	return (ret);
325 }
326 
327 /*ARGSUSED*/
328 static void
bootfs_inactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)329 bootfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
330 {
331 }
332 
333 /*ARGSUSED*/
334 static int
bootfs_rwlock(vnode_t * vp,int write_lock,caller_context_t * ct)335 bootfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
336 {
337 	if (write_lock != 0)
338 		return (EINVAL);
339 	return (0);
340 }
341 
342 /*ARGSUSED*/
343 static void
bootfs_rwunlock(vnode_t * vp,int write_lock,caller_context_t * ct)344 bootfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
345 {
346 }
347 
348 /*ARGSUSED*/
349 static int
bootfs_seek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)350 bootfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
351     caller_context_t *ct)
352 {
353 	bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
354 	if (vp->v_type == VDIR)
355 		return (0);
356 	return ((*noffp < 0 || *noffp > bnp->bvn_size ? EINVAL : 0));
357 }
358 
359 /*
360  * We need to fill in a single page of a vnode's memory based on the actual data
361  * from the kernel. We'll use this node's sliding window into physical memory
362  * and update one page at a time.
363  */
364 /*ARGSUSED*/
365 static int
bootfs_getapage(vnode_t * vp,u_offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,cred_t * cr)366 bootfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
367     page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
368     cred_t *cr)
369 {
370 	bootfs_node_t *bnp = vp->v_data;
371 	page_t *pp, *fpp;
372 	pfn_t pfn;
373 
374 	for (;;) {
375 		/* Easy case where the page exists */
376 		pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED);
377 		if (pp != NULL) {
378 			if (pl != NULL) {
379 				pl[0] = pp;
380 				pl[1] = NULL;
381 			} else {
382 				page_unlock(pp);
383 			}
384 			return (0);
385 		}
386 
387 		pp = page_create_va(vp, off, PAGESIZE, PG_EXCL | PG_WAIT, seg,
388 		    addr);
389 
390 		/*
391 		 * If we didn't get the page, that means someone else beat us to
392 		 * creating this so we need to try again.
393 		 */
394 		if (pp != NULL)
395 			break;
396 	}
397 
398 	pfn = btop((bnp->bvn_addr + off) & PAGEMASK);
399 	fpp = page_numtopp_nolock(pfn);
400 
401 	if (ppcopy(fpp, pp) == 0) {
402 		pvn_read_done(pp, B_ERROR);
403 		return (EIO);
404 	}
405 
406 	if (pl != NULL) {
407 		pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
408 	} else {
409 		pvn_io_done(pp);
410 	}
411 
412 	return (0);
413 }
414 
415 /*ARGSUSED*/
416 static int
bootfs_getpage(vnode_t * vp,offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,cred_t * cr,caller_context_t * ct)417 bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
418     page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
419     cred_t *cr, caller_context_t *ct)
420 {
421 	int err;
422 	bootfs_node_t *bnp = vp->v_data;
423 
424 	if (off + len > bnp->bvn_size + PAGEOFFSET)
425 		return (EFAULT);
426 
427 	if (protp != NULL)
428 		*protp = PROT_ALL;
429 
430 	if (len <= PAGESIZE)
431 		err = bootfs_getapage(vp, (u_offset_t)off, len, protp, pl,
432 		    plsz, seg, addr, rw, cr);
433 	else
434 		err = pvn_getpages(bootfs_getapage, vp, (u_offset_t)off, len,
435 		    protp, pl, plsz, seg, addr, rw, cr);
436 
437 	return (err);
438 }
439 
440 /*ARGSUSED*/
441 static int
bootfs_map(vnode_t * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)442 bootfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
443     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
444     caller_context_t *ct)
445 {
446 	int ret;
447 	segvn_crargs_t vn_a;
448 
449 #ifdef	_ILP32
450 	if (len > MAXOFF_T)
451 		return (ENOMEM);
452 #endif
453 
454 	if (vp->v_flag & VNOMAP)
455 		return (ENOSYS);
456 
457 	if (off < 0 || off > MAXOFFSET_T - off)
458 		return (ENXIO);
459 
460 	if (vp->v_type != VREG)
461 		return (ENODEV);
462 
463 	if ((prot & PROT_WRITE) && (flags & MAP_SHARED))
464 		return (ENOTSUP);
465 
466 	as_rangelock(as);
467 	ret = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
468 	if (ret != 0) {
469 		as_rangeunlock(as);
470 		return (ret);
471 	}
472 
473 	vn_a.vp = vp;
474 	vn_a.offset = (u_offset_t)off;
475 	vn_a.type = flags & MAP_TYPE;
476 	vn_a.prot = prot;
477 	vn_a.maxprot = maxprot;
478 	vn_a.cred = cr;
479 	vn_a.amp = NULL;
480 	vn_a.flags = flags & ~MAP_TYPE;
481 	vn_a.szc = 0;
482 	vn_a.lgrp_mem_policy_flags = 0;
483 
484 	ret = as_map(as, *addrp, len, segvn_create, &vn_a);
485 
486 	as_rangeunlock(as);
487 	return (ret);
488 
489 }
490 
491 /*ARGSUSED*/
492 static int
bootfs_addmap(vnode_t * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)493 bootfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
494     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
495     caller_context_t *ct)
496 {
497 	return (0);
498 }
499 
500 /*ARGSUSED*/
501 static int
bootfs_delmap(vnode_t * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)502 bootfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
503     size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
504     caller_context_t *ct)
505 {
506 	return (0);
507 }
508 
509 static int
bootfs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)510 bootfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
511     caller_context_t *ct)
512 {
513 	int ret;
514 
515 	switch (cmd) {
516 	case _PC_TIMESTAMP_RESOLUTION:
517 		*valp = 1L;
518 		ret = 0;
519 		break;
520 	default:
521 		ret = fs_pathconf(vp, cmd, valp, cr, ct);
522 	}
523 
524 	return (ret);
525 }
526 
527 const fs_operation_def_t bootfs_vnodeops_template[] = {
528 	VOPNAME_OPEN,		{ .vop_open = bootfs_open },
529 	VOPNAME_CLOSE,		{ .vop_close = bootfs_close },
530 	VOPNAME_READ,		{ .vop_read = bootfs_read },
531 	VOPNAME_IOCTL,		{ .vop_ioctl = bootfs_ioctl },
532 	VOPNAME_GETATTR,	{ .vop_getattr = bootfs_getattr },
533 	VOPNAME_ACCESS,		{ .vop_access = bootfs_access },
534 	VOPNAME_LOOKUP,		{ .vop_lookup = bootfs_lookup },
535 	VOPNAME_READDIR,	{ .vop_readdir = bootfs_readdir },
536 	VOPNAME_INACTIVE,	{ .vop_inactive = bootfs_inactive },
537 	VOPNAME_RWLOCK,		{ .vop_rwlock = bootfs_rwlock },
538 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = bootfs_rwunlock },
539 	VOPNAME_SEEK,		{ .vop_seek = bootfs_seek },
540 	VOPNAME_GETPAGE,	{ .vop_getpage = bootfs_getpage },
541 	VOPNAME_MAP,		{ .vop_map = bootfs_map },
542 	VOPNAME_ADDMAP,		{ .vop_addmap = bootfs_addmap },
543 	VOPNAME_DELMAP,		{ .vop_delmap = bootfs_delmap },
544 	VOPNAME_PATHCONF,	{ .vop_pathconf = bootfs_pathconf },
545 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_nosupport },
546 	NULL,			NULL
547 };
548