1 /*
2 * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 *
26 * Copyright 2021 Joyent, Inc.
27 */
28
29 /*
 * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
31 */
32
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <stdbool.h>
37 #include <fcntl.h>
38 #include <errno.h>
39 #include <assert.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <sys/mount.h>
43 #include <sys/param.h>
44 #include <sys/queue.h>
45 #include <sys/socket.h>
46 #include <sys/un.h>
47 #include <dirent.h>
48 #include <pwd.h>
49 #include <grp.h>
50 #include <libgen.h>
51 #include <pthread.h>
52 #include "../lib9p.h"
53 #include "../lib9p_impl.h"
54 #include "../fid.h"
55 #include "../log.h"
56 #include "../rfuncs.h"
57 #include "../genacl.h"
58 #include "backend.h"
59 #include "fs.h"
60
61 #if defined(WITH_CASPER)
62 #include <libcasper.h>
63 #include <casper/cap_pwd.h>
64 #include <casper/cap_grp.h>
65 #endif
66
67 #if defined(__FreeBSD__)
68 #include <sys/param.h>
69 #if __FreeBSD_version >= 1000000
70 #define HAVE_BINDAT
71 #endif
72 #endif
73
74 #if defined(__FreeBSD__)
75 #define HAVE_BIRTHTIME
76 #endif
77
78 #if defined(__APPLE__)
79 #include <sys/syscall.h>
80 #include "Availability.h"
81 #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED
82 #endif
83
84 #if defined (__illumos__)
85 #include <sys/sysmacros.h>
86 #include <sys/statvfs.h>
87 #include <sys/un.h>
88 #include <attr.h>
89 #include <sys/nvpair.h>
90 #endif
91
/*
 * Per-backend state: the directory the export is rooted at, plus
 * export-wide options.
 */
struct fs_softc {
	int	fs_rootfd;	/* fd of the exported root directory */
	bool	fs_readonly;	/* when set, creating new entries gets EROFS */
#if defined(__illumos__)
	/*
	 * illumos keeps a file's creation time (birthtime) in an
	 * extended attribute on filesystems that support it (i.e. zfs).
	 * If the filesystem has no extended attribute support we do not
	 * bother trying to read the creation time.
	 */
	bool	fs_hasxattr;
#endif
#if defined(WITH_CASPER)
	cap_channel_t	*fs_cappwd;	/* casper channel for passwd lookups */
	cap_channel_t	*fs_capgrp;	/* casper channel for group lookups */
#endif
};
109
/*
 * Per-fid open-file state, stored in the fid's lo_aux pointer.
 */
struct fs_fid {
	DIR		*ff_dir;	/* open directory stream, if any */
	int		ff_dirfd;	/* base fd that ff_name is relative to */
	int		ff_fd;		/* open file descriptor, or -1 */
	int		ff_flags;	/* FF_* flags */
	char		*ff_name;	/* path name (heap copy) */
	struct fs_authinfo *ff_ai;	/* shared, ref-counted credentials */
	pthread_mutex_t	ff_mtx;		/* per-fid lock */
	struct l9p_acl	*ff_acl;	/* cached ACL if any */
};
120
121 #if defined(__FreeBSD__)
122 # define STATFS_FSID(_s) \
123 (((uint64_t)(_s)->f_fsid.val[0] << 32) | (uint64_t)(_s)->f_fsid.val[1])
124
125 # define STAT_ATIME(_s) ((_s)->st_atimespec)
126 # define STAT_MTIME(_s) ((_s)->st_mtimespec)
127 # define STAT_CTIME(_s) ((_s)->st_ctimespec)
128 #elif defined (__illumos__)
129 # define STATFS_FSID(_s) ((_s)->f_fsid)
130
131 # define STAT_ATIME(_s) ((_s)->st_atim)
132 # define STAT_MTIME(_s) ((_s)->st_mtim)
133 # define STAT_CTIME(_s) ((_s)->st_ctim)
134 #else
135 #error "Port me"
136 #endif
137
138 #define FF_NO_NFSV4_ACL 0x01 /* don't go looking for NFSv4 ACLs */
139 /* FF_NO_POSIX_ACL 0x02 -- not yet */
140
141 /*
142 * Our authinfo consists of:
143 *
144 * - a reference count
145 * - a uid
146 * - a gid-set
147 *
 * The "default" gid is the first gid in the gid-set, provided the
149 * set size is at least 1. The set-size may be zero, though.
150 *
151 * Adjustments to the ref-count must be atomic, once it's shared.
152 * It would be nice to use C11 atomics here but they are not common
153 * enough to all systems just yet; for now, we use a mutex.
154 *
155 * Note that some ops (Linux style ones) pass an effective gid for
156 * the op, in which case, that gid may override. To achieve this
157 * effect, permissions testing functions also take an extra gid.
158 * If this gid is (gid_t)-1 it is not used and only the remaining
159 * gids take part.
160 *
161 * The uid may also be (uid_t)-1, meaning "no uid was available
162 * at all at attach time". In this case, new files inherit parent
163 * directory uids.
164 *
165 * The refcount is simply the number of "openfile"s using this
166 * authinfo (so that when the last ref goes away, we can free it).
167 *
168 * There are also master ACL flags (same as in ff_flags).
169 */
struct fs_authinfo {
	pthread_mutex_t	ai_mtx;		/* protects ai_refcnt */
	uint32_t	ai_refcnt;	/* number of fs_fids sharing this */
	int		ai_flags;	/* master ACL flags (FF_* bits) */
	uid_t		ai_uid;		/* user ID, or (uid_t)-1 if none */
	int		ai_ngids;	/* number of entries in ai_gids[] */
	gid_t		ai_gids[];	/* NB: flexible array member */
};
178
179 /*
180 * We have a global-static mutex for single-threading Tattach
181 * requests, which use getpwnam (and indirectly, getgr* functions)
182 * which are not reentrant.
183 */
/* Lock taken by fs_attach() around its passwd/group lookups. */
static pthread_mutex_t fs_attach_mutex;
/* Presumably records that fs_attach_mutex was set up (done elsewhere). */
static bool fs_attach_mutex_inited;

/* Mutex attributes handed to pthread_mutex_init() on illumos. */
static pthread_mutexattr_t fs_mutexattr;
188
189 /*
190 * Internal functions (except inline functions).
191 */
/* passwd / group lookups */
static struct passwd *fs_getpwuid(struct fs_softc *sc, uid_t uid,
    struct r_pgdata *pg);
static struct group *fs_getgrgid(struct fs_softc *sc, gid_t gid,
    struct r_pgdata *pg);

/* path name construction */
static int fs_buildname(struct l9p_fid *dir, char *name, char *buf,
    size_t size);
static int fs_pdir(struct fs_softc *sc, struct l9p_fid *fid, char *buf,
    size_t size, struct stat *st);
static int fs_dpf(char *dbuf, char *fname, size_t size);

/* open-mode translation */
static int fs_oflags_dotu(int mode, int *aflags);
static int fs_oflags_dotl(uint32_t l_mode, int *aflags,
    enum l9p_omode *ap9);

/* fid and stat helpers */
static int fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir,
    gid_t egid, struct stat *st, uid_t *nuid, gid_t *ngid);
static struct fs_fid *open_fid(int dirfd, const char *path,
    struct fs_authinfo *ai, bool creating);
static void dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
    struct stat *buf, bool dotu);
static void generate_qid(struct stat *buf, struct l9p_qid *qid);
#ifdef __illumos__
static void getcrtime(struct fs_softc *, int, const char *, uint64_t *,
    uint64_t *);
static void dostatfs(struct l9p_statfs *out, struct statvfs *in,
    long namelen);
#define ACL_TYPE_NFS4 1
acl_t *acl_get_fd_np(int fd, int type);
#else
static void dostatfs(struct l9p_statfs *out, struct statfs *in,
    long namelen);
#endif

/* ACL retrieval and permission checking */
static void fillacl(struct fs_fid *ff);
static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path);
static void dropacl(struct fs_fid *ff);
static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd,
    const char *path);
static int check_access(int32_t opmask,
    struct l9p_acl *pacl, struct stat *pst,
    struct l9p_acl *cacl, struct stat *cst,
    struct fs_authinfo *ai, gid_t egid);

/*
 * Workers behind the create-type operations.  Their definitions are
 * not shown alongside these declarations, so the parameters are left
 * unnamed here.
 */
static int fs_icreate(void *, struct l9p_fid *, char *, int,
    bool, mode_t, gid_t, struct stat *);
static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode,
    gid_t, struct stat *);
static int fs_imkdir(void *, struct l9p_fid *, char *,
    bool, mode_t, gid_t, struct stat *);
static int fs_imkfifo(void *, struct l9p_fid *, char *,
    bool, mode_t, gid_t, struct stat *);
static int fs_imknod(void *, struct l9p_fid *, char *,
    bool, mode_t, dev_t, gid_t, struct stat *);
static int fs_imksocket(void *, struct l9p_fid *, char *,
    bool, mode_t, gid_t, struct stat *);
static int fs_isymlink(void *, struct l9p_fid *, char *, char *,
    gid_t, struct stat *);
238
239 /*
240 * Internal functions implementing backend.
241 */
static int fs_attach(void *softc, struct l9p_request *req);
static int fs_clunk(void *softc, struct l9p_fid *fid);
static int fs_create(void *softc, struct l9p_request *req);
static int fs_open(void *softc, struct l9p_request *req);
static int fs_read(void *softc, struct l9p_request *req);
static int fs_remove(void *softc, struct l9p_fid *fid);
static int fs_stat(void *softc, struct l9p_request *req);
static int fs_walk(void *softc, struct l9p_request *req);
static int fs_write(void *softc, struct l9p_request *req);
static int fs_wstat(void *softc, struct l9p_request *req);
static int fs_statfs(void *softc, struct l9p_request *req);
static int fs_lopen(void *softc, struct l9p_request *req);
static int fs_lcreate(void *softc, struct l9p_request *req);
static int fs_symlink(void *softc, struct l9p_request *req);
static int fs_mknod(void *softc, struct l9p_request *req);
static int fs_rename(void *softc, struct l9p_request *req);
static int fs_readlink(void *softc, struct l9p_request *req);
static int fs_getattr(void *softc, struct l9p_request *req);
static int fs_setattr(void *softc, struct l9p_request *req);
static int fs_xattrwalk(void *softc, struct l9p_request *req);
static int fs_xattrcreate(void *softc, struct l9p_request *req);
static int fs_readdir(void *softc, struct l9p_request *req);
static int fs_fsync(void *softc, struct l9p_request *req);
static int fs_lock(void *softc, struct l9p_request *req);
static int fs_getlock(void *softc, struct l9p_request *req);
static int fs_link(void *softc, struct l9p_request *req);
static int fs_renameat(void *softc, struct l9p_request *req);
static int fs_unlinkat(void *softc, struct l9p_request *req);
static void fs_freefid(void *softc, struct l9p_fid *fid);
271
272 /*
273 * Convert from 9p2000 open/create mode to Unix-style O_* flags.
274 * This includes 9p2000.u extensions, but not 9p2000.L protocol,
275 * which has entirely different open, create, etc., flag bits.
276 *
277 * The <mode> given here is the one-byte (uint8_t) "mode"
278 * argument to Tcreate or Topen, so it can have at most 8 bits.
279 *
280 * https://swtch.com/plan9port/man/man9/open.html and
281 * http://plan9.bell-labs.com/magic/man2html/5/open
282 * both say:
283 *
284 * The [low two bits of the] mode field determines the
285 * type of I/O ... [I]f mode has the OTRUNC (0x10) bit
286 * set, the file is to be truncated, which requires write
287 * permission ...; if the mode has the ORCLOSE (0x40) bit
288 * set, the file is to be removed when the fid is clunked,
289 * which requires permission to remove the file from its
290 * directory. All other bits in mode should be zero. It
291 * is illegal to write a directory, truncate it, or
292 * attempt to remove it on close.
293 *
294 * 9P2000.u may add ODIRECT (0x80); this is not completely clear.
295 * The fcall.h header defines OCEXEC (0x20) as well, but it makes
296 * no sense to send this to a server. There seem to be no bits
297 * 0x04 and 0x08.
298 *
299 * We always turn on O_NOCTTY since as a server, we never want
300 * to gain a controlling terminal. We always turn on O_NOFOLLOW
301 * for reasons described elsewhere.
302 */
303 static int
fs_oflags_dotu(int mode,int * aflags)304 fs_oflags_dotu(int mode, int *aflags)
305 {
306 int flags;
307 #define CONVERT(theirs, ours) \
308 do { \
309 if (mode & (theirs)) { \
310 mode &= ~(theirs); \
311 flags |= ours; \
312 } \
313 } while (0)
314
315 switch (mode & L9P_OACCMODE) {
316
317 case L9P_OREAD:
318 default:
319 flags = O_RDONLY;
320 break;
321
322 case L9P_OWRITE:
323 flags = O_WRONLY;
324 break;
325
326 case L9P_ORDWR:
327 flags = O_RDWR;
328 break;
329
330 case L9P_OEXEC:
331 if (mode & L9P_OTRUNC)
332 return (EINVAL);
333 flags = O_RDONLY;
334 break;
335 }
336
337 flags |= O_NOCTTY | O_NOFOLLOW;
338
339 CONVERT(L9P_OTRUNC, O_TRUNC);
340
341 /*
342 * Now take away some flags locally:
343 * the access mode (already translated)
344 * ORCLOSE - caller only
345 * OCEXEC - makes no sense in server
346 * ODIRECT - not applicable here
347 * If there are any flag bits left after this,
348 * we were unable to translate them. For now, let's
349 * treat this as EINVAL so that we can catch problems.
350 */
351 mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT);
352 if (mode != 0) {
353 L9P_LOG(L9P_INFO,
354 "fs_oflags_dotu: untranslated bits: %#x",
355 (unsigned)mode);
356 return (EINVAL);
357 }
358
359 *aflags = flags;
360 return (0);
361 #undef CONVERT
362 }
363
364 /*
365 * Convert from 9P2000.L (Linux) open mode bits to O_* flags.
366 * See fs_oflags_dotu above.
367 *
368 * Linux currently does not have open-for-exec, but there is a
369 * proposal for it using O_PATH|O_NOFOLLOW, now handled here.
370 *
371 * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE.
372 */
373 static int
fs_oflags_dotl(uint32_t l_mode,int * aflags,enum l9p_omode * ap9)374 fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9)
375 {
376 int flags;
377 enum l9p_omode p9;
378 #define CLEAR(theirs) l_mode &= ~(uint32_t)(theirs)
379 #define CONVERT(theirs, ours) \
380 do { \
381 if (l_mode & (theirs)) { \
382 CLEAR(theirs); \
383 flags |= ours; \
384 } \
385 } while (0)
386
387 /*
388 * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS.
389 */
390 flags = l_mode & O_ACCMODE;
391 if (flags == 3)
392 return (EINVAL);
393 CLEAR(O_ACCMODE);
394
395 if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) ==
396 (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) {
397 CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW);
398 p9 = L9P_OEXEC;
399 } else {
400 /*
401 * Slightly dirty, but same dirt, really, as
402 * setting flags from l_mode & O_ACCMODE.
403 */
404 p9 = (enum l9p_omode)flags; /* slightly dirty */
405 }
406
407 /* turn L_O_TMPFILE into L9P_ORCLOSE in *p9? */
408 if (l_mode & L9P_L_O_TRUNC)
409 p9 |= L9P_OTRUNC; /* but don't CLEAR yet */
410
411 flags |= O_NOCTTY | O_NOFOLLOW;
412
413 /*
414 * L_O_CREAT seems to be noise, since we get separate open
415 * and create. But it is actually set sometimes. We just
416 * throw it out here; create ops must set it themselves and
417 * open ops have no permissions bits and hence cannot create.
418 *
419 * L_O_EXCL does make sense on create ops, i.e., we can
420 * take a create op with or without L_O_EXCL. We pass that
421 * through.
422 */
423 CLEAR(L9P_L_O_CREAT);
424 CONVERT(L9P_L_O_EXCL, O_EXCL);
425 CONVERT(L9P_L_O_TRUNC, O_TRUNC);
426 CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY);
427 CONVERT(L9P_L_O_APPEND, O_APPEND);
428 CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK);
429
430 /*
431 * Discard these as useless noise at our (server) end.
432 * (NOATIME might be useful but we can only set it on a
433 * per-mount basis.)
434 */
435 CLEAR(L9P_L_O_CLOEXEC);
436 CLEAR(L9P_L_O_DIRECT);
437 CLEAR(L9P_L_O_DSYNC);
438 CLEAR(L9P_L_O_FASYNC);
439 CLEAR(L9P_L_O_LARGEFILE);
440 CLEAR(L9P_L_O_NOATIME);
441 CLEAR(L9P_L_O_NOCTTY);
442 CLEAR(L9P_L_O_NOFOLLOW);
443 CLEAR(L9P_L_O_SYNC);
444
445 if (l_mode != 0) {
446 L9P_LOG(L9P_INFO,
447 "fs_oflags_dotl: untranslated bits: %#x",
448 (unsigned)l_mode);
449 return (EINVAL);
450 }
451
452 *aflags = flags;
453 *ap9 = p9;
454 return (0);
455 #undef CLEAR
456 #undef CONVERT
457 }
458
/*
 * Look up the passwd entry for <uid>, using <pg> as the lookup's
 * storage.  Goes through the casper passwd channel when built
 * WITH_CASPER, otherwise through r_getpwuid().
 */
static struct passwd *
fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg)
{
#if !defined(WITH_CASPER)
	(void)sc;		/* only the casper variant needs it */
	return (r_getpwuid(uid, pg));
#else
	return (r_cap_getpwuid(sc->fs_cappwd, uid, pg));
#endif
}
469
/*
 * Look up the group entry for <gid>, using <pg> as the lookup's
 * storage.  Goes through the casper group channel when built
 * WITH_CASPER, otherwise through r_getgrgid().
 */
static struct group *
fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg)
{
#if !defined(WITH_CASPER)
	(void)sc;		/* only the casper variant needs it */
	return (r_getgrgid(gid, pg));
#else
	return (r_cap_getgrgid(sc->fs_capgrp, gid, pg));
#endif
}
480
481 /*
482 * Build full name of file by appending given name to directory name.
483 */
484 static int
fs_buildname(struct l9p_fid * dir,char * name,char * buf,size_t size)485 fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size)
486 {
487 struct fs_fid *dirf = dir->lo_aux;
488 size_t dlen, nlen1;
489
490 assert(dirf != NULL);
491 dlen = strlen(dirf->ff_name);
492 nlen1 = strlen(name) + 1; /* +1 for '\0' */
493 if (dlen + 1 + nlen1 > size)
494 return (ENAMETOOLONG);
495 memcpy(buf, dirf->ff_name, dlen);
496 buf[dlen] = '/';
497 memcpy(buf + dlen + 1, name, nlen1);
498 return (0);
499 }
500
501 /*
502 * Build parent name of file by splitting it off. Return an error
503 * if the given fid represents the root, so that there is no such
504 * parent, or if the discovered parent is not a directory.
505 */
506 static int
fs_pdir(struct fs_softc * sc __unused,struct l9p_fid * fid,char * buf,size_t size,struct stat * st)507 fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf,
508 size_t size, struct stat *st)
509 {
510 struct fs_fid *ff;
511 char *path;
512
513 ff = fid->lo_aux;
514 assert(ff != NULL);
515 path = ff->ff_name;
516 path = r_dirname(path, buf, size);
517 if (path == NULL)
518 return (ENAMETOOLONG);
519 if (fstatat(ff->ff_dirfd, path, st, AT_SYMLINK_NOFOLLOW) != 0)
520 return (errno);
521 if (!S_ISDIR(st->st_mode))
522 return (ENOTDIR);
523 return (0);
524 }
525
/*
 * "Directory plus-equals file": append "/<fname>" to the directory
 * name already held in <dbuf>, a buffer of <size> bytes.
 *
 * This is fs_buildname() for the case where the directory name is
 * already in the output buffer; in effect
 *	strcat(dbuf, "/");
 *	strcat(dbuf, fname);
 * with bounds checking.
 *
 * Returns 0 on success, or ENAMETOOLONG (leaving <dbuf> unchanged)
 * if the result, including its terminating NUL, does not fit.
 */
static int
fs_dpf(char *dbuf, char *fname, size_t size)
{
	size_t used, fsize;
	char *tail;

	used = strlen(dbuf);
	fsize = strlen(fname) + 1;		/* includes the '\0' */

	/* existing name + '/' + fname + '\0' must all fit */
	if (used + 1 + fsize > size)
		return (ENAMETOOLONG);

	tail = dbuf + used;
	*tail++ = '/';
	memcpy(tail, fname, fsize);
	return (0);
}
548
549 /*
550 * Prepare to create a new directory entry (open with O_CREAT,
551 * mkdir, etc -- any operation that creates a new inode),
552 * operating in parent data <dir>, based on authinfo <ai> and
553 * effective gid <egid>.
554 *
555 * The new entity should be owned by user/group <*nuid, *ngid>,
556 * if it's really a new entity. It will be a directory if isdir.
557 *
558 * Returns an error number if the entry should not be created
559 * (e.g., read-only file system or no permission to write in
560 * parent directory). Always sets *nuid and *ngid on success:
561 * in the worst case, when there is no available ID, this will
562 * use the parent directory's IDs. Fills in <*st> on success.
563 */
564 static int
fs_nde(struct fs_softc * sc,struct l9p_fid * dir,bool isdir,gid_t egid,struct stat * st,uid_t * nuid,gid_t * ngid)565 fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid,
566 struct stat *st, uid_t *nuid, gid_t *ngid)
567 {
568 struct fs_fid *dirf;
569 struct fs_authinfo *ai;
570 int32_t op;
571 int error;
572
573 if (sc->fs_readonly)
574 return (EROFS);
575 dirf = dir->lo_aux;
576 assert(dirf != NULL);
577 if (fstatat(dirf->ff_dirfd, dirf->ff_name, st,
578 AT_SYMLINK_NOFOLLOW) != 0)
579 return (errno);
580 if (!S_ISDIR(st->st_mode))
581 return (ENOTDIR);
582 dirf = dir->lo_aux;
583 ai = dirf->ff_ai;
584 fillacl(dirf);
585 op = isdir ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
586 error = check_access(op, dirf->ff_acl, st, NULL, NULL, ai, egid);
587 if (error)
588 return (EPERM);
589
590 *nuid = ai->ai_uid != (uid_t)-1 ? ai->ai_uid : st->st_uid;
591 *ngid = egid != (gid_t)-1 ? egid :
592 ai->ai_ngids > 0 ? ai->ai_gids[0] : st->st_gid;
593 return (0);
594 }
595
596 /*
597 * Allocate new open-file data structure to attach to a fid.
598 *
599 * The new file's authinfo is the same as the old one's, and
600 * we gain a reference.
601 */
602 static struct fs_fid *
open_fid(int dirfd,const char * path,struct fs_authinfo * ai,bool creating)603 open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating)
604 {
605 struct fs_fid *ret;
606 uint32_t newcount;
607 int error;
608
609 ret = l9p_calloc(1, sizeof(*ret));
610 #ifdef __illumos__
611 error = pthread_mutex_init(&ret->ff_mtx, &fs_mutexattr);
612 #else
613 error = pthread_mutex_init(&ret->ff_mtx, NULL);
614 #endif
615 if (error) {
616 free(ret);
617 return (NULL);
618 }
619 ret->ff_fd = -1;
620 ret->ff_dirfd = dirfd;
621 ret->ff_name = strdup(path);
622 if (ret->ff_name == NULL) {
623 (void) pthread_mutex_destroy(&ret->ff_mtx);
624 free(ret);
625 return (NULL);
626 }
627 if (pthread_mutex_lock(&ai->ai_mtx) != 0) {
628 (void) pthread_mutex_destroy(&ret->ff_mtx);
629 free(ret->ff_name);
630 free(ret);
631 return (NULL);
632 }
633 newcount = ++ai->ai_refcnt;
634 (void) pthread_mutex_unlock(&ai->ai_mtx);
635 /*
636 * If we just incremented the count to 1, we're the *first*
637 * reference. This is only allowed when creating the authinfo,
638 * otherwise it means something has gone wrong. This cannot
639 * catch every bad (re)use of a freed authinfo but it may catch
640 * a few.
641 */
642 assert(newcount > 1 || creating);
643 L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
644 (void *)ai, (u_long)newcount);
645 ret->ff_ai = ai;
646 return (ret);
647 }
648
649 static void
dostat(struct fs_softc * sc,struct l9p_stat * s,char * name,struct stat * buf,bool dotu)650 dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
651 struct stat *buf, bool dotu)
652 {
653 struct passwd *user;
654 struct group *group;
655
656 memset(s, 0, sizeof(struct l9p_stat));
657
658 generate_qid(buf, &s->qid);
659
660 s->type = 0;
661 s->dev = 0;
662 s->mode = buf->st_mode & 0777;
663
664 if (S_ISDIR(buf->st_mode))
665 s->mode |= L9P_DMDIR;
666
667 if (S_ISLNK(buf->st_mode) && dotu)
668 s->mode |= L9P_DMSYMLINK;
669
670 if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode))
671 s->mode |= L9P_DMDEVICE;
672
673 if (S_ISSOCK(buf->st_mode))
674 s->mode |= L9P_DMSOCKET;
675
676 if (S_ISFIFO(buf->st_mode))
677 s->mode |= L9P_DMNAMEDPIPE;
678
679 s->atime = (uint32_t)buf->st_atime;
680 s->mtime = (uint32_t)buf->st_mtime;
681 s->length = (uint64_t)buf->st_size;
682
683 s->name = r_basename(name, NULL, 0);
684
685 if (!dotu) {
686 struct r_pgdata udata, gdata;
687
688 user = fs_getpwuid(sc, buf->st_uid, &udata);
689 group = fs_getgrgid(sc, buf->st_gid, &gdata);
690 s->uid = user != NULL ? strdup(user->pw_name) : NULL;
691 s->gid = group != NULL ? strdup(group->gr_name) : NULL;
692 s->muid = user != NULL ? strdup(user->pw_name) : NULL;
693 r_pgfree(&udata);
694 r_pgfree(&gdata);
695 } else {
696 /*
697 * When using 9P2000.u, we don't need to bother about
698 * providing user and group names in textual form.
699 *
700 * NB: if the asprintf()s fail, s->extension should
701 * be unset so we can ignore these.
702 */
703 s->n_uid = buf->st_uid;
704 s->n_gid = buf->st_gid;
705 s->n_muid = buf->st_uid;
706
707 if (S_ISLNK(buf->st_mode)) {
708 char target[MAXPATHLEN];
709 ssize_t ret = readlink(name, target, MAXPATHLEN);
710
711 if (ret < 0) {
712 s->extension = NULL;
713 return;
714 }
715
716 s->extension = strndup(target, (size_t)ret);
717 }
718
719 if (S_ISBLK(buf->st_mode)) {
720 asprintf(&s->extension, "b %d %d", major(buf->st_rdev),
721 minor(buf->st_rdev));
722 }
723
724 if (S_ISCHR(buf->st_mode)) {
725 asprintf(&s->extension, "c %d %d", major(buf->st_rdev),
726 minor(buf->st_rdev));
727 }
728 }
729 }
730
731 #ifndef __illumos__
732 static void
dostatfs(struct l9p_statfs * out,struct statfs * in,long namelen)733 dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen)
734 #else
735 static void
736 dostatfs(struct l9p_statfs *out, struct statvfs *in, long namelen)
737 #endif
738 {
739
740 out->type = L9P_FSTYPE;
741 out->bsize = in->f_bsize;
742 #ifndef __illumos__
743 out->blocks = in->f_blocks;
744 out->bfree = in->f_bfree;
745 out->bavail = in->f_bavail;
746 #else
747 out->blocks = in->f_blocks * in->f_frsize / in->f_bsize;
748 out->bfree = in->f_bfree * in->f_frsize / in->f_bsize;
749 out->bavail = in->f_bavail * in->f_frsize / in->f_bsize;
750 #endif
751 out->files = in->f_files;
752 out->ffree = in->f_ffree;
753 out->namelen = (uint32_t)namelen;
754 out->fsid = STATFS_FSID(in);
755 }
756
757 static void
generate_qid(struct stat * buf,struct l9p_qid * qid)758 generate_qid(struct stat *buf, struct l9p_qid *qid)
759 {
760 qid->path = buf->st_ino;
761 qid->version = 0;
762
763 if (S_ISREG(buf->st_mode))
764 qid->type |= L9P_QTFILE;
765
766 if (S_ISDIR(buf->st_mode))
767 qid->type |= L9P_QTDIR;
768
769 if (S_ISLNK(buf->st_mode))
770 qid->type |= L9P_QTSYMLINK;
771 }
772
773 /*
774 * Fill in ff->ff_acl if it's not set yet. Skip if the "don't use
775 * ACLs" flag is set, and use the flag to remember failure so
776 * we don't bother retrying either.
777 */
778 static void
fillacl(struct fs_fid * ff)779 fillacl(struct fs_fid *ff)
780 {
781
782 if (ff->ff_acl == NULL && (ff->ff_flags & FF_NO_NFSV4_ACL) == 0) {
783 ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name);
784 if (ff->ff_acl == NULL)
785 ff->ff_flags |= FF_NO_NFSV4_ACL;
786 }
787 }
788
789 /*
790 * Get an ACL given fd and/or path name. We check for the "don't get
791 * ACL" flag in the given ff_fid data structure first, but don't set
792 * the flag here. The fillacl() code is similar but will set the
793 * flag; it also uses the ff_fd and ff_name directly.
794 *
795 * (This is used to get ACLs for parent directories, for instance.)
796 */
797 static struct l9p_acl *
getacl(struct fs_fid * ff,int fd,const char * path)798 getacl(struct fs_fid *ff, int fd, const char *path)
799 {
800
801 if (ff->ff_flags & FF_NO_NFSV4_ACL)
802 return (NULL);
803 return look_for_nfsv4_acl(ff, fd, path);
804 }
805
806 /*
807 * Drop cached ff->ff_acl, e.g., after moving from one directory to
808 * another, where inherited ACLs might change.
809 */
810 static void
dropacl(struct fs_fid * ff)811 dropacl(struct fs_fid *ff)
812 {
813
814 l9p_acl_free(ff->ff_acl);
815 ff->ff_acl = NULL;
816 ff->ff_flags = ff->ff_ai->ai_flags;
817 }
818
819 /*
820 * Check to see if we can find NFSv4 ACLs for the given file.
821 * If we have an open fd, we can use that, otherwise we need
822 * to use the path.
823 */
824 static struct l9p_acl *
look_for_nfsv4_acl(struct fs_fid * ff,int fd,const char * path)825 look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path)
826 {
827 struct l9p_acl *acl;
828 #ifdef __illumos__
829 acl_t *sysacl;
830 #else
831 acl_t sysacl;
832 #endif
833 int doclose = 0;
834
835 if (fd < 0) {
836 fd = openat(ff->ff_dirfd, path, 0);
837 doclose = 1;
838 }
839
840 sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4);
841 if (sysacl == NULL) {
842 /*
843 * EINVAL means no NFSv4 ACLs apply for this file.
844 * Other error numbers indicate some kind of problem.
845 */
846 if (errno != EINVAL) {
847 L9P_LOG(L9P_ERROR,
848 "error retrieving NFSv4 ACL from "
849 "fdesc %d (%s): %s", fd,
850 path, strerror(errno));
851 }
852
853 if (doclose)
854 close(fd);
855
856 return (NULL);
857 }
858 #if defined(HAVE_FREEBSD_ACLS)
859 acl = l9p_freebsd_nfsv4acl_to_acl(sysacl);
860 #elif defined(HAVE__ILLUMOS_ACLS)
861 acl = l9p_illumos_nfsv4acl_to_acl(sysacl);
862 #else
863 acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */
864 #endif
865 acl_free(sysacl);
866
867 if (doclose)
868 close(fd);
869
870 return (acl);
871 }
872
873 /*
874 * Verify that the user whose authinfo is in <ai> and effective
875 * group ID is <egid> ((gid_t)-1 means no egid supplied) has
876 * permission to do something.
877 *
878 * The "something" may be rather complex: we allow NFSv4 style
879 * operation masks here, and provide parent and child ACLs and
880 * stat data. At most one of pacl+pst and cacl+cst can be NULL,
881 * unless ACLs are not supported; then pacl and cacl can both
882 * be NULL but pst or cst must be non-NULL depending on the
883 * operation.
884 */
885 static int
check_access(int32_t opmask,struct l9p_acl * pacl,struct stat * pst,struct l9p_acl * cacl,struct stat * cst,struct fs_authinfo * ai,gid_t egid)886 check_access(int32_t opmask,
887 struct l9p_acl *pacl, struct stat *pst,
888 struct l9p_acl *cacl, struct stat *cst,
889 struct fs_authinfo *ai, gid_t egid)
890 {
891 struct l9p_acl_check_args args;
892
893 /*
894 * If we have ACLs, use them exclusively, ignoring Unix
895 * permissions. Otherwise, fall back on stat st_mode
896 * bits, and allow super-user as well.
897 */
898 args.aca_uid = ai->ai_uid;
899 args.aca_gid = egid;
900 args.aca_groups = ai->ai_gids;
901 args.aca_ngroups = (size_t)ai->ai_ngids;
902 args.aca_parent = pacl;
903 args.aca_pstat = pst;
904 args.aca_child = cacl;
905 args.aca_cstat = cst;
906 args.aca_aclmode = pacl == NULL && cacl == NULL
907 ? L9P_ACM_STAT_MODE
908 : L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL;
909
910 args.aca_superuser = true;
911 return (l9p_acl_check_access(opmask, &args));
912 }
913
914 static int
fs_attach(void * softc,struct l9p_request * req)915 fs_attach(void *softc, struct l9p_request *req)
916 {
917 struct fs_authinfo *ai;
918 struct fs_softc *sc = (struct fs_softc *)softc;
919 struct fs_fid *file;
920 struct passwd *pwd;
921 struct stat st;
922 struct r_pgdata udata;
923 uint32_t n_uname;
924 gid_t *gids;
925 uid_t uid;
926 int error;
927 int ngroups;
928
929 assert(req->lr_fid != NULL);
930
931 /*
932 * Single-thread pwd/group related items. We have a reentrant
933 * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist
934 * may use non-reentrant C library getgr* routines.
935 */
936 if ((error = pthread_mutex_lock(&fs_attach_mutex)) != 0)
937 return (error);
938
939 n_uname = req->lr_req.tattach.n_uname;
940 if (n_uname != L9P_NONUNAME) {
941 uid = (uid_t)n_uname;
942 pwd = fs_getpwuid(sc, uid, &udata);
943 #if defined(L9P_DEBUG)
944 if (pwd == NULL)
945 L9P_LOG(L9P_DEBUG,
946 "Tattach: uid %ld: no such user", (long)uid);
947 #endif
948 } else {
949 uid = (uid_t)-1;
950 #if defined(WITH_CASPER)
951 pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname);
952 #else
953 pwd = getpwnam(req->lr_req.tattach.uname);
954 #endif
955 #if defined(L9P_DEBUG)
956 if (pwd == NULL)
957 L9P_LOG(L9P_DEBUG,
958 "Tattach: %s: no such user",
959 req->lr_req.tattach.uname);
960 #endif
961 }
962
963 /*
964 * If caller didn't give a numeric UID, pick it up from pwd
965 * if possible. If that doesn't work we can't continue.
966 *
967 * Note that pwd also supplies the group set. This assumes
968 * the server has the right mapping; this needs improvement.
969 * We do at least support ai->ai_ngids==0 properly now though.
970 */
971 if (uid == (uid_t)-1 && pwd != NULL)
972 uid = pwd->pw_uid;
973 if (uid == (uid_t)-1)
974 error = EPERM;
975 else {
976 error = 0;
977 if (fstat(sc->fs_rootfd, &st) != 0)
978 error = errno;
979 else if (!S_ISDIR(st.st_mode))
980 error = ENOTDIR;
981 }
982 if (error) {
983 (void) pthread_mutex_unlock(&fs_attach_mutex);
984 L9P_LOG(L9P_DEBUG,
985 "Tattach: denying uid=%ld access to rootdir: %s",
986 (long)uid, strerror(error));
987 /*
988 * Pass ENOENT and ENOTDIR through for diagnosis;
989 * others become EPERM. This should not leak too
990 * much security.
991 */
992 return (error == ENOENT || error == ENOTDIR ? error : EPERM);
993 }
994
995 if (pwd != NULL) {
996 /*
997 * This either succeeds and fills in ngroups and
998 * returns non-NULL, or fails and sets ngroups to 0
999 * and returns NULL. Either way ngroups is correct.
1000 */
1001 gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups);
1002 } else {
1003 gids = NULL;
1004 ngroups = 0;
1005 }
1006
1007 /*
1008 * Done with pwd and group related items that may use
1009 * non-reentrant C library routines; allow other threads in.
1010 */
1011 (void) pthread_mutex_unlock(&fs_attach_mutex);
1012
1013 ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t));
1014 if (ai == NULL) {
1015 free(gids);
1016 return (ENOMEM);
1017 }
1018 #ifdef __illumos__
1019 error = pthread_mutex_init(&ai->ai_mtx, &fs_mutexattr);
1020 #else
1021 error = pthread_mutex_init(&ai->ai_mtx, NULL);
1022 #endif
1023 if (error) {
1024 free(gids);
1025 free(ai);
1026 return (error);
1027 }
1028 ai->ai_refcnt = 0;
1029 ai->ai_uid = uid;
1030 ai->ai_flags = 0; /* XXX for now */
1031 ai->ai_ngids = ngroups;
1032 memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t));
1033 free(gids);
1034
1035 file = open_fid(sc->fs_rootfd, ".", ai, true);
1036 if (file == NULL) {
1037 (void) pthread_mutex_destroy(&ai->ai_mtx);
1038 free(ai);
1039 return (ENOMEM);
1040 }
1041
1042 req->lr_fid->lo_aux = file;
1043 generate_qid(&st, &req->lr_resp.rattach.qid);
1044 return (0);
1045 }
1046
1047 static int
fs_clunk(void * softc __unused,struct l9p_fid * fid)1048 fs_clunk(void *softc __unused, struct l9p_fid *fid)
1049 {
1050 struct fs_fid *file;
1051
1052 file = fid->lo_aux;
1053 assert(file != NULL);
1054
1055 if (file->ff_dir) {
1056 closedir(file->ff_dir);
1057 file->ff_dir = NULL;
1058 } else if (file->ff_fd != -1) {
1059 close(file->ff_fd);
1060 file->ff_fd = -1;
1061 }
1062
1063 return (0);
1064 }
1065
1066 /*
1067 * Create ops.
1068 *
1069 * We are to create a new file under some existing path,
1070 * where the new file's name is in the Tcreate request and the
1071 * existing path is due to a fid-based file (req->lr_fid).
1072 *
1073 * One op (create regular file) sets file->fd, the rest do not.
1074 */
1075 static int
fs_create(void * softc,struct l9p_request * req)1076 fs_create(void *softc, struct l9p_request *req)
1077 {
1078 struct l9p_fid *dir;
1079 struct stat st;
1080 uint32_t dmperm;
1081 mode_t perm;
1082 char *name;
1083 int error;
1084
1085 dir = req->lr_fid;
1086 name = req->lr_req.tcreate.name;
1087 dmperm = req->lr_req.tcreate.perm;
1088 perm = (mode_t)(dmperm & 0777);
1089
1090 if (dmperm & L9P_DMDIR)
1091 error = fs_imkdir(softc, dir, name, true,
1092 perm, (gid_t)-1, &st);
1093 else if (dmperm & L9P_DMSYMLINK)
1094 error = fs_isymlink(softc, dir, name,
1095 req->lr_req.tcreate.extension, (gid_t)-1, &st);
1096 else if (dmperm & L9P_DMNAMEDPIPE)
1097 error = fs_imkfifo(softc, dir, name, true,
1098 perm, (gid_t)-1, &st);
1099 else if (dmperm & L9P_DMSOCKET)
1100 error = fs_imksocket(softc, dir, name, true,
1101 perm, (gid_t)-1, &st);
1102 else if (dmperm & L9P_DMDEVICE) {
1103 unsigned int major, minor;
1104 char type;
1105 dev_t dev;
1106
1107 /*
1108 * ??? Should this be testing < 3? For now, allow a single
1109 * integer mode with minor==0 implied.
1110 */
1111 minor = 0;
1112 if (sscanf(req->lr_req.tcreate.extension, "%c %u %u",
1113 &type, &major, &minor) < 2) {
1114 return (EINVAL);
1115 }
1116
1117 switch (type) {
1118 case 'b':
1119 perm |= S_IFBLK;
1120 break;
1121 case 'c':
1122 perm |= S_IFCHR;
1123 break;
1124 default:
1125 return (EINVAL);
1126 }
1127 dev = makedev(major, minor);
1128 error = fs_imknod(softc, dir, name, true, perm, dev,
1129 (gid_t)-1, &st);
1130 } else {
1131 enum l9p_omode p9;
1132 int flags;
1133
1134 p9 = req->lr_req.tcreate.mode;
1135 error = fs_oflags_dotu(p9, &flags);
1136 if (error)
1137 return (error);
1138 error = fs_icreate(softc, dir, name, flags,
1139 true, perm, (gid_t)-1, &st);
1140 req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
1141 }
1142
1143 if (error == 0)
1144 generate_qid(&st, &req->lr_resp.rcreate.qid);
1145
1146 return (error);
1147 }
1148
/*
 * Plan 9 permission masking.
 *
 * https://swtch.com/plan9port/man/man9/open.html and
 * http://plan9.bell-labs.com/magic/man2html/5/open
 * specify the effective permissions of a newly created object as
 *	perm & (~0666 | (dir.perm & 0666))
 * for files, and
 *	perm & (~0777 | (dir.perm & 0777))
 * for directories.  In other words the parent directory can only
 * remove permission bits that the request asked for; bits outside
 * the mask pass through untouched.
 *
 * This seems a bit restrictive; probably there should be a control
 * knob for this.
 *
 * <perm> is the requested mode, <dir_perm> the parent directory's
 * mode and <isdir> selects the 0777 (directory) or 0666 (file) mask.
 * Returns the adjusted mode.
 */
static inline mode_t
fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
{
	const mode_t mask = isdir ? 0777 : 0666;

	return (perm & (~mask | (dir_perm & mask)));
}
1172
1173 /*
1174 * Internal form of create (plain file).
1175 *
1176 * Our caller takes care of splitting off all the special
1177 * types of create (mknod, etc), so this is purely for files.
1178 * We receive the fs_softc <softc>, the directory fid <dir>
1179 * in which the new file is to be created, the name of the
1180 * new file, a flag <isp9> indicating whether to do plan9 style
1181 * permissions or Linux style permissions, the permissions <perm>,
1182 * an effective group id <egid>, and a pointer to a stat structure
1183 * <st> to fill in describing the final result on success.
1184 *
1185 * On successful create, the fid switches to the newly created
1186 * file, which is now open; its associated file-name changes too.
1187 *
1188 * Note that the original (dir) fid is never currently open,
1189 * so there is nothing to close.
1190 */
1191 static int
fs_icreate(void * softc,struct l9p_fid * dir,char * name,int flags,bool isp9,mode_t perm,gid_t egid,struct stat * st)1192 fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
1193 bool isp9, mode_t perm, gid_t egid, struct stat *st)
1194 {
1195 struct fs_fid *file;
1196 gid_t gid;
1197 uid_t uid;
1198 char newname[MAXPATHLEN];
1199 int error, fd;
1200
1201 file = dir->lo_aux;
1202
1203 /*
1204 * Build full path name from directory + file name. We'll
1205 * check permissions on the parent directory, then race to
1206 * create the file before anything bad happens like symlinks.
1207 *
1208 * (To close this race we need to use openat(), which is
1209 * left for a later version of this code.)
1210 */
1211 error = fs_buildname(dir, name, newname, sizeof(newname));
1212 if (error)
1213 return (error);
1214
1215 /* In case of success, we will need a new file->ff_name. */
1216 name = strdup(newname);
1217 if (name == NULL)
1218 return (ENOMEM);
1219
1220 /* Check create permission and compute new file ownership. */
1221 error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1222 if (error) {
1223 free(name);
1224 return (error);
1225 }
1226
1227 /* Adjust new-file permissions for Plan9 protocol. */
1228 if (isp9)
1229 perm = fs_p9perm(perm, st->st_mode, false);
1230
1231 /* Create is always exclusive so O_TRUNC is irrelevant. */
1232 fd = openat(file->ff_dirfd, newname, flags | O_CREAT | O_EXCL, perm);
1233 if (fd < 0) {
1234 error = errno;
1235 free(name);
1236 return (error);
1237 }
1238
1239 /* Fix permissions and owner. */
1240 if (fchmod(fd, perm) != 0 ||
1241 fchown(fd, uid, gid) != 0 ||
1242 fstat(fd, st) != 0) {
1243 error = errno;
1244 (void) close(fd);
1245 /* unlink(newname); ? */
1246 free(name);
1247 return (error);
1248 }
1249
1250 /* It *was* a directory; now it's a file, and it's open. */
1251 free(file->ff_name);
1252 file->ff_name = name;
1253 file->ff_fd = fd;
1254 return (0);
1255 }
1256
1257 /*
1258 * Internal form of open: stat file and verify permissions (from p9
1259 * argument), then open the file-or-directory, leaving the internal
1260 * fs_fid fields set up. If we cannot open the file, return a
1261 * suitable error number, and leave everything unchanged.
1262 *
1263 * To mitigate the race between permissions testing and the actual
1264 * open, we can stat the file twice (once with lstat() before open,
1265 * then with fstat() after). We assume O_NOFOLLOW is set in flags,
1266 * so if some other race-winner substitutes in a symlink we won't
1267 * open it here. (However, embedded symlinks, if they occur, are
1268 * still an issue. Ideally we would like to have an O_NEVERFOLLOW
1269 * that fails on embedded symlinks, and a way to pass this to
1270 * lstat() as well.)
1271 *
1272 * When we use opendir() we cannot pass O_NOFOLLOW, so we must rely
1273 * on substitution-detection via fstat(). To simplify the code we
1274 * just always re-check.
1275 *
1276 * (For a proper fix in the future, we can require openat(), keep
1277 * each parent directory open during walk etc, and allow only final
1278 * name components with O_NOFOLLOW.)
1279 *
1280 * On successful return, st has been filled in.
1281 */
1282 static int
fs_iopen(void * softc,struct l9p_fid * fid,int flags,enum l9p_omode p9,gid_t egid __unused,struct stat * st)1283 fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
1284 gid_t egid __unused, struct stat *st)
1285 {
1286 struct fs_softc *sc = softc;
1287 struct fs_fid *file;
1288 struct stat first;
1289 int32_t op;
1290 char *name;
1291 int error;
1292 int fd;
1293 DIR *dirp;
1294
1295 /* Forbid write ops on read-only file system. */
1296 if (sc->fs_readonly) {
1297 if ((flags & O_TRUNC) != 0)
1298 return (EROFS);
1299 if ((flags & O_ACCMODE) != O_RDONLY)
1300 return (EROFS);
1301 if (p9 & L9P_ORCLOSE)
1302 return (EROFS);
1303 }
1304
1305 file = fid->lo_aux;
1306 assert(file != NULL);
1307 name = file->ff_name;
1308
1309 if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
1310 return (errno);
1311 if (S_ISLNK(first.st_mode))
1312 return (EPERM);
1313
1314 /* Can we rely on O_APPEND here? Best not, can be cleared. */
1315 switch (flags & O_ACCMODE) {
1316 case O_RDONLY:
1317 op = L9P_ACE_READ_DATA;
1318 break;
1319 case O_WRONLY:
1320 op = L9P_ACE_WRITE_DATA;
1321 break;
1322 case O_RDWR:
1323 op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
1324 break;
1325 default:
1326 return (EINVAL);
1327 }
1328 fillacl(file);
1329 error = check_access(op, NULL, NULL, file->ff_acl, &first,
1330 file->ff_ai, (gid_t)-1);
1331 if (error)
1332 return (error);
1333
1334 if (S_ISDIR(first.st_mode)) {
1335 /* Forbid write or truncate on directory. */
1336 if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
1337 return (EPERM);
1338 fd = openat(file->ff_dirfd, name, O_DIRECTORY);
1339 dirp = fdopendir(fd);
1340 if (dirp == NULL)
1341 return (EPERM);
1342 fd = dirfd(dirp);
1343 } else {
1344 dirp = NULL;
1345 fd = openat(file->ff_dirfd, name, flags);
1346 if (fd < 0)
1347 return (EPERM);
1348 }
1349
1350 /*
1351 * We have a valid fd, and maybe non-null dirp. Re-check
1352 * the file, and fail if st_dev or st_ino changed.
1353 */
1354 if (fstat(fd, st) != 0 ||
1355 first.st_dev != st->st_dev ||
1356 first.st_ino != st->st_ino) {
1357 if (dirp != NULL)
1358 (void) closedir(dirp);
1359 else
1360 (void) close(fd);
1361 return (EPERM);
1362 }
1363 if (dirp != NULL)
1364 file->ff_dir = dirp;
1365 else
1366 file->ff_fd = fd;
1367 return (0);
1368 }
1369
1370 /*
1371 * Internal form of mkdir (common code for all forms).
1372 * We receive the fs_softc <softc>, the directory fid <dir>
1373 * in which the new entry is to be created, the name of the
1374 * new entry, a flag <isp9> indicating whether to do plan9 style
1375 * permissions or Linux style permissions, the permissions <perm>,
1376 * an effective group id <egid>, and a pointer to a stat structure
1377 * <st> to fill in describing the final result on success.
1378 *
1379 * See also fs_icreate() above.
1380 */
1381 static int
fs_imkdir(void * softc,struct l9p_fid * dir,char * name,bool isp9,mode_t perm,gid_t egid,struct stat * st)1382 fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
1383 bool isp9, mode_t perm, gid_t egid, struct stat *st)
1384 {
1385 struct fs_fid *ff;
1386 gid_t gid;
1387 uid_t uid;
1388 char newname[MAXPATHLEN];
1389 int error, fd;
1390
1391 ff = dir->lo_aux;
1392 error = fs_buildname(dir, name, newname, sizeof(newname));
1393 if (error)
1394 return (error);
1395
1396 error = fs_nde(softc, dir, true, egid, st, &uid, &gid);
1397 if (error)
1398 return (error);
1399
1400 if (isp9)
1401 perm = fs_p9perm(perm, st->st_mode, true);
1402
1403 if (mkdirat(ff->ff_dirfd, newname, perm) != 0)
1404 return (errno);
1405
1406 fd = openat(ff->ff_dirfd, newname,
1407 O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
1408 if (fd < 0 ||
1409 fchown(fd, uid, gid) != 0 ||
1410 fchmod(fd, perm) != 0 ||
1411 fstat(fd, st) != 0) {
1412 error = errno;
1413 /* rmdir(newname) ? */
1414 }
1415 if (fd >= 0)
1416 (void) close(fd);
1417
1418 return (error);
1419 }
1420
#ifdef __APPLE__
/*
 * Invoke the undocumented OS X syscall SYS___pthread_fchdir on <fd>
 * and return its result.  fs_imknod() uses it around mknod() and
 * passes -1 afterwards to stop using the thread-local cwd.
 *
 * It would be best to avoid this, but there doesn't seem to be
 * another safe way to implement mknodat.  Dear Apple, please
 * implement mknodat before you remove this syscall.
 *
 * The deprecation warning for syscall() is suppressed around the
 * call only.
 */
static int
fs_ifchdir_thread_local(int fd)
{
	int rv;

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	rv = syscall(SYS___pthread_fchdir, fd);
#pragma clang diagnostic pop
	return (rv);
}
#endif
1435
1436 /*
1437 * Internal form of mknod (special device).
1438 *
1439 * The device type (S_IFBLK, S_IFCHR) is included in the <mode> parameter.
1440 */
1441 static int
fs_imknod(void * softc,struct l9p_fid * dir,char * name,bool isp9,mode_t mode,dev_t dev,gid_t egid,struct stat * st)1442 fs_imknod(void *softc, struct l9p_fid *dir, char *name,
1443 bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st)
1444 {
1445 struct fs_fid *ff;
1446 mode_t perm;
1447 gid_t gid;
1448 uid_t uid;
1449 char newname[MAXPATHLEN];
1450 int error;
1451
1452 ff = dir->lo_aux;
1453 error = fs_buildname(dir, name, newname, sizeof(newname));
1454 if (error)
1455 return (error);
1456
1457 error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1458 if (error)
1459 return (error);
1460
1461 if (isp9) {
1462 perm = fs_p9perm(mode & 0777, st->st_mode, false);
1463 mode = (mode & ~0777) | perm;
1464 } else {
1465 perm = mode & 0777;
1466 }
1467
1468 #ifdef __APPLE__
1469 if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0) {
1470 return -1;
1471 }
1472 error = mknod(newname, mode, dev);
1473 int preserved_errno = errno;
1474 /* Stop using the thread-local cwd */
1475 fs_ifchdir_thread_local(-1);
1476 if (error < 0) {
1477 errno = preserved_errno;
1478 return errno;
1479 }
1480 #else
1481 if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0)
1482 return (errno);
1483 #endif
1484
1485 /* We cannot open the new name; race to use l* syscalls. */
1486 if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1487 fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
1488 fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1489 error = errno;
1490 else if ((st->st_mode & S_IFMT) != (mode & S_IFMT))
1491 error = EPERM; /* ??? lost a race anyway */
1492
1493 /* if (error) unlink(newname) ? */
1494
1495 return (error);
1496 }
1497
1498 /*
1499 * Internal form of mkfifo.
1500 */
1501 static int
fs_imkfifo(void * softc,struct l9p_fid * dir,char * name,bool isp9,mode_t perm,gid_t egid,struct stat * st)1502 fs_imkfifo(void *softc, struct l9p_fid *dir, char *name,
1503 bool isp9, mode_t perm, gid_t egid, struct stat *st)
1504 {
1505 struct fs_fid *ff;
1506 gid_t gid;
1507 uid_t uid;
1508 char newname[MAXPATHLEN];
1509 int error;
1510
1511 ff = dir->lo_aux;
1512 error = fs_buildname(dir, name, newname, sizeof(newname));
1513 if (error)
1514 return (error);
1515
1516 error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1517 if (error)
1518 return (error);
1519
1520 if (isp9)
1521 perm = fs_p9perm(perm, st->st_mode, false);
1522
1523 if (mkfifo(newname, perm) != 0)
1524 return (errno);
1525
1526 /* We cannot open the new name; race to use l* syscalls. */
1527 if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1528 fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
1529 fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1530 error = errno;
1531 else if (!S_ISFIFO(st->st_mode))
1532 error = EPERM; /* ??? lost a race anyway */
1533
1534 /* if (error) unlink(newname) ? */
1535
1536 return (error);
1537 }
1538
1539 /*
1540 * Internal form of mksocket.
1541 *
1542 * This is a bit different because of the horrible socket naming
1543 * system (bind() with sockaddr_un sun_path).
1544 */
1545 static int
fs_imksocket(void * softc,struct l9p_fid * dir,char * name,bool isp9,mode_t perm,gid_t egid,struct stat * st)1546 fs_imksocket(void *softc, struct l9p_fid *dir, char *name,
1547 bool isp9, mode_t perm, gid_t egid, struct stat *st)
1548 {
1549 struct fs_fid *ff;
1550 struct sockaddr_un un;
1551 char *path;
1552 char newname[MAXPATHLEN];
1553 gid_t gid;
1554 uid_t uid;
1555 int error = 0, s, fd, slen;
1556
1557 ff = dir->lo_aux;
1558 error = fs_buildname(dir, name, newname, sizeof(newname));
1559 if (error)
1560 return (error);
1561
1562 error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1563 if (error)
1564 return (error);
1565
1566 if (isp9)
1567 perm = fs_p9perm(perm, st->st_mode, false);
1568
1569 s = socket(AF_UNIX, SOCK_STREAM, 0);
1570 if (s < 0)
1571 return (errno);
1572
1573 path = newname;
1574 fd = -1;
1575 #ifdef HAVE_BINDAT
1576 /* Try bindat() if needed. */
1577 if (strlen(path) >= sizeof(un.sun_path)) {
1578 fd = openat(ff->ff_dirfd, ff->ff_name,
1579 O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
1580 if (fd >= 0)
1581 path = name;
1582 }
1583 #endif
1584
1585 /*
1586 * Can only create the socket if the path will fit.
1587 * Even if we are using bindat() there are limits
1588 * (the API for AF_UNIX sockets is ... not good).
1589 *
1590 * Note: in theory we can fill sun_path to the end
1591 * (omitting a terminating '\0') but in at least one
1592 * Unix-like system, this was known to behave oddly,
1593 * so we test for ">=" rather than just ">".
1594 */
1595 if (strlen(path) >= sizeof(un.sun_path)) {
1596 error = ENAMETOOLONG;
1597 goto out;
1598 }
1599 un.sun_family = AF_UNIX;
1600 #ifndef __illumos__
1601 slen = un.sun_len = sizeof(struct sockaddr_un);
1602 #else
1603 slen = SUN_LEN(&un);
1604 #endif
1605
1606 strncpy(un.sun_path, path, sizeof(un.sun_path));
1607
1608 #ifdef HAVE_BINDAT
1609 if (fd >= 0) {
1610 if (bindat(fd, s, (struct sockaddr *)&un, slen) < 0)
1611 error = errno;
1612 goto out; /* done now, for good or ill */
1613 }
1614 #endif
1615
1616 if (bind(s, (struct sockaddr *)&un, slen) < 0)
1617 error = errno;
1618 out:
1619
1620 if (error == 0) {
1621 /*
1622 * We believe we created the socket-inode. Fix
1623 * permissions etc. Note that we cannot use
1624 * fstat() on the socket descriptor: it succeeds,
1625 * but we get bogus data!
1626 */
1627 if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1628 fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 ||
1629 fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1630 error = errno;
1631 else if (!S_ISSOCK(st->st_mode))
1632 error = EPERM; /* ??? lost a race anyway */
1633
1634 /* if (error) unlink(newname) ? */
1635 }
1636
1637 /*
1638 * It's not clear which error should override, although
1639 * ideally we should never see either close() call fail.
1640 * In any case we do want to try to close both fd and s,
1641 * always. Let's set error only if it is not already set,
1642 * so that all exit paths can use the same code.
1643 */
1644 if (fd >= 0 && close(fd) != 0)
1645 if (error == 0)
1646 error = errno;
1647 if (close(s) != 0)
1648 if (error == 0)
1649 error = errno;
1650
1651 return (error);
1652 }
1653
1654 /*
1655 * Internal form of symlink.
1656 *
1657 * Note that symlinks are presumed to carry no permission bits.
1658 * They do have owners, however (who may be charged for quotas).
1659 */
1660 static int
fs_isymlink(void * softc,struct l9p_fid * dir,char * name,char * symtgt,gid_t egid,struct stat * st)1661 fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
1662 char *symtgt, gid_t egid, struct stat *st)
1663 {
1664 struct fs_fid *ff;
1665 gid_t gid;
1666 uid_t uid;
1667 char newname[MAXPATHLEN];
1668 int error;
1669
1670 ff = dir->lo_aux;
1671 error = fs_buildname(dir, name, newname, sizeof(newname));
1672 if (error)
1673 return (error);
1674
1675 error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
1676 if (error)
1677 return (error);
1678
1679 if (symlinkat(symtgt, ff->ff_dirfd, newname) != 0)
1680 return (errno);
1681
1682 /* We cannot open the new name; race to use l* syscalls. */
1683 if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
1684 fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
1685 error = errno;
1686 else if (!S_ISLNK(st->st_mode))
1687 error = EPERM; /* ??? lost a race anyway */
1688
1689 /* if (error) unlink(newname) ? */
1690
1691 return (error);
1692 }
1693
1694 static int
fs_open(void * softc,struct l9p_request * req)1695 fs_open(void *softc, struct l9p_request *req)
1696 {
1697 struct l9p_fid *fid = req->lr_fid;
1698 struct stat st;
1699 enum l9p_omode p9;
1700 int error, flags;
1701
1702 p9 = req->lr_req.topen.mode;
1703 error = fs_oflags_dotu(p9, &flags);
1704 if (error)
1705 return (error);
1706
1707 error = fs_iopen(softc, fid, flags, p9, (gid_t)-1, &st);
1708 if (error)
1709 return (error);
1710
1711 generate_qid(&st, &req->lr_resp.ropen.qid);
1712 req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
1713 return (0);
1714 }
1715
1716 /*
1717 * Helper for directory read. We want to run an lstat on each
1718 * file name within the directory. This is a lot faster if we
1719 * have lstatat (or fstatat with AT_SYMLINK_NOFOLLOW), but not
1720 * all systems do, so hide the ifdef-ed code in an inline function.
1721 */
1722 static inline int
fs_lstatat(struct fs_fid * file,char * name,struct stat * st)1723 fs_lstatat(struct fs_fid *file, char *name, struct stat *st)
1724 {
1725
1726 return (fstatat(dirfd(file->ff_dir), name, st, AT_SYMLINK_NOFOLLOW));
1727 }
1728
1729 static int
fs_read(void * softc,struct l9p_request * req)1730 fs_read(void *softc, struct l9p_request *req)
1731 {
1732 struct l9p_stat l9stat;
1733 struct fs_softc *sc;
1734 struct fs_fid *file;
1735 bool dotu = req->lr_conn->lc_version >= L9P_2000U;
1736 ssize_t ret;
1737
1738 sc = softc;
1739 file = req->lr_fid->lo_aux;
1740 assert(file != NULL);
1741
1742 if (file->ff_dir != NULL) {
1743 struct dirent *d;
1744 struct stat st;
1745 struct l9p_message msg;
1746 long o;
1747 int err;
1748
1749 if ((err = pthread_mutex_lock(&file->ff_mtx)) != 0)
1750 return (err);
1751
1752 /*
1753 * Must use telldir before readdir since seekdir
1754 * takes cookie values. Unfortunately this wastes
1755 * a lot of time (and memory) building unneeded
1756 * cookies that can only be flushed by closing
1757 * the directory.
1758 *
1759 * NB: FreeBSD libc seekdir has SINGLEUSE defined,
1760 * so in fact, we can discard the cookies by
1761 * calling seekdir on them. This clears up wasted
1762 * memory at the cost of even more wasted time...
1763 *
1764 * XXX: readdir/telldir/seekdir not thread safe
1765 */
1766 l9p_init_msg(&msg, req, L9P_PACK);
1767 for (;;) {
1768 o = telldir(file->ff_dir);
1769 d = readdir(file->ff_dir);
1770 if (d == NULL)
1771 break;
1772 if (fs_lstatat(file, d->d_name, &st))
1773 continue;
1774 dostat(sc, &l9stat, d->d_name, &st, dotu);
1775 if (l9p_pack_stat(&msg, req, &l9stat) != 0) {
1776 seekdir(file->ff_dir, o);
1777 break;
1778 }
1779 #if defined(__FreeBSD__)
1780 seekdir(file->ff_dir, o);
1781 (void) readdir(file->ff_dir);
1782 #endif
1783 }
1784
1785 (void) pthread_mutex_unlock(&file->ff_mtx);
1786 } else {
1787 size_t niov = l9p_truncate_iov(req->lr_data_iov,
1788 req->lr_data_niov, req->lr_req.io.count);
1789
1790 #if defined(__FreeBSD__) || defined(__illumos__)
1791 ret = preadv(file->ff_fd, req->lr_data_iov, niov,
1792 req->lr_req.io.offset);
1793 #else
1794 /* XXX: not thread safe, should really use aio_listio. */
1795 if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
1796 return (errno);
1797
1798 ret = (uint32_t)readv(file->ff_fd, req->lr_data_iov, (int)niov);
1799 #endif
1800
1801 if (ret < 0)
1802 return (errno);
1803
1804 req->lr_resp.io.count = (uint32_t)ret;
1805 }
1806
1807 return (0);
1808 }
1809
1810 static int
fs_remove(void * softc,struct l9p_fid * fid)1811 fs_remove(void *softc, struct l9p_fid *fid)
1812 {
1813 struct fs_softc *sc = softc;
1814 struct l9p_acl *parent_acl;
1815 struct fs_fid *file;
1816 struct stat pst, cst;
1817 char dirname[MAXPATHLEN];
1818 int error;
1819
1820 if (sc->fs_readonly)
1821 return (EROFS);
1822
1823 error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst);
1824 if (error)
1825 return (error);
1826
1827 file = fid->lo_aux;
1828 if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0)
1829 return (error);
1830
1831 parent_acl = getacl(file, -1, dirname);
1832 fillacl(file);
1833
1834 error = check_access(L9P_ACOP_UNLINK,
1835 parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1);
1836 l9p_acl_free(parent_acl);
1837 if (error)
1838 return (error);
1839
1840 if (unlinkat(file->ff_dirfd, file->ff_name,
1841 S_ISDIR(cst.st_mode) ? AT_REMOVEDIR : 0) != 0)
1842 error = errno;
1843
1844 return (error);
1845 }
1846
1847 static int
fs_stat(void * softc,struct l9p_request * req)1848 fs_stat(void *softc, struct l9p_request *req)
1849 {
1850 struct fs_softc *sc;
1851 struct fs_fid *file;
1852 struct stat st;
1853 bool dotu = req->lr_conn->lc_version >= L9P_2000U;
1854
1855 sc = softc;
1856 file = req->lr_fid->lo_aux;
1857 assert(file);
1858
1859 if (fstatat(file->ff_dirfd, file->ff_name, &st,
1860 AT_SYMLINK_NOFOLLOW) != 0)
1861 return (errno);
1862
1863 dostat(sc, &req->lr_resp.rstat.stat, file->ff_name, &st, dotu);
1864 return (0);
1865 }
1866
1867 static int
fs_walk(void * softc,struct l9p_request * req)1868 fs_walk(void *softc, struct l9p_request *req)
1869 {
1870 struct l9p_acl *acl;
1871 struct fs_authinfo *ai;
1872 struct fs_fid *file = req->lr_fid->lo_aux;
1873 struct fs_fid *newfile;
1874 struct stat st;
1875 size_t clen, namelen, need;
1876 char *comp, *succ, *next, *swtmp;
1877 bool atroot;
1878 bool dotdot;
1879 int i, nwname;
1880 int error = 0;
1881 char namebufs[2][MAXPATHLEN];
1882
1883 /*
1884 * https://swtch.com/plan9port/man/man9/walk.html:
1885 *
1886 * It is legal for nwname to be zero, in which case newfid
1887 * will represent the same file as fid and the walk will
1888 * usually succeed; this is equivalent to walking to dot.
1889 * [Aside: it's not clear if we should test S_ISDIR here.]
1890 * ...
1891 * The name ".." ... represents the parent directory.
1892 * The name "." ... is not used in the protocol.
1893 * ... A walk of the name ".." in the root directory
1894 * of the server is equivalent to a walk with no name
1895 * elements.
1896 *
1897 * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM,
1898 * so it is safe to convert to plain int.
1899 *
1900 * We are to return an error only if the first walk fails,
1901 * else stop at the end of the names or on the first error.
1902 * The final fid is based on the last name successfully
1903 * walked.
1904 *
1905 * Note that we *do* get Twalk requests with nwname==0 on files.
1906 *
1907 * Set up "successful name" buffer pointer with base fid name,
1908 * initially. We'll swap each new success into it as we go.
1909 *
1910 * Invariant: atroot and stat data correspond to current
1911 * (succ) path.
1912 */
1913 succ = namebufs[0];
1914 next = namebufs[1];
1915 namelen = strlcpy(succ, file->ff_name, MAXPATHLEN);
1916 if (namelen >= MAXPATHLEN)
1917 return (ENAMETOOLONG);
1918 if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0)
1919 return (errno);
1920 ai = file->ff_ai;
1921 atroot = strlen(succ) == 0; /* XXX? */
1922 fillacl(file);
1923 acl = file->ff_acl;
1924
1925 nwname = (int)req->lr_req.twalk.nwname;
1926
1927 for (i = 0; i < nwname; i++) {
1928 /*
1929 * Must have execute permission to search a directory.
1930 * Then, look up each component in its directory-so-far.
1931 * Check for ".." along the way, handlng specially
1932 * as needed. Forbid "/" in name components.
1933 *
1934 */
1935 if (!S_ISDIR(st.st_mode)) {
1936 error = ENOTDIR;
1937 goto out;
1938 }
1939 error = check_access(L9P_ACE_EXECUTE,
1940 NULL, NULL, acl, &st, ai, (gid_t)-1);
1941 if (error) {
1942 L9P_LOG(L9P_DEBUG,
1943 "Twalk: denying dir-walk on \"%s\" for uid %u",
1944 succ, (unsigned)ai->ai_uid);
1945 error = EPERM;
1946 goto out;
1947 }
1948 comp = req->lr_req.twalk.wname[i];
1949 if (strchr(comp, '/') != NULL) {
1950 error = EINVAL;
1951 break;
1952 }
1953
1954 clen = strlen(comp);
1955 dotdot = false;
1956
1957 /*
1958 * Build next pathname (into "next"). If "..",
1959 * just strip one name component off the success
1960 * name so far. Since we know this name fits, the
1961 * stripped down version also fits. Otherwise,
1962 * the name is the base name plus '/' plus the
1963 * component name plus terminating '\0'; this may
1964 * or may not fit.
1965 */
1966 if (comp[0] == '.') {
1967 if (clen == 1) {
1968 error = EINVAL;
1969 break;
1970 }
1971 if (comp[1] == '.' && clen == 2)
1972 dotdot = true;
1973 }
1974 if (dotdot) {
1975 /*
1976 * It's not clear how ".." at root should
1977 * be handled when i > 0. Obeying the man
1978 * page exactly, we reset i to 0 and stop,
1979 * declaring terminal success.
1980 *
1981 * Otherwise, we just climbed up one level
1982 * so adjust "atroot".
1983 */
1984 if (atroot) {
1985 i = 0;
1986 break;
1987 }
1988 (void) r_dirname(succ, next, MAXPATHLEN);
1989 namelen = strlen(next);
1990 atroot = strlen(next) == 0; /* XXX? */
1991 } else {
1992 need = namelen + 1 + clen + 1;
1993 if (need > MAXPATHLEN) {
1994 error = ENAMETOOLONG;
1995 break;
1996 }
1997 memcpy(next, succ, namelen);
1998 next[namelen++] = '/';
1999 memcpy(&next[namelen], comp, clen + 1);
2000 namelen += clen;
2001 /*
2002 * Since name is never ".", we are necessarily
2003 * descending below the root now.
2004 */
2005 atroot = false;
2006 }
2007
2008 if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) {
2009 error = ENOENT;
2010 break;
2011 }
2012
2013 /*
2014 * Success: generate qid and swap this
2015 * successful name into place. Update acl.
2016 */
2017 generate_qid(&st, &req->lr_resp.rwalk.wqid[i]);
2018 swtmp = succ;
2019 succ = next;
2020 next = swtmp;
2021 if (acl != NULL && acl != file->ff_acl)
2022 l9p_acl_free(acl);
2023 acl = getacl(file, -1, next);
2024 }
2025
2026 /*
2027 * Fail only if we failed on the first name.
2028 * Otherwise we succeeded on something, and "succ"
2029 * points to the last successful name in namebufs[].
2030 */
2031 if (error) {
2032 if (i == 0)
2033 goto out;
2034 error = 0;
2035 }
2036
2037 newfile = open_fid(file->ff_dirfd, succ, ai, false);
2038 if (newfile == NULL) {
2039 error = ENOMEM;
2040 goto out;
2041 }
2042 if (req->lr_newfid == req->lr_fid) {
2043 /*
2044 * Before overwriting fid->lo_aux, free the old value.
2045 * Note that this doesn't free the l9p_fid data,
2046 * just the fs_fid data. (But it does ditch ff_acl.)
2047 */
2048 if (acl == file->ff_acl)
2049 acl = NULL;
2050 fs_freefid(softc, req->lr_fid);
2051 file = NULL;
2052 }
2053 req->lr_newfid->lo_aux = newfile;
2054 if (file != NULL && acl != file->ff_acl) {
2055 newfile->ff_acl = acl;
2056 acl = NULL;
2057 }
2058 req->lr_resp.rwalk.nwqid = (uint16_t)i;
2059 out:
2060 if (file != NULL && acl != file->ff_acl)
2061 l9p_acl_free(acl);
2062 return (error);
2063 }
2064
2065 static int
fs_write(void * softc,struct l9p_request * req)2066 fs_write(void *softc, struct l9p_request *req)
2067 {
2068 struct fs_softc *sc = softc;
2069 struct fs_fid *file;
2070 ssize_t ret;
2071
2072 file = req->lr_fid->lo_aux;
2073 assert(file != NULL);
2074
2075 if (sc->fs_readonly)
2076 return (EROFS);
2077
2078 size_t niov = l9p_truncate_iov(req->lr_data_iov,
2079 req->lr_data_niov, req->lr_req.io.count);
2080
2081 #if defined(__FreeBSD__) || defined(__illumos__)
2082 ret = pwritev(file->ff_fd, req->lr_data_iov, niov,
2083 req->lr_req.io.offset);
2084 #else
2085 /* XXX: not thread safe, should really use aio_listio. */
2086 if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
2087 return (errno);
2088
2089 ret = writev(file->ff_fd, req->lr_data_iov,
2090 (int)niov);
2091 #endif
2092
2093 if (ret < 0)
2094 return (errno);
2095
2096 req->lr_resp.io.count = (uint32_t)ret;
2097 return (0);
2098 }
2099
2100 static int
fs_wstat(void * softc,struct l9p_request * req)2101 fs_wstat(void *softc, struct l9p_request *req)
2102 {
2103 struct fs_softc *sc = softc;
2104 struct l9p_stat *l9stat = &req->lr_req.twstat.stat;
2105 struct l9p_fid *fid;
2106 struct fs_fid *file;
2107 int error = 0;
2108
2109 fid = req->lr_fid;
2110 file = fid->lo_aux;
2111 assert(file != NULL);
2112
2113 /*
2114 * XXX:
2115 *
2116 * stat(9P) sez:
2117 *
2118 * Either all the changes in wstat request happen, or none of them
2119 * does: if the request succeeds, all changes were made; if it fails,
2120 * none were.
2121 *
2122 * Atomicity is clearly missing in current implementation.
2123 */
2124
2125 if (sc->fs_readonly)
2126 return (EROFS);
2127
2128 if (l9stat->atime != (uint32_t)~0) {
2129 /* XXX: not implemented, ignore */
2130 }
2131
2132 if (l9stat->mtime != (uint32_t)~0) {
2133 /* XXX: not implemented, ignore */
2134 }
2135
2136 if (l9stat->dev != (uint32_t)~0) {
2137 error = EPERM;
2138 goto out;
2139 }
2140
2141 if (l9stat->length != (uint64_t)~0) {
2142 if (file->ff_dir != NULL) {
2143 error = EINVAL;
2144 goto out;
2145 }
2146
2147 if (truncate(file->ff_name, (off_t)l9stat->length) != 0) {
2148 error = errno;
2149 goto out;
2150 }
2151 }
2152
2153 if (req->lr_conn->lc_version >= L9P_2000U) {
2154 if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid,
2155 l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) {
2156 error = errno;
2157 goto out;
2158 }
2159 }
2160
2161 if (l9stat->mode != (uint32_t)~0) {
2162 if (fchmodat(file->ff_dirfd, file->ff_name,
2163 l9stat->mode & 0777, 0) != 0) {
2164 error = errno;
2165 goto out;
2166 }
2167 }
2168
2169 if (strlen(l9stat->name) > 0) {
2170 struct l9p_acl *parent_acl;
2171 struct stat st;
2172 char *tmp;
2173 char newname[MAXPATHLEN];
2174
2175 /*
2176 * Rename-within-directory: it's not deleting anything,
2177 * but we need write permission on the directory. This
2178 * should suffice.
2179 */
2180 error = fs_pdir(softc, fid, newname, sizeof(newname), &st);
2181 if (error)
2182 goto out;
2183 parent_acl = getacl(file, -1, newname);
2184 error = check_access(L9P_ACE_ADD_FILE,
2185 parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1);
2186 l9p_acl_free(parent_acl);
2187 if (error)
2188 goto out;
2189 error = fs_dpf(newname, l9stat->name, sizeof(newname));
2190 if (error)
2191 goto out;
2192 tmp = strdup(newname);
2193 if (tmp == NULL) {
2194 error = ENOMEM;
2195 goto out;
2196 }
2197 if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
2198 tmp) != 0) {
2199 error = errno;
2200 free(tmp);
2201 goto out;
2202 }
2203 /* Successful rename, update file->ff_name. ACL can stay. */
2204 free(file->ff_name);
2205 file->ff_name = tmp;
2206 }
2207 out:
2208 return (error);
2209 }
2210
2211 static int
fs_statfs(void * softc __unused,struct l9p_request * req)2212 fs_statfs(void *softc __unused, struct l9p_request *req)
2213 {
2214 struct fs_fid *file;
2215 struct stat st;
2216 #ifdef __illumos__
2217 struct statvfs f;
2218 #else
2219 struct statfs f;
2220 #endif
2221 long name_max;
2222 int error;
2223 int fd;
2224
2225 file = req->lr_fid->lo_aux;
2226 assert(file);
2227
2228 if (fstatat(file->ff_dirfd, file->ff_name, &st,
2229 AT_SYMLINK_NOFOLLOW) != 0)
2230 return (errno);
2231
2232 /*
2233 * Not entirely clear what access to require; we'll go
2234 * for "read data".
2235 */
2236 fillacl(file);
2237 error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
2238 file->ff_acl, &st, file->ff_ai, (gid_t)-1);
2239 if (error)
2240 return (error);
2241
2242 fd = openat(file->ff_dirfd, file->ff_name, 0);
2243 if (fd < 0)
2244 return (errno);
2245
2246 #ifdef __illumos__
2247 if (fstatvfs(fd, &f) != 0)
2248 return (errno);
2249 #else
2250 if (fstatfs(fd, &f) != 0)
2251 return (errno);
2252 #endif
2253
2254 name_max = fpathconf(fd, _PC_NAME_MAX);
2255 error = errno;
2256 close(fd);
2257
2258 if (name_max == -1)
2259 return (error);
2260
2261 dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
2262
2263 return (0);
2264 }
2265
2266 static int
fs_lopen(void * softc,struct l9p_request * req)2267 fs_lopen(void *softc, struct l9p_request *req)
2268 {
2269 struct l9p_fid *fid = req->lr_fid;
2270 struct stat st;
2271 enum l9p_omode p9;
2272 gid_t gid;
2273 int error, flags;
2274
2275 error = fs_oflags_dotl(req->lr_req.tlopen.flags, &flags, &p9);
2276 if (error)
2277 return (error);
2278
2279 gid = req->lr_req.tlopen.gid;
2280 error = fs_iopen(softc, fid, flags, p9, gid, &st);
2281 if (error)
2282 return (error);
2283
2284 generate_qid(&st, &req->lr_resp.rlopen.qid);
2285 req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
2286 return (0);
2287 }
2288
2289 static int
fs_lcreate(void * softc,struct l9p_request * req)2290 fs_lcreate(void *softc, struct l9p_request *req)
2291 {
2292 struct l9p_fid *dir;
2293 struct stat st;
2294 enum l9p_omode p9;
2295 char *name;
2296 mode_t perm;
2297 gid_t gid;
2298 int error, flags;
2299
2300 dir = req->lr_fid;
2301 name = req->lr_req.tlcreate.name;
2302
2303 error = fs_oflags_dotl(req->lr_req.tlcreate.flags, &flags, &p9);
2304 if (error)
2305 return (error);
2306
2307 perm = (mode_t)req->lr_req.tlcreate.mode & 0777; /* ? set-id bits? */
2308 gid = req->lr_req.tlcreate.gid;
2309 error = fs_icreate(softc, dir, name, flags, false, perm, gid, &st);
2310 if (error == 0)
2311 generate_qid(&st, &req->lr_resp.rlcreate.qid);
2312 req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
2313 return (error);
2314 }
2315
2316 static int
fs_symlink(void * softc,struct l9p_request * req)2317 fs_symlink(void *softc, struct l9p_request *req)
2318 {
2319 struct l9p_fid *dir;
2320 struct stat st;
2321 gid_t gid;
2322 char *name, *symtgt;
2323 int error;
2324
2325 dir = req->lr_fid;
2326 name = req->lr_req.tsymlink.name;
2327 symtgt = req->lr_req.tsymlink.symtgt;
2328 gid = req->lr_req.tsymlink.gid;
2329 error = fs_isymlink(softc, dir, name, symtgt, gid, &st);
2330 if (error == 0)
2331 generate_qid(&st, &req->lr_resp.rsymlink.qid);
2332 return (error);
2333 }
2334
2335 static int
fs_mknod(void * softc,struct l9p_request * req)2336 fs_mknod(void *softc, struct l9p_request *req)
2337 {
2338 struct l9p_fid *dir;
2339 struct stat st;
2340 uint32_t mode, major, minor;
2341 dev_t dev;
2342 gid_t gid;
2343 char *name;
2344 int error;
2345
2346 dir = req->lr_fid;
2347 name = req->lr_req.tmknod.name;
2348 mode = req->lr_req.tmknod.mode;
2349 gid = req->lr_req.tmknod.gid;
2350
2351 switch (mode & S_IFMT) {
2352 case S_IFBLK:
2353 case S_IFCHR:
2354 mode = (mode & S_IFMT) | (mode & 0777); /* ??? */
2355 major = req->lr_req.tmknod.major;
2356 minor = req->lr_req.tmknod.major;
2357 dev = makedev(major, minor);
2358 error = fs_imknod(softc, dir, name, false,
2359 (mode_t)mode, dev, gid, &st);
2360 break;
2361
2362 case S_IFIFO:
2363 error = fs_imkfifo(softc, dir, name, false,
2364 (mode_t)(mode & 0777), gid, &st);
2365 break;
2366
2367 case S_IFSOCK:
2368 error = fs_imksocket(softc, dir, name, false,
2369 (mode_t)(mode & 0777), gid, &st);
2370 break;
2371
2372 default:
2373 error = EINVAL;
2374 break;
2375 }
2376 if (error == 0)
2377 generate_qid(&st, &req->lr_resp.rmknod.qid);
2378 return (error);
2379 }
2380
2381 static int
fs_rename(void * softc,struct l9p_request * req)2382 fs_rename(void *softc, struct l9p_request *req)
2383 {
2384 struct fs_softc *sc = softc;
2385 struct fs_authinfo *ai;
2386 struct l9p_acl *oparent_acl;
2387 struct l9p_fid *fid, *f2;
2388 struct fs_fid *file, *f2ff;
2389 struct stat cst, opst, npst;
2390 int32_t op;
2391 bool reparenting;
2392 char *tmp;
2393 char olddir[MAXPATHLEN], newname[MAXPATHLEN];
2394 int error;
2395
2396 if (sc->fs_readonly)
2397 return (EROFS);
2398
2399 /*
2400 * Note: lr_fid represents the file that is to be renamed,
2401 * so we must locate its parent directory and verify that
2402 * both this parent directory and the new directory f2 are
2403 * writable. But if the new parent directory is the same
2404 * path as the old parent directory, our job is simpler.
2405 */
2406 fid = req->lr_fid;
2407 file = fid->lo_aux;
2408 assert(file != NULL);
2409 ai = file->ff_ai;
2410
2411 error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst);
2412 if (error)
2413 return (error);
2414
2415 f2 = req->lr_fid2;
2416 f2ff = f2->lo_aux;
2417 assert(f2ff != NULL);
2418
2419 reparenting = strcmp(olddir, f2ff->ff_name) != 0;
2420
2421 fillacl(file);
2422 fillacl(f2ff);
2423
2424 if (fstatat(file->ff_dirfd, file->ff_name, &cst,
2425 AT_SYMLINK_NOFOLLOW) != 0)
2426 return (errno);
2427
2428 /*
2429 * Are we moving from olddir? If so, we're unlinking
2430 * from it, in terms of ACL access.
2431 */
2432 if (reparenting) {
2433 oparent_acl = getacl(file, -1, olddir);
2434 error = check_access(L9P_ACOP_UNLINK,
2435 oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1);
2436 l9p_acl_free(oparent_acl);
2437 if (error)
2438 return (error);
2439 }
2440
2441 /*
2442 * Now check that we're allowed to "create" a file or directory in
2443 * f2. (Should we do this, too, only if reparenting? Maybe check
2444 * for dir write permission if not reparenting -- but that's just
2445 * add-file/add-subdir, which means doing this always.)
2446 */
2447 if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst,
2448 AT_SYMLINK_NOFOLLOW) != 0)
2449 return (errno);
2450
2451 op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
2452 error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL,
2453 ai, (gid_t)-1);
2454 if (error)
2455 return (error);
2456
2457 /*
2458 * Directories OK, file systems not R/O, etc; build final name.
2459 * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general
2460 * paranoia, let's double check anyway.
2461 */
2462 if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname))
2463 return (ENAMETOOLONG);
2464 error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname));
2465 if (error)
2466 return (error);
2467 tmp = strdup(newname);
2468 if (tmp == NULL)
2469 return (ENOMEM);
2470
2471 if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) {
2472 error = errno;
2473 free(tmp);
2474 return (error);
2475 }
2476
2477 /* file has been renamed but old fid is not clunked */
2478 free(file->ff_name);
2479 file->ff_name = tmp;
2480
2481 dropacl(file);
2482 return (0);
2483 }
2484
2485 static int
fs_readlink(void * softc __unused,struct l9p_request * req)2486 fs_readlink(void *softc __unused, struct l9p_request *req)
2487 {
2488 struct fs_fid *file;
2489 ssize_t linklen;
2490 char buf[MAXPATHLEN];
2491 int error = 0;
2492
2493 file = req->lr_fid->lo_aux;
2494 assert(file);
2495
2496 linklen = readlinkat(file->ff_dirfd, file->ff_name, buf, sizeof(buf));
2497 if (linklen < 0)
2498 error = errno;
2499 else if ((size_t)linklen >= sizeof(buf))
2500 error = ENOMEM; /* todo: allocate dynamically */
2501 else if ((req->lr_resp.rreadlink.target = strndup(buf,
2502 (size_t)linklen)) == NULL)
2503 error = ENOMEM;
2504 return (error);
2505 }
2506
2507 static int
fs_getattr(void * softc __unused,struct l9p_request * req)2508 fs_getattr(void *softc __unused, struct l9p_request *req)
2509 {
2510 uint64_t mask, valid;
2511 struct fs_fid *file;
2512 struct stat st;
2513 int error = 0;
2514
2515 file = req->lr_fid->lo_aux;
2516 assert(file);
2517
2518 valid = 0;
2519 if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
2520 error = errno;
2521 goto out;
2522 }
2523 /* ?? Can we provide items not-requested? If so, can skip tests. */
2524 mask = req->lr_req.tgetattr.request_mask;
2525 if (mask & L9PL_GETATTR_MODE) {
2526 /* It is not clear if we need any translations. */
2527 req->lr_resp.rgetattr.mode = st.st_mode;
2528 valid |= L9PL_GETATTR_MODE;
2529 }
2530 if (mask & L9PL_GETATTR_NLINK) {
2531 req->lr_resp.rgetattr.nlink = st.st_nlink;
2532 valid |= L9PL_GETATTR_NLINK;
2533 }
2534 if (mask & L9PL_GETATTR_UID) {
2535 /* provide st_uid, or file->ff_uid? */
2536 req->lr_resp.rgetattr.uid = st.st_uid;
2537 valid |= L9PL_GETATTR_UID;
2538 }
2539 if (mask & L9PL_GETATTR_GID) {
2540 /* provide st_gid, or file->ff_gid? */
2541 req->lr_resp.rgetattr.gid = st.st_gid;
2542 valid |= L9PL_GETATTR_GID;
2543 }
2544 if (mask & L9PL_GETATTR_RDEV) {
2545 /* It is not clear if we need any translations. */
2546 req->lr_resp.rgetattr.rdev = (uint64_t)st.st_rdev;
2547 valid |= L9PL_GETATTR_RDEV;
2548 }
2549 if (mask & L9PL_GETATTR_ATIME) {
2550 req->lr_resp.rgetattr.atime_sec =
2551 (uint64_t)STAT_ATIME(&st).tv_sec;
2552 req->lr_resp.rgetattr.atime_nsec =
2553 (uint64_t)STAT_ATIME(&st).tv_nsec;
2554 valid |= L9PL_GETATTR_ATIME;
2555 }
2556 if (mask & L9PL_GETATTR_MTIME) {
2557 req->lr_resp.rgetattr.mtime_sec =
2558 (uint64_t)STAT_MTIME(&st).tv_sec;
2559 req->lr_resp.rgetattr.mtime_nsec =
2560 (uint64_t)STAT_MTIME(&st).tv_nsec;
2561 valid |= L9PL_GETATTR_MTIME;
2562 }
2563 if (mask & L9PL_GETATTR_CTIME) {
2564 req->lr_resp.rgetattr.ctime_sec =
2565 (uint64_t)STAT_CTIME(&st).tv_sec;
2566 req->lr_resp.rgetattr.ctime_nsec =
2567 (uint64_t)STAT_CTIME(&st).tv_nsec;
2568 valid |= L9PL_GETATTR_CTIME;
2569 }
2570 if (mask & L9PL_GETATTR_BTIME) {
2571 #if defined(HAVE_BIRTHTIME)
2572 req->lr_resp.rgetattr.btime_sec =
2573 (uint64_t)st.st_birthtim.tv_sec;
2574 req->lr_resp.rgetattr.btime_nsec =
2575 (uint64_t)st.st_birthtim.tv_nsec;
2576 #elif defined(__illumos__)
2577 getcrtime(softc, file->ff_dirfd, file->ff_name,
2578 &req->lr_resp.rgetattr.btime_sec,
2579 &req->lr_resp.rgetattr.btime_nsec);
2580 #else
2581 req->lr_resp.rgetattr.btime_sec = 0;
2582 req->lr_resp.rgetattr.btime_nsec = 0;
2583 #endif
2584 valid |= L9PL_GETATTR_BTIME;
2585 }
2586 if (mask & L9PL_GETATTR_INO)
2587 valid |= L9PL_GETATTR_INO;
2588 if (mask & L9PL_GETATTR_SIZE) {
2589 req->lr_resp.rgetattr.size = (uint64_t)st.st_size;
2590 valid |= L9PL_GETATTR_SIZE;
2591 }
2592 if (mask & L9PL_GETATTR_BLOCKS) {
2593 req->lr_resp.rgetattr.blksize = (uint64_t)st.st_blksize;
2594 req->lr_resp.rgetattr.blocks = (uint64_t)st.st_blocks;
2595 valid |= L9PL_GETATTR_BLOCKS;
2596 }
2597 #ifndef __illumos__
2598 if (mask & L9PL_GETATTR_GEN) {
2599 req->lr_resp.rgetattr.gen = st.st_gen;
2600 valid |= L9PL_GETATTR_GEN;
2601 }
2602 #endif
2603 /* don't know what to do with data version yet */
2604
2605 generate_qid(&st, &req->lr_resp.rgetattr.qid);
2606 out:
2607 req->lr_resp.rgetattr.valid = valid;
2608 return (error);
2609 }
2610
2611 /*
2612 * Should combine some of this with wstat code.
2613 */
2614 static int
fs_setattr(void * softc,struct l9p_request * req)2615 fs_setattr(void *softc, struct l9p_request *req)
2616 {
2617 uint64_t mask;
2618 struct fs_softc *sc = softc;
2619 struct timespec ts[2];
2620 struct fs_fid *file;
2621 struct stat st;
2622 int error = 0;
2623 uid_t uid, gid;
2624
2625 file = req->lr_fid->lo_aux;
2626 assert(file);
2627
2628 if (sc->fs_readonly)
2629 return (EROFS);
2630
2631 /*
2632 * As with WSTAT we have atomicity issues.
2633 */
2634 mask = req->lr_req.tsetattr.valid;
2635
2636 if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
2637 error = errno;
2638 goto out;
2639 }
2640
2641 if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
2642 error = EISDIR;
2643 goto out;
2644 }
2645
2646 if (mask & L9PL_SETATTR_MODE) {
2647 if (fchmodat(file->ff_dirfd, file->ff_name,
2648 req->lr_req.tsetattr.mode & 0777,
2649 0)) {
2650 error = errno;
2651 goto out;
2652 }
2653 }
2654
2655 if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
2656 uid = mask & L9PL_SETATTR_UID
2657 ? req->lr_req.tsetattr.uid
2658 : (uid_t)-1;
2659
2660 gid = mask & L9PL_SETATTR_GID
2661 ? req->lr_req.tsetattr.gid
2662 : (gid_t)-1;
2663
2664 if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
2665 AT_SYMLINK_NOFOLLOW)) {
2666 error = errno;
2667 goto out;
2668 }
2669 }
2670
2671 if (mask & L9PL_SETATTR_SIZE) {
2672 /* Truncate follows symlinks, is this OK? */
2673 int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
2674 if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) {
2675 error = errno;
2676 (void) close(fd);
2677 goto out;
2678 }
2679 (void) close(fd);
2680 }
2681
2682 if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
2683 ts[0].tv_sec = STAT_ATIME(&st).tv_sec;
2684 ts[0].tv_nsec = STAT_ATIME(&st).tv_nsec;
2685 ts[1].tv_sec = STAT_MTIME(&st).tv_sec;
2686 ts[1].tv_nsec = STAT_MTIME(&st).tv_nsec;
2687
2688 if (mask & L9PL_SETATTR_ATIME) {
2689 if (mask & L9PL_SETATTR_ATIME_SET) {
2690 ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
2691 ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
2692 } else {
2693 if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
2694 error = errno;
2695 goto out;
2696 }
2697 }
2698 }
2699
2700 if (mask & L9PL_SETATTR_MTIME) {
2701 if (mask & L9PL_SETATTR_MTIME_SET) {
2702 ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
2703 ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
2704 } else {
2705 if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
2706 error = errno;
2707 goto out;
2708 }
2709 }
2710 }
2711
2712 if (utimensat(file->ff_dirfd, file->ff_name, ts,
2713 AT_SYMLINK_NOFOLLOW)) {
2714 error = errno;
2715 goto out;
2716 }
2717 }
2718 out:
2719 return (error);
2720 }
2721
2722 static int
fs_xattrwalk(void * softc __unused,struct l9p_request * req __unused)2723 fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
2724 {
2725 return (EOPNOTSUPP);
2726 }
2727
2728 static int
fs_xattrcreate(void * softc __unused,struct l9p_request * req __unused)2729 fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
2730 {
2731 return (EOPNOTSUPP);
2732 }
2733
2734 static int
fs_readdir(void * softc __unused,struct l9p_request * req)2735 fs_readdir(void *softc __unused, struct l9p_request *req)
2736 {
2737 struct l9p_message msg;
2738 struct l9p_dirent de;
2739 struct fs_fid *file;
2740 struct dirent *dp;
2741 struct stat st;
2742 uint32_t count;
2743 int error = 0;
2744
2745 file = req->lr_fid->lo_aux;
2746 assert(file);
2747
2748 if (file->ff_dir == NULL)
2749 return (ENOTDIR);
2750
2751 if ((error = pthread_mutex_lock(&file->ff_mtx)) != 0)
2752 return (error);
2753
2754 /*
2755 * It's not clear whether we can use the same trick for
2756 * discarding offsets here as we do in fs_read. It
2757 * probably should work, we'll have to see if some
2758 * client(s) use the zero-offset thing to rescan without
2759 * clunking the directory first.
2760 *
2761 * Probably the thing to do is switch to calling
2762 * getdirentries() / getdents() directly, instead of
2763 * going through libc.
2764 */
2765 if (req->lr_req.io.offset == 0)
2766 rewinddir(file->ff_dir);
2767 else
2768 seekdir(file->ff_dir, (long)req->lr_req.io.offset);
2769
2770 l9p_init_msg(&msg, req, L9P_PACK);
2771 count = (uint32_t)msg.lm_size; /* in case we get no entries */
2772 while ((dp = readdir(file->ff_dir)) != NULL) {
2773 /*
2774 * Although "." is forbidden in naming and ".." is
2775 * special cased, testing shows that we must transmit
2776 * them through readdir. (For ".." at root, we
2777 * should perhaps alter the inode number, but not
2778 * yet.)
2779 */
2780
2781 /*
2782 * TODO: we do a full lstat here; could use dp->d_*
2783 * to construct the qid more efficiently, as long
2784 * as dp->d_type != DT_UNKNOWN.
2785 */
2786 if (fs_lstatat(file, dp->d_name, &st))
2787 continue;
2788
2789 de.qid.type = 0;
2790 generate_qid(&st, &de.qid);
2791 de.offset = (uint64_t)telldir(file->ff_dir);
2792 #ifdef __illumos__
2793 de.type = st.st_mode & S_IFMT;
2794 #else
2795 de.type = dp->d_type;
2796 #endif
2797 de.name = dp->d_name;
2798
2799 /* Update count only if we completely pack the dirent. */
2800 if (l9p_pudirent(&msg, &de) < 0)
2801 break;
2802 count = (uint32_t)msg.lm_size;
2803 }
2804
2805 (void) pthread_mutex_unlock(&file->ff_mtx);
2806 req->lr_resp.io.count = count;
2807 return (error);
2808 }
2809
2810 static int
fs_fsync(void * softc __unused,struct l9p_request * req)2811 fs_fsync(void *softc __unused, struct l9p_request *req)
2812 {
2813 struct fs_fid *file;
2814 int error = 0;
2815
2816 file = req->lr_fid->lo_aux;
2817 assert(file);
2818 if (fsync(file->ff_dir != NULL ? dirfd(file->ff_dir) : file->ff_fd))
2819 error = errno;
2820 return (error);
2821 }
2822
2823 static int
fs_lock(void * softc __unused,struct l9p_request * req)2824 fs_lock(void *softc __unused, struct l9p_request *req)
2825 {
2826
2827 switch (req->lr_req.tlock.type) {
2828 case L9PL_LOCK_TYPE_RDLOCK:
2829 case L9PL_LOCK_TYPE_WRLOCK:
2830 case L9PL_LOCK_TYPE_UNLOCK:
2831 break;
2832 default:
2833 return (EINVAL);
2834 }
2835
2836 req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS;
2837 return (0);
2838 }
2839
2840 static int
fs_getlock(void * softc __unused,struct l9p_request * req)2841 fs_getlock(void *softc __unused, struct l9p_request *req)
2842 {
2843
2844 /*
2845 * Client wants to see if a request to lock a region would
2846 * block. This is, of course, not atomic anyway, so the
2847 * op is useless. QEMU simply says "unlocked!", so we do
2848 * too.
2849 */
2850 switch (req->lr_req.getlock.type) {
2851 case L9PL_LOCK_TYPE_RDLOCK:
2852 case L9PL_LOCK_TYPE_WRLOCK:
2853 case L9PL_LOCK_TYPE_UNLOCK:
2854 break;
2855 default:
2856 return (EINVAL);
2857 }
2858
2859 req->lr_resp.getlock = req->lr_req.getlock;
2860 req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK;
2861 req->lr_resp.getlock.client_id = strdup(""); /* XXX what should go here? */
2862 return (0);
2863 }
2864
2865 static int
fs_link(void * softc __unused,struct l9p_request * req)2866 fs_link(void *softc __unused, struct l9p_request *req)
2867 {
2868 struct l9p_fid *dir;
2869 struct fs_fid *file;
2870 struct fs_fid *dirf;
2871 struct stat fst, tdst;
2872 int32_t op;
2873 char *name;
2874 char newname[MAXPATHLEN];
2875 int error;
2876
2877 /* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */
2878 dir = req->lr_fid2;
2879 dirf = dir->lo_aux;
2880 assert(dirf != NULL);
2881
2882 name = req->lr_req.tlink.name;
2883 error = fs_buildname(dir, name, newname, sizeof(newname));
2884 if (error)
2885 return (error);
2886
2887 file = req->lr_fid->lo_aux;
2888 assert(file != NULL);
2889
2890 if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 ||
2891 fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0)
2892 return (errno);
2893 if (S_ISDIR(fst.st_mode))
2894 return (EISDIR);
2895 fillacl(dirf);
2896 op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
2897 error = check_access(op,
2898 dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1);
2899 if (error)
2900 return (error);
2901
2902 if (linkat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
2903 newname, 0) != 0)
2904 error = errno;
2905 else
2906 dropacl(file);
2907
2908 return (error);
2909 }
2910
2911 static int
fs_mkdir(void * softc,struct l9p_request * req)2912 fs_mkdir(void *softc, struct l9p_request *req)
2913 {
2914 struct l9p_fid *dir;
2915 struct stat st;
2916 mode_t perm;
2917 gid_t gid;
2918 char *name;
2919 int error;
2920
2921 dir = req->lr_fid;
2922 name = req->lr_req.tmkdir.name;
2923 perm = (mode_t)req->lr_req.tmkdir.mode;
2924 gid = req->lr_req.tmkdir.gid;
2925
2926 error = fs_imkdir(softc, dir, name, false, perm, gid, &st);
2927 if (error == 0)
2928 generate_qid(&st, &req->lr_resp.rmkdir.qid);
2929 return (error);
2930 }
2931
2932 static int
fs_renameat(void * softc,struct l9p_request * req)2933 fs_renameat(void *softc, struct l9p_request *req)
2934 {
2935 struct fs_softc *sc = softc;
2936 struct l9p_fid *olddir, *newdir;
2937 struct l9p_acl *facl;
2938 struct fs_fid *off, *nff;
2939 struct stat odst, ndst, fst;
2940 int32_t op;
2941 bool reparenting;
2942 char *onp, *nnp;
2943 char onb[MAXPATHLEN], nnb[MAXPATHLEN];
2944 int error;
2945
2946 if (sc->fs_readonly)
2947 return (EROFS);
2948
2949 olddir = req->lr_fid;
2950 newdir = req->lr_fid2;
2951 assert(olddir != NULL && newdir != NULL);
2952 off = olddir->lo_aux;
2953 nff = newdir->lo_aux;
2954 assert(off != NULL && nff != NULL);
2955
2956 onp = req->lr_req.trenameat.oldname;
2957 nnp = req->lr_req.trenameat.newname;
2958 error = fs_buildname(olddir, onp, onb, sizeof(onb));
2959 if (error)
2960 return (error);
2961 error = fs_buildname(newdir, nnp, nnb, sizeof(nnb));
2962 if (error)
2963 return (error);
2964 if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0)
2965 return (errno);
2966
2967 reparenting = olddir != newdir &&
2968 strcmp(off->ff_name, nff->ff_name) != 0;
2969
2970 if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0)
2971 return (errno);
2972 if (!S_ISDIR(odst.st_mode))
2973 return (ENOTDIR);
2974 fillacl(off);
2975
2976 if (reparenting) {
2977 if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0)
2978 return (errno);
2979 if (!S_ISDIR(ndst.st_mode))
2980 return (ENOTDIR);
2981 facl = getacl(off, -1, onb);
2982 fillacl(nff);
2983
2984 error = check_access(L9P_ACOP_UNLINK,
2985 off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1);
2986 l9p_acl_free(facl);
2987 if (error)
2988 return (error);
2989 op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY :
2990 L9P_ACE_ADD_FILE;
2991 error = check_access(op,
2992 nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1);
2993 if (error)
2994 return (error);
2995 }
2996
2997 if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb))
2998 error = errno;
2999
3000 return (error);
3001 }
3002
3003 /*
3004 * Unlink file in given directory, or remove directory in given
3005 * directory, based on flags.
3006 */
3007 static int
fs_unlinkat(void * softc,struct l9p_request * req)3008 fs_unlinkat(void *softc, struct l9p_request *req)
3009 {
3010 struct fs_softc *sc = softc;
3011 struct l9p_acl *facl;
3012 struct l9p_fid *dir;
3013 struct fs_fid *dirff;
3014 struct stat dirst, fst;
3015 char *name;
3016 char newname[MAXPATHLEN];
3017 int error;
3018
3019 if (sc->fs_readonly)
3020 return (EROFS);
3021
3022 dir = req->lr_fid;
3023 dirff = dir->lo_aux;
3024 assert(dirff != NULL);
3025 name = req->lr_req.tunlinkat.name;
3026 error = fs_buildname(dir, name, newname, sizeof(newname));
3027 if (error)
3028 return (error);
3029 if (fstatat(dirff->ff_dirfd, newname, &fst, AT_SYMLINK_NOFOLLOW) != 0 ||
3030 fstatat(dirff->ff_dirfd, dirff->ff_name, &dirst, AT_SYMLINK_NOFOLLOW) != 0)
3031 return (errno);
3032 fillacl(dirff);
3033 facl = getacl(dirff, -1, newname);
3034 error = check_access(L9P_ACOP_UNLINK,
3035 dirff->ff_acl, &dirst, facl, &fst, dirff->ff_ai, (gid_t)-1);
3036 l9p_acl_free(facl);
3037 if (error)
3038 return (error);
3039
3040 if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR) {
3041 if (unlinkat(dirff->ff_dirfd, newname, AT_REMOVEDIR) != 0)
3042 error = errno;
3043 } else {
3044 if (unlinkat(dirff->ff_dirfd, newname, 0) != 0)
3045 error = errno;
3046 }
3047 return (error);
3048 }
3049
3050 static void
fs_freefid(void * softc __unused,struct l9p_fid * fid)3051 fs_freefid(void *softc __unused, struct l9p_fid *fid)
3052 {
3053 struct fs_fid *f = fid->lo_aux;
3054 struct fs_authinfo *ai;
3055 uint32_t newcount;
3056
3057 if (f == NULL) {
3058 /* Nothing to do here */
3059 return;
3060 }
3061
3062 if (f->ff_fd != -1)
3063 close(f->ff_fd);
3064
3065 if (f->ff_dir)
3066 closedir(f->ff_dir);
3067
3068 (void) pthread_mutex_destroy(&f->ff_mtx);
3069 free(f->ff_name);
3070 ai = f->ff_ai;
3071 l9p_acl_free(f->ff_acl);
3072 free(f);
3073 (void) pthread_mutex_lock(&ai->ai_mtx);
3074 newcount = --ai->ai_refcnt;
3075 (void) pthread_mutex_unlock(&ai->ai_mtx);
3076 if (newcount == 0) {
3077 /*
3078 * We *were* the last ref, no one can have gained a ref.
3079 */
3080 L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p",
3081 (void *)ai);
3082 (void) pthread_mutex_destroy(&ai->ai_mtx);
3083 free(ai);
3084 } else {
3085 L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
3086 (void *)ai, (u_long)newcount);
3087 }
3088 }
3089
3090 int
l9p_backend_fs_init(struct l9p_backend ** backendp,int rootfd,bool ro)3091 l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
3092 {
3093 struct l9p_backend *backend;
3094 struct fs_softc *sc;
3095 int error;
3096 #if defined(WITH_CASPER)
3097 cap_channel_t *capcas;
3098 #endif
3099
3100 if (!fs_attach_mutex_inited) {
3101 #ifdef __illumos__
3102 if ((error = pthread_mutexattr_init(&fs_mutexattr)) != 0) {
3103 errno = error;
3104 return (-1);
3105 }
3106 if ((error = pthread_mutexattr_settype(&fs_mutexattr,
3107 PTHREAD_MUTEX_ERRORCHECK)) != 0) {
3108 errno = error;
3109 return (-1);
3110 }
3111 error = pthread_mutex_init(&fs_attach_mutex, &fs_mutexattr);
3112 #else
3113 error = pthread_mutex_init(&fs_attach_mutex, NULL);
3114 #endif
3115 if (error) {
3116 errno = error;
3117 return (-1);
3118 }
3119 fs_attach_mutex_inited = true;
3120 }
3121
3122 backend = l9p_malloc(sizeof(*backend));
3123 backend->attach = fs_attach;
3124 backend->clunk = fs_clunk;
3125 backend->create = fs_create;
3126 backend->open = fs_open;
3127 backend->read = fs_read;
3128 backend->remove = fs_remove;
3129 backend->stat = fs_stat;
3130 backend->walk = fs_walk;
3131 backend->write = fs_write;
3132 backend->wstat = fs_wstat;
3133 backend->statfs = fs_statfs;
3134 backend->lopen = fs_lopen;
3135 backend->lcreate = fs_lcreate;
3136 backend->symlink = fs_symlink;
3137 backend->mknod = fs_mknod;
3138 backend->rename = fs_rename;
3139 backend->readlink = fs_readlink;
3140 backend->getattr = fs_getattr;
3141 backend->setattr = fs_setattr;
3142 backend->xattrwalk = fs_xattrwalk;
3143 backend->xattrcreate = fs_xattrcreate;
3144 backend->readdir = fs_readdir;
3145 backend->fsync = fs_fsync;
3146 backend->lock = fs_lock;
3147 backend->getlock = fs_getlock;
3148 backend->link = fs_link;
3149 backend->mkdir = fs_mkdir;
3150 backend->renameat = fs_renameat;
3151 backend->unlinkat = fs_unlinkat;
3152 backend->freefid = fs_freefid;
3153
3154 sc = l9p_malloc(sizeof(*sc));
3155 sc->fs_rootfd = rootfd;
3156 sc->fs_readonly = ro;
3157 backend->softc = sc;
3158
3159 #if defined(__illumos__)
3160 if (fpathconf(rootfd, _PC_XATTR_ENABLED) > 0)
3161 sc->fs_hasxattr = 1;
3162 #endif
3163
3164 #if defined(WITH_CASPER)
3165 capcas = cap_init();
3166 if (capcas == NULL)
3167 return (-1);
3168
3169 sc->fs_cappwd = cap_service_open(capcas, "system.pwd");
3170 if (sc->fs_cappwd == NULL)
3171 return (-1);
3172
3173 sc->fs_capgrp = cap_service_open(capcas, "system.grp");
3174 if (sc->fs_capgrp == NULL)
3175 return (-1);
3176
3177 cap_setpassent(sc->fs_cappwd, 1);
3178 cap_setgroupent(sc->fs_capgrp, 1);
3179 cap_close(capcas);
3180 #elif defined(__illumos__)
3181 setpwent();
3182 #else
3183 setpassent(1);
3184 #endif
3185
3186 *backendp = backend;
3187 return (0);
3188 }
3189
3190 #ifdef __illumos__
3191 acl_t *
acl_get_fd_np(int fd,int type)3192 acl_get_fd_np(int fd, int type)
3193 {
3194 acl_t *acl;
3195 int flag, ret;
3196
3197 flag = 0;
3198 if (type == ACL_TYPE_NFS4)
3199 flag = ACL_NO_TRIVIAL;
3200
3201 ret = facl_get(fd, flag, &acl);
3202 if (ret != 0)
3203 return (NULL);
3204
3205 return (acl);
3206 }
3207
3208 static void
getcrtime(struct fs_softc * sc,int dirfd,const char * fname,uint64_t * secp,uint64_t * nsp)3209 getcrtime(struct fs_softc *sc, int dirfd, const char *fname, uint64_t *secp,
3210 uint64_t *nsp)
3211 {
3212 nvlist_t *nvl = NULL;
3213 uint64_t *vals = NULL;
3214 uint_t nvals = 0;
3215 int error;
3216
3217 *secp = 0;
3218 *nsp = 0;
3219
3220 if (!sc->fs_hasxattr)
3221 return;
3222
3223 if ((error = getattrat(dirfd, XATTR_VIEW_READWRITE, fname, &nvl)) != 0)
3224 return;
3225
3226 if (nvlist_lookup_uint64_array(nvl, "crtime", &vals, &nvals) != 0)
3227 goto done;
3228
3229 if (nvals != 2)
3230 goto done;
3231
3232 *secp = vals[0];
3233 *nsp = vals[1];
3234
3235 done:
3236