1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 * Copyright 2021 Racktop Systems, Inc.
36 */
37
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65
66 #include <fs/fs_reparse.h>
67
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90
91 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
93 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
95 extern struct svc_ops rdma_svc_ops;
96 extern int nfs_loaned_buffers;
97 /* End of Tunables */
98
99 static int rdma_setup_read_data4(READ4args *, READ4res *);
100
/*
 * Advance the change sequence (bits.chgseq) of the stateid that sp points
 * to and evaluate to the new value; this bumps the stateid4.seqid so that
 * a change in the stateid is visible.
 */
#define	next_stateid(sp)	((sp)->bits.chgseq += 1)
105
106 /*
107 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
108 * This is used to return NFS4ERR_TOOSMALL when clients specify
109 * maxcount that isn't large enough to hold the smallest possible
110 * XDR encoded dirent.
111 *
112 * sizeof cookie (8 bytes) +
113 * sizeof name_len (4 bytes) +
114 * sizeof smallest (padded) name (4 bytes) +
115 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
116 * sizeof attrlist4_len (4 bytes) +
117 * sizeof next boolean (4 bytes)
118 *
119 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
120 * the smallest possible entry4 (assumes no attrs requested).
121 * sizeof nfsstat4 (4 bytes) +
122 * sizeof verifier4 (8 bytes) +
123 * sizeof entry4list bool (4 bytes) +
124 * sizeof entry4 (36 bytes) +
125 * sizeof eof bool (4 bytes)
126 *
127 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
128 * VOP_READDIR. Its value is the size of the maximum possible dirent
129 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
130 * required for a given name length. MAXNAMELEN is the maximum
131 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
132 * macros are to allow for . and .. entries -- just a minor tweak to try
133 * and guarantee that buffer we give to VOP_READDIR will be large enough
134 * to hold ., .., and the largest possible solaris dirent64.
135 */
/* cookie + name_len + padded name + bitmap4 (len=2) + attrlist4_len + next */
#define	RFS4_MINLEN_ENTRY4	(8 + 4 + 4 + 12 + 4 + 4)
/* nfsstat4 + verifier4 + entry4list bool + smallest entry4 + eof bool */
#define	RFS4_MINLEN_RDDIR4	\
	(4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 carrying a MAXNAMELEN name */
#define	RFS4_MINLEN_RDDIR_BUF	\
	(DIRENT64_RECLEN(MAXNAMELEN) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(1))
140
141 /*
142 * It would be better to pad to 4 bytes since that's what XDR would do,
143 * but the dirents UFS gives us are already padded to 8, so just take
144 * what we're given. Dircount is only a hint anyway. Currently the
145 * solaris kernel is ASCII only, so there's no point in calling the
146 * UTF8 functions.
147 *
 *	dirent64: name padded to provide 8 byte struct alignment
149 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
150 *
151 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
152 *
153 */
/*
 * dircount charged for one dirent64: the name length derived from its
 * d_reclen plus three XDR units (cookie and name length).
 */
#define	DIRENT64_TO_DIRCOUNT(dp)	\
	(DIRENT64_NAMELEN((dp)->d_reclen) + 3 * BYTES_PER_XDR_UNIT)
156
157
158 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
159
160 u_longlong_t nfs4_srv_caller_id;
161 uint_t nfs4_srv_vkey = 0;
162
163 void rfs4_init_compound_state(struct compound_state *);
164
165 static void nullfree(caddr_t);
166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 struct compound_state *);
168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 struct compound_state *);
172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 struct compound_state *);
174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 struct compound_state *);
176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 struct svc_req *, struct compound_state *);
180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 struct compound_state *);
182 static void rfs4_op_getattr_free(nfs_resop4 *);
183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 struct compound_state *);
185 static void rfs4_op_getfh_free(nfs_resop4 *);
186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 struct compound_state *);
188 static void rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 struct compound_state *);
192 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 struct compound_state *);
194 static void lock_denied_free(nfs_resop4 *);
195 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 struct compound_state *);
197 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 struct compound_state *);
199 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 struct compound_state *);
201 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 struct compound_state *);
203 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
204 struct svc_req *req, struct compound_state *cs);
205 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 struct compound_state *);
207 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
208 struct compound_state *);
209 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
210 struct svc_req *, struct compound_state *);
211 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
212 struct svc_req *, struct compound_state *);
213 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 struct compound_state *);
215 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 struct compound_state *);
217 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 struct compound_state *);
219 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 struct compound_state *);
221 static void rfs4_op_read_free(nfs_resop4 *);
222 static void rfs4_op_readdir_free(nfs_resop4 *resop);
223 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 struct compound_state *);
225 static void rfs4_op_readlink_free(nfs_resop4 *);
226 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
227 struct svc_req *, struct compound_state *);
228 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 struct compound_state *);
230 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 struct compound_state *);
242 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
243 struct compound_state *);
244 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
245 struct svc_req *, struct compound_state *);
246 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
247 struct svc_req *req, struct compound_state *);
248 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
249 struct compound_state *);
250 static void rfs4_op_secinfo_free(nfs_resop4 *);
251
252 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
253 struct svc_req *req, struct compound_state *cs);
254 void rfs4x_exchange_id_free(nfs_resop4 *);
255
256 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
257 struct svc_req *req, struct compound_state *cs);
258
259 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
260 struct svc_req *req, compound_state_t *cs);
261
262 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
263 struct svc_req *req, struct compound_state *cs);
264
265 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
266 struct svc_req *req, compound_state_t *cs);
267
268 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
269 struct svc_req *req, compound_state_t *cs);
270
271 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
272 struct svc_req *req, compound_state_t *cs);
273
274 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
275 struct svc_req *req, compound_state_t *cs);
276
277 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
278 struct svc_req *);
279 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
280 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
281
282 /*
283 * translation table for attrs
284 */
285 struct nfs4_ntov_table {
286 union nfs4_attr_u *na;
287 uint8_t amap[NFS4_MAXNUM_ATTRS];
288 int attrcnt;
289 bool_t vfsstat;
290 };
291
292 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
293 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
294 struct nfs4_svgetit_arg *sargp);
295
296 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
297 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
298 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
299
300 static void hanfsv4_failover(nfs4_srv_t *);
301
302 fem_t *deleg_rdops;
303 fem_t *deleg_wrops;
304
/*
 * NFS4 op dispatch table entry.
 *
 * The function pointers are deliberately left unprototyped: the table mixes
 * result-free routines that take a caddr_t with ones that take an
 * nfs_resop4 pointer.
 */
struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
314
315 static struct rfsv4disp rfsv4disptab[] = {
316 /*
317 * NFS VERSION 4
318 */
319
320 /* RFS_NULL = 0 */
321 {rfs4_op_illegal, nullfree, 0},
322
323 /* UNUSED = 1 */
324 {rfs4_op_illegal, nullfree, 0},
325
326 /* UNUSED = 2 */
327 {rfs4_op_illegal, nullfree, 0},
328
329 /* OP_ACCESS = 3 */
330 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
331
332 /* OP_CLOSE = 4 */
333 {rfs4_op_close, nullfree, 0},
334
335 /* OP_COMMIT = 5 */
336 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
337
338 /* OP_CREATE = 6 */
339 {rfs4_op_create, nullfree, 0},
340
341 /* OP_DELEGPURGE = 7 */
342 {rfs4_op_delegpurge, nullfree, 0},
343
344 /* OP_DELEGRETURN = 8 */
345 {rfs4_op_delegreturn, nullfree, 0},
346
347 /* OP_GETATTR = 9 */
348 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
349
350 /* OP_GETFH = 10 */
351 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
352
353 /* OP_LINK = 11 */
354 {rfs4_op_link, nullfree, 0},
355
356 /* OP_LOCK = 12 */
357 {rfs4_op_lock, lock_denied_free, 0},
358
359 /* OP_LOCKT = 13 */
360 {rfs4_op_lockt, lock_denied_free, 0},
361
362 /* OP_LOCKU = 14 */
363 {rfs4_op_locku, nullfree, 0},
364
365 /* OP_LOOKUP = 15 */
366 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
367
368 /* OP_LOOKUPP = 16 */
369 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
370
371 /* OP_NVERIFY = 17 */
372 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
373
374 /* OP_OPEN = 18 */
375 {rfs4_op_open, rfs4_free_reply, 0},
376
377 /* OP_OPENATTR = 19 */
378 {rfs4_op_openattr, nullfree, 0},
379
380 /* OP_OPEN_CONFIRM = 20 */
381 {rfs4_op_open_confirm, nullfree, 0},
382
383 /* OP_OPEN_DOWNGRADE = 21 */
384 {rfs4_op_open_downgrade, nullfree, 0},
385
386 /* OP_OPEN_PUTFH = 22 */
387 {rfs4_op_putfh, nullfree, RPC_ALL},
388
389 /* OP_PUTPUBFH = 23 */
390 {rfs4_op_putpubfh, nullfree, RPC_ALL},
391
392 /* OP_PUTROOTFH = 24 */
393 {rfs4_op_putrootfh, nullfree, RPC_ALL},
394
395 /* OP_READ = 25 */
396 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
397
398 /* OP_READDIR = 26 */
399 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
400
401 /* OP_READLINK = 27 */
402 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
403
404 /* OP_REMOVE = 28 */
405 {rfs4_op_remove, nullfree, 0},
406
407 /* OP_RENAME = 29 */
408 {rfs4_op_rename, nullfree, 0},
409
410 /* OP_RENEW = 30 */
411 {rfs4_op_renew, nullfree, 0},
412
413 /* OP_RESTOREFH = 31 */
414 {rfs4_op_restorefh, nullfree, RPC_ALL},
415
416 /* OP_SAVEFH = 32 */
417 {rfs4_op_savefh, nullfree, RPC_ALL},
418
419 /* OP_SECINFO = 33 */
420 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
421
422 /* OP_SETATTR = 34 */
423 {rfs4_op_setattr, nullfree, 0},
424
425 /* OP_SETCLIENTID = 35 */
426 {rfs4_op_setclientid, nullfree, 0},
427
428 /* OP_SETCLIENTID_CONFIRM = 36 */
429 {rfs4_op_setclientid_confirm, nullfree, 0},
430
431 /* OP_VERIFY = 37 */
432 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
433
434 /* OP_WRITE = 38 */
435 {rfs4_op_write, nullfree, 0},
436
437 /* OP_RELEASE_LOCKOWNER = 39 */
438 {rfs4_op_release_lockowner, nullfree, 0},
439
440 /*
441 * NFSv4.1 operations
442 */
443
444 /* OP_BACKCHANNEL_CTL = 40 */
445 {rfs4_op_notsup, nullfree, 0},
446
447 /* OP_BIND_CONN_TO_SESSION = 41 */
448 {rfs4x_op_bind_conn_to_session, nullfree, 0},
449
450 /* OP_EXCHANGE_ID = 42 */
451 {rfs4x_op_exchange_id, rfs4x_exchange_id_free, 0},
452
453 /* OP_CREATE_SESSION = 43 */
454 {rfs4x_op_create_session, nullfree, 0},
455
456 /* OP_DESTROY_SESSION = 44 */
457 {rfs4x_op_destroy_session, nullfree, 0},
458
459 /* OP_FREE_STATEID = 45 */
460 {rfs4_op_notsup, nullfree, 0},
461
462 /* OP_GET_DIR_DELEGATION = 46 */
463 {rfs4_op_notsup, nullfree, 0},
464
465 /* OP_GETDEVICEINFO = 47 */
466 {rfs4_op_notsup, nullfree, 0},
467
468 /* OP_GETDEVICELIST = 48 */
469 {rfs4_op_notsup, nullfree, 0},
470
471 /* OP_LAYOUTCOMMIT = 49 */
472 {rfs4_op_notsup, nullfree, 0},
473
474 /* OP_LAYOUTGET = 50 */
475 {rfs4_op_notsup, nullfree, 0},
476
477 /* OP_LAYOUTRETURN = 51 */
478 {rfs4_op_notsup, nullfree, 0},
479
480 /* OP_SECINFO_NO_NAME = 52 */
481 {rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},
482
483 /* OP_SEQUENCE = 53 */
484 {rfs4x_op_sequence, nullfree, 0},
485
486 /* OP_SET_SSV = 54 */
487 {rfs4_op_notsup, nullfree, 0},
488
489 /* OP_TEST_STATEID = 55 */
490 {rfs4_op_notsup, nullfree, 0},
491
492 /* OP_WANT_DELEGATION = 56 */
493 {rfs4_op_notsup, nullfree, 0},
494
495 /* OP_DESTROY_CLIENTID = 57 */
496 {rfs4x_op_destroy_clientid, nullfree, 0},
497
498 /* OP_RECLAIM_COMPLETE = 58 */
499 {rfs4x_op_reclaim_complete, nullfree, 0},
500 };
501
502 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
503
504 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
505
#ifdef DEBUG

/* Debug knobs (DEBUG builds only). */
int rfs4_rddir_debug = 0;
int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;

/*
 * Printable operation names, in the same order as rfsv4disptab[].  There
 * is one more string than there are table entries: the final
 * "rfs4_op_illegal" sits at index OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	/* 0 - 9 */
	"rfs4_op_null", "rfs4_op_1 unused", "rfs4_op_2 unused",
	"rfs4_op_access", "rfs4_op_close", "rfs4_op_commit",
	"rfs4_op_create", "rfs4_op_delegpurge", "rfs4_op_delegreturn",
	"rfs4_op_getattr",
	/* 10 - 19 */
	"rfs4_op_getfh", "rfs4_op_link", "rfs4_op_lock",
	"rfs4_op_lockt", "rfs4_op_locku", "rfs4_op_lookup",
	"rfs4_op_lookupp", "rfs4_op_nverify", "rfs4_op_open",
	"rfs4_op_openattr",
	/* 20 - 29 */
	"rfs4_op_open_confirm", "rfs4_op_open_downgrade", "rfs4_op_putfh",
	"rfs4_op_putpubfh", "rfs4_op_putrootfh", "rfs4_op_read",
	"rfs4_op_readdir", "rfs4_op_readlink", "rfs4_op_remove",
	"rfs4_op_rename",
	/* 30 - 39 */
	"rfs4_op_renew", "rfs4_op_restorefh", "rfs4_op_savefh",
	"rfs4_op_secinfo", "rfs4_op_setattr", "rfs4_op_setclientid",
	"rfs4_op_setclient_confirm", "rfs4_op_verify", "rfs4_op_write",
	"rfs4_op_release_lockowner",
	/* NFSv4.1: 40 - 49 */
	"backchannel_ctl", "bind_conn_to_session", "exchange_id",
	"create_session", "destroy_session", "free_stateid",
	"get_dir_delegation", "getdeviceinfo", "getdevicelist",
	"layoutcommit",
	/* NFSv4.1: 50 - 58 */
	"layoutget", "layoutreturn", "secinfo_no_name",
	"sequence", "set_ssv", "test_stateid",
	"want_delegation", "destroy_clientid", "reclaim_complete",
	/* OP_ILLEGAL_IDX */
	"rfs4_op_illegal"
};

#endif
577
578 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
579
580 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
581
582 extern void rfs4_free_fs_locations4(fs_locations4 *);
583
584 #ifdef nextdp
585 #undef nextdp
586 #endif
587 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
588
589 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
590 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
591 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
592 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
593 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
594 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
595 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
596 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
597 NULL, NULL
598 };
599 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
600 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
601 VOPNAME_READ, { .femop_read = deleg_wr_read },
602 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
603 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
604 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
605 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
606 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
607 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
608 NULL, NULL
609 };
610
611 nfs4_srv_t *
nfs4_get_srv(void)612 nfs4_get_srv(void)
613 {
614 nfs_globals_t *ng = nfs_srv_getzg();
615 nfs4_srv_t *srv = ng->nfs4_srv;
616 ASSERT(srv != NULL);
617 return (srv);
618 }
619
620 void
rfs4_srv_zone_init(nfs_globals_t * ng)621 rfs4_srv_zone_init(nfs_globals_t *ng)
622 {
623 nfs4_srv_t *nsrv4;
624 timespec32_t verf;
625
626 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
627
628 /*
629 * The following algorithm attempts to find a unique verifier
630 * to be used as the write verifier returned from the server
631 * to the client. It is important that this verifier change
632 * whenever the server reboots. Of secondary importance, it
633 * is important for the verifier to be unique between two
634 * different servers.
635 *
636 * Thus, an attempt is made to use the system hostid and the
637 * current time in seconds when the nfssrv kernel module is
638 * loaded. It is assumed that an NFS server will not be able
639 * to boot and then to reboot in less than a second. If the
640 * hostid has not been set, then the current high resolution
641 * time is used. This will ensure different verifiers each
642 * time the server reboots and minimize the chances that two
643 * different servers will have the same verifier.
644 * XXX - this is broken on LP64 kernels.
645 */
646 verf.tv_sec = (time_t)zone_get_hostid(NULL);
647 if (verf.tv_sec != 0) {
648 verf.tv_nsec = gethrestime_sec();
649 } else {
650 timespec_t tverf;
651
652 gethrestime(&tverf);
653 verf.tv_sec = (time_t)tverf.tv_sec;
654 verf.tv_nsec = tverf.tv_nsec;
655 }
656 nsrv4->write4verf = *(uint64_t *)&verf;
657
658 /* Used to manage create/destroy of server state */
659 nsrv4->nfs4_server_state = NULL;
660 nsrv4->nfs4_cur_servinst = NULL;
661 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
662 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
663 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
664 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
665 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
666
667 ng->nfs4_srv = nsrv4;
668 }
669
670 void
rfs4_srv_zone_fini(nfs_globals_t * ng)671 rfs4_srv_zone_fini(nfs_globals_t *ng)
672 {
673 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
674
675 ng->nfs4_srv = NULL;
676
677 mutex_destroy(&nsrv4->deleg_lock);
678 mutex_destroy(&nsrv4->state_lock);
679 mutex_destroy(&nsrv4->servinst_lock);
680 rw_destroy(&nsrv4->deleg_policy_lock);
681
682 kmem_free(nsrv4, sizeof (*nsrv4));
683 }
684
685 void
rfs4_srvrinit(void)686 rfs4_srvrinit(void)
687 {
688 extern void rfs4_attr_init();
689
690 rfs4_attr_init();
691
692 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
693 rfs4_disable_delegation();
694 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
695 &deleg_wrops) != 0) {
696 rfs4_disable_delegation();
697 fem_free(deleg_rdops);
698 }
699
700 nfs4_srv_caller_id = fs_new_caller_id();
701 lockt_sysid = lm_alloc_sysidt();
702 vsd_create(&nfs4_srv_vkey, NULL);
703 rfs4_state_g_init();
704 }
705
706 void
rfs4_srvrfini(void)707 rfs4_srvrfini(void)
708 {
709 if (lockt_sysid != LM_NOSYSID) {
710 lm_free_sysidt(lockt_sysid);
711 lockt_sysid = LM_NOSYSID;
712 }
713
714 rfs4_state_g_fini();
715
716 fem_free(deleg_rdops);
717 fem_free(deleg_wrops);
718 }
719
720 void
rfs4_do_server_start(int server_upordown,int srv_delegation,nfs4_minor_t nfs4_minor_max,int cluster_booted)721 rfs4_do_server_start(int server_upordown, int srv_delegation,
722 nfs4_minor_t nfs4_minor_max, int cluster_booted)
723 {
724 nfs4_srv_t *nsrv4 = nfs4_get_srv();
725
726 /* Is this a warm start? */
727 if (server_upordown == NFS_SERVER_QUIESCED) {
728 cmn_err(CE_NOTE, "nfs4_srv: "
729 "server was previously quiesced; "
730 "existing NFSv4 state will be re-used");
731
732 /*
733 * HA-NFSv4: this is also the signal
734 * that a Resource Group failover has
735 * occurred.
736 */
737 if (cluster_booted)
738 hanfsv4_failover(nsrv4);
739 } else {
740 /* Cold start */
741 nsrv4->rfs4_start_time = 0;
742 rfs4_state_zone_init(nsrv4);
743 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
744 nfs4_drc_hash);
745
746 /*
747 * The nfsd service was started with the -s option
748 * we need to pull in any state from the paths indicated.
749 */
750 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
751 /* read in the stable storage state from these paths */
752 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
753 rfs4_dss_newpaths);
754 }
755 }
756
757 nsrv4->nfs4_minor_max = nfs4_minor_max;
758
759 /* Check if delegation is to be enabled */
760 if (srv_delegation != FALSE)
761 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
762 }
763
764 void
rfs4_init_compound_state(struct compound_state * cs)765 rfs4_init_compound_state(struct compound_state *cs)
766 {
767 bzero(cs, sizeof (*cs));
768 cs->cont = TRUE;
769 cs->access = CS_ACCESS_DENIED;
770 cs->deleg = FALSE;
771 cs->mandlock = FALSE;
772 cs->fh.nfs_fh4_val = cs->fhbuf;
773 }
774
775 /* Do cleanup of the compound_state */
776 void
rfs4_fini_compound_state(struct compound_state * cs)777 rfs4_fini_compound_state(struct compound_state *cs)
778 {
779 if (cs->vp) {
780 VN_RELE(cs->vp);
781 }
782 if (cs->saved_vp) {
783 VN_RELE(cs->saved_vp);
784 }
785 if (cs->cr) {
786 crfree(cs->cr);
787 }
788 if (cs->saved_fh.nfs_fh4_val) {
789 kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE);
790 }
791 if (cs->sp) {
792 rfs4x_session_rele(cs->sp);
793 }
794 }
795
796 void
rfs4_grace_start(rfs4_servinst_t * sip)797 rfs4_grace_start(rfs4_servinst_t *sip)
798 {
799 rw_enter(&sip->rwlock, RW_WRITER);
800 sip->start_time = nfs_sys_uptime();
801 sip->grace_period = rfs4_grace_period;
802 rw_exit(&sip->rwlock);
803 }
804
805 /*
806 * returns true if the instance's grace period has never been started
807 */
808 int
rfs4_servinst_grace_new(rfs4_servinst_t * sip)809 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
810 {
811 time_t start_time;
812
813 rw_enter(&sip->rwlock, RW_READER);
814 start_time = sip->start_time;
815 rw_exit(&sip->rwlock);
816
817 return (start_time == 0);
818 }
819
820 /*
821 * Indicates if server instance is within the
822 * grace period.
823 */
824 int
rfs4_servinst_in_grace(rfs4_servinst_t * sip)825 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
826 {
827 time_t grace_expiry;
828
829 /* All clients called reclaim-complete */
830 if (sip->nreclaim == 0 || sip->grace_period == 0)
831 return (0);
832
833 rw_enter(&sip->rwlock, RW_READER);
834 grace_expiry = sip->start_time + sip->grace_period;
835 rw_exit(&sip->rwlock);
836
837 if (nfs_sys_uptime() < grace_expiry)
838 return (1);
839
840 /* Once grace period ends, optimize next calls */
841 sip->grace_period = 0;
842 return (0);
843 }
844
845 int
rfs4_clnt_in_grace(rfs4_client_t * cp)846 rfs4_clnt_in_grace(rfs4_client_t *cp)
847 {
848 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
849
850 return (rfs4_servinst_in_grace(cp->rc_server_instance));
851 }
852
853 /*
854 * reset all currently active grace periods
855 */
856 void
rfs4_grace_reset_all(nfs4_srv_t * nsrv4)857 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
858 {
859 rfs4_servinst_t *sip;
860
861 mutex_enter(&nsrv4->servinst_lock);
862 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
863 if (rfs4_servinst_in_grace(sip))
864 rfs4_grace_start(sip);
865 mutex_exit(&nsrv4->servinst_lock);
866 }
867
868 /*
869 * start any new instances' grace periods
870 */
871 void
rfs4_grace_start_new(nfs4_srv_t * nsrv4)872 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
873 {
874 rfs4_servinst_t *sip;
875
876 mutex_enter(&nsrv4->servinst_lock);
877 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
878 if (rfs4_servinst_grace_new(sip))
879 rfs4_grace_start(sip);
880 mutex_exit(&nsrv4->servinst_lock);
881 }
882
883 static rfs4_dss_path_t *
rfs4_dss_newpath(nfs4_srv_t * nsrv4,rfs4_servinst_t * sip,char * path,unsigned index)884 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
885 char *path, unsigned index)
886 {
887 size_t len;
888 rfs4_dss_path_t *dss_path;
889
890 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
891
892 /*
893 * Take a copy of the string, since the original may be overwritten.
894 * Sadly, no strdup() in the kernel.
895 */
896 /* allow for NUL */
897 len = strlen(path) + 1;
898 dss_path->path = kmem_alloc(len, KM_SLEEP);
899 (void) strlcpy(dss_path->path, path, len);
900
901 /* associate with servinst */
902 dss_path->sip = sip;
903 dss_path->index = index;
904
905 /*
906 * Add to list of served paths.
907 * No locking required, as we're only ever called at startup.
908 */
909 if (nsrv4->dss_pathlist == NULL) {
910 /* this is the first dss_path_t */
911
912 /* needed for insque/remque */
913 dss_path->next = dss_path->prev = dss_path;
914
915 nsrv4->dss_pathlist = dss_path;
916 } else {
917 insque(dss_path, nsrv4->dss_pathlist);
918 }
919
920 return (dss_path);
921 }
922
923 /*
924 * Create a new server instance, and make it the currently active instance.
925 * Note that starting the grace period too early will reduce the clients'
926 * recovery window.
927 */
928 void
rfs4_servinst_create(nfs4_srv_t * nsrv4,int start_grace,int dss_npaths,char ** dss_paths)929 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
930 int dss_npaths, char **dss_paths)
931 {
932 unsigned i;
933 rfs4_servinst_t *sip;
934 rfs4_oldstate_t *oldstate;
935
936 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
937 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
938
939 sip->nreclaim = 0;
940 sip->start_time = (time_t)0;
941 sip->grace_period = (time_t)0;
942 sip->next = NULL;
943 sip->prev = NULL;
944
945 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
946 /*
947 * This initial dummy entry is required to setup for insque/remque.
948 * It must be skipped over whenever the list is traversed.
949 */
950 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
951 /* insque/remque require initial list entry to be self-terminated */
952 oldstate->next = oldstate;
953 oldstate->prev = oldstate;
954 sip->oldstate = oldstate;
955
956
957 sip->dss_npaths = dss_npaths;
958 sip->dss_paths = kmem_alloc(dss_npaths *
959 sizeof (rfs4_dss_path_t *), KM_SLEEP);
960
961 for (i = 0; i < dss_npaths; i++) {
962 sip->dss_paths[i] =
963 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
964 }
965
966 mutex_enter(&nsrv4->servinst_lock);
967 if (nsrv4->nfs4_cur_servinst != NULL) {
968 /* add to linked list */
969 sip->prev = nsrv4->nfs4_cur_servinst;
970 nsrv4->nfs4_cur_servinst->next = sip;
971 }
972 if (start_grace)
973 rfs4_grace_start(sip);
974 /* make the new instance "current" */
975 nsrv4->nfs4_cur_servinst = sip;
976
977 mutex_exit(&nsrv4->servinst_lock);
978 }
979
980 /*
981 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
982 * all instances directly.
983 */
984 void
rfs4_servinst_destroy_all(nfs4_srv_t * nsrv4)985 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
986 {
987 rfs4_servinst_t *sip, *prev, *current;
988 #ifdef DEBUG
989 int n = 0;
990 #endif
991
992 mutex_enter(&nsrv4->servinst_lock);
993 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
994 current = nsrv4->nfs4_cur_servinst;
995 nsrv4->nfs4_cur_servinst = NULL;
996 for (sip = current; sip != NULL; sip = prev) {
997 prev = sip->prev;
998 rw_destroy(&sip->rwlock);
999 if (sip->oldstate)
1000 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
1001 if (sip->dss_paths) {
1002 int i = sip->dss_npaths;
1003
1004 while (i > 0) {
1005 i--;
1006 if (sip->dss_paths[i] != NULL) {
1007 char *path = sip->dss_paths[i]->path;
1008
1009 if (path != NULL) {
1010 kmem_free(path,
1011 strlen(path) + 1);
1012 }
1013 kmem_free(sip->dss_paths[i],
1014 sizeof (rfs4_dss_path_t));
1015 }
1016 }
1017 kmem_free(sip->dss_paths,
1018 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
1019 }
1020 kmem_free(sip, sizeof (rfs4_servinst_t));
1021 #ifdef DEBUG
1022 n++;
1023 #endif
1024 }
1025 mutex_exit(&nsrv4->servinst_lock);
1026 }
1027
1028 /*
1029 * Assign the current server instance to a client_t.
1030 * Should be called with cp->rc_dbe held.
1031 */
1032 void
rfs4_servinst_assign(nfs4_srv_t * nsrv4,rfs4_client_t * cp,rfs4_servinst_t * sip)1033 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
1034 rfs4_servinst_t *sip)
1035 {
1036 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1037
1038 /*
1039 * The lock ensures that if the current instance is in the process
1040 * of changing, we will see the new one.
1041 */
1042 mutex_enter(&nsrv4->servinst_lock);
1043 cp->rc_server_instance = sip;
1044 mutex_exit(&nsrv4->servinst_lock);
1045 }
1046
1047 rfs4_servinst_t *
rfs4_servinst(rfs4_client_t * cp)1048 rfs4_servinst(rfs4_client_t *cp)
1049 {
1050 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1051
1052 return (cp->rc_server_instance);
1053 }
1054
1055 /* ARGSUSED */
1056 static void
nullfree(caddr_t resop)1057 nullfree(caddr_t resop)
1058 {
1059 }
1060
1061 /*
1062 * This is a fall-through for invalid or not implemented (yet) ops
1063 */
1064 /* ARGSUSED */
1065 static void
rfs4_op_inval(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1066 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1067 struct compound_state *cs)
1068 {
1069 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
1070 }
1071
1072 /*
1073 * Check if the security flavor, nfsnum, is in the flavor_list.
1074 */
1075 bool_t
in_flavor_list(int nfsnum,int * flavor_list,int count)1076 in_flavor_list(int nfsnum, int *flavor_list, int count)
1077 {
1078 int i;
1079
1080 for (i = 0; i < count; i++) {
1081 if (nfsnum == flavor_list[i])
1082 return (TRUE);
1083 }
1084 return (FALSE);
1085 }
1086
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 * cs   - compound state; cs->vp is the directory the lookup starts in
 *        and cs->exi is the export it is currently associated with.
 * nm   - NUL-terminated component name; ".." is handled specially.
 * resp - on NFS4_OK, SECINFO4resok_val/SECINFO4resok_len describe a
 *        kmem_alloc()ed array of secinfo4 entries (RPCSEC_GSS entries
 *        additionally carry a kmem_alloc()ed copy of the mechanism OID).
 *
 * Returns NFS4_OK, or the nfsstat4 that puterrno4() yields for the
 * failing step.  The vnode obtained for "nm" is released before
 * returning on every path that follows the lookup.
 *
 * The ASSERT before the result is built expects ne->exported_lock to be
 * held, so the caller presumably enters with that lock taken.
 */
/* ARGSUSED */
nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
	int error, different_export = 0;
	vnode_t *dvp, *vp;
	struct exportinfo *exi;
	fid_t fid;
	uint_t count, i;
	secinfo4 *resok_val;
	struct secinfo *secp;
	seconfig_t *si;
	bool_t did_traverse = FALSE;
	int dotdot, walk;
	nfs_export_t *ne = nfs_get_export();

	dvp = cs->vp;
	exi = cs->exi;
	ASSERT(exi != NULL);
	/* Exactly the two-character name "..". */
	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

	/*
	 * If dotdotting, then need to check whether it's above the
	 * root of a filesystem, or above an export point.
	 */
	if (dotdot) {
		vnode_t *zone_rootvp = ne->exi_root->exi_vp;

		ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
		/*
		 * If dotdotting at the root of a filesystem, then
		 * need to traverse back to the mounted-on filesystem
		 * and do the dotdot lookup there.
		 */
		if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {

			/*
			 * If at the system root, then can
			 * go up no further.
			 */
			if (VN_CMP(dvp, zone_rootvp))
				return (puterrno4(ENOENT));

			/*
			 * Traverse back to the mounted-on filesystem
			 */
			dvp = untraverse(dvp, zone_rootvp);

			/*
			 * Set the different_export flag so we remember
			 * to pick up a new exportinfo entry for
			 * this new filesystem.
			 */
			different_export = 1;
		} else {

			/*
			 * If dotdotting above an export point then set
			 * the different_export to get new export info.
			 */
			different_export = nfs_exported(exi, dvp);
		}
	}

	/*
	 * Get the vnode for the component "nm".
	 */
	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
	    NULL, NULL, NULL);
	if (error)
		return (puterrno4(error));

	/*
	 * If the vnode is in a pseudo filesystem, or if the security flavor
	 * used in the request is valid but not an explicitly shared flavor,
	 * or the access bit indicates that this is a limited access,
	 * check whether this vnode is visible.
	 *
	 * nfs_visible() is also handed &different_export, so it may update
	 * that flag as a side effect.
	 */
	if (!different_export &&
	    (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
	    cs->access & CS_ACCESS_LIMITED)) {
		if (! nfs_visible(exi, vp, &different_export)) {
			VN_RELE(vp);
			return (puterrno4(ENOENT));
		}
	}

	/*
	 * If it's a mountpoint, then traverse it.
	 */
	if (vn_ismntpt(vp)) {
		if ((error = traverse(&vp)) != 0) {
			VN_RELE(vp);
			return (puterrno4(error));
		}
		/* remember that we had to traverse mountpoint */
		did_traverse = TRUE;
		different_export = 1;
	} else if (vp->v_vfsp != dvp->v_vfsp) {
		/*
		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
		 * then vp is probably an LOFS object.  We don't need the
		 * realvp, we just need to know that we might have crossed
		 * a server fs boundary and need to call checkexport4.
		 * (LOFS lookup hides server fs mountpoints, and actually calls
		 * traverse)
		 */
		different_export = 1;
	}

	/*
	 * Get the export information for it.
	 */
	if (different_export) {

		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			VN_RELE(vp);
			return (puterrno4(error));
		}

		/* We'll need to reassign "exi". */
		if (dotdot)
			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
		else
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

		if (exi == NULL) {
			if (did_traverse == TRUE) {
				/*
				 * If this vnode is a mounted-on vnode,
				 * but the mounted-on file system is not
				 * exported, send back the secinfo for
				 * the exported node that the mounted-on
				 * vnode lives in.
				 */
				exi = cs->exi;
			} else {
				VN_RELE(vp);
				return (puterrno4(EACCES));
			}
		}
	}
	ASSERT(exi != NULL);


	/*
	 * Create the secinfo result based on the security information
	 * from the exportinfo structure (exi).
	 *
	 * Return all flavors for a pseudo node.
	 * For a real export node, return the flavor that the client
	 * has access with.
	 */
	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
	if (PSEUDO(exi)) {
		count = exi->exi_export.ex_seccnt; /* total sec count */
		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
		secp = exi->exi_export.ex_secinfo;

		for (i = 0; i < count; i++) {
			si = &secp[i].s_secinfo;
			resok_val[i].flavor = si->sc_rpcnum;
			if (resok_val[i].flavor == RPCSEC_GSS) {
				rpcsec_gss_info *info;

				info = &resok_val[i].flavor_info;
				info->qop = si->sc_qop;
				info->service = (rpc_gss_svc_t)si->sc_service;

				/* get oid opaque data */
				info->oid.sec_oid4_len =
				    si->sc_gss_mech_type->length;
				info->oid.sec_oid4_val = kmem_alloc(
				    si->sc_gss_mech_type->length, KM_SLEEP);
				bcopy(
				    si->sc_gss_mech_type->elements,
				    info->oid.sec_oid4_val,
				    info->oid.sec_oid4_len);
			}
		}
		resp->SECINFO4resok_len = count;
		resp->SECINFO4resok_val = resok_val;
	} else {
		int ret_cnt = 0, k = 0;
		int *flavor_list;

		count = exi->exi_export.ex_seccnt; /* total sec count */
		secp = exi->exi_export.ex_secinfo;

		/*
		 * First pass: collect into flavor_list the NFS flavor
		 * numbers that are neither denied nor "wrong sec" for
		 * this request; ret_cnt counts them.
		 */
		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
		/* find out which flavors to return */
		for (i = 0; i < count; i ++) {
			int access, flavor, perm;

			flavor = secp[i].s_secinfo.sc_nfsnum;
			perm = secp[i].s_flags;

			access = nfsauth4_secinfo_access(exi, cs->req,
			    flavor, perm, cs->basecr);

			if (! (access & NFSAUTH_DENIED) &&
			    ! (access & NFSAUTH_WRONGSEC)) {
				flavor_list[ret_cnt] = flavor;
				ret_cnt++;
			}
		}

		/* Create the returning SECINFO value */
		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

		/*
		 * Second pass: emit one result entry (index k) for each
		 * export flavor found in flavor_list, stopping once
		 * ret_cnt entries have been produced.
		 */
		for (i = 0; i < count; i++) {
			/*
			 * If the flavor is in the flavor list,
			 * fill in resok_val.
			 */
			si = &secp[i].s_secinfo;
			if (in_flavor_list(si->sc_nfsnum,
			    flavor_list, ret_cnt)) {
				resok_val[k].flavor = si->sc_rpcnum;
				if (resok_val[k].flavor == RPCSEC_GSS) {
					rpcsec_gss_info *info;

					info = &resok_val[k].flavor_info;
					info->qop = si->sc_qop;
					info->service = (rpc_gss_svc_t)
					    si->sc_service;

					/* get oid opaque data */
					info->oid.sec_oid4_len =
					    si->sc_gss_mech_type->length;
					info->oid.sec_oid4_val = kmem_alloc(
					    si->sc_gss_mech_type->length,
					    KM_SLEEP);
					bcopy(si->sc_gss_mech_type->elements,
					    info->oid.sec_oid4_val,
					    info->oid.sec_oid4_len);
				}
				k++;
			}
			if (k >= ret_cnt)
				break;
		}
		resp->SECINFO4resok_len = ret_cnt;
		resp->SECINFO4resok_val = resok_val;
		/* The scratch list was sized by count, not ret_cnt. */
		kmem_free(flavor_list, count * sizeof (int));
	}

	VN_RELE(vp);
	return (NFS4_OK);
}
1344
1345 /*
1346 * SECINFO (Operation 33): Obtain required security information on
1347 * the component name in the format of (security-mechanism-oid, qop, service)
1348 * triplets.
1349 */
1350 /* ARGSUSED */
1351 static void
rfs4_op_secinfo(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1352 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1353 struct compound_state *cs)
1354 {
1355 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1356 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1357 utf8string *utfnm = &args->name;
1358 uint_t len;
1359 char *nm;
1360 struct sockaddr *ca;
1361 char *name = NULL;
1362 nfsstat4 status = NFS4_OK;
1363
1364 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1365 SECINFO4args *, args);
1366
1367 /*
1368 * Current file handle (cfh) should have been set before getting
1369 * into this function. If not, return error.
1370 */
1371 if (cs->vp == NULL) {
1372 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1373 goto out;
1374 }
1375
1376 if (cs->vp->v_type != VDIR) {
1377 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1378 goto out;
1379 }
1380
1381 /*
1382 * Verify the component name. If failed, error out, but
1383 * do not error out if the component name is a "..".
1384 * SECINFO will return its parents secinfo data for SECINFO "..".
1385 */
1386 status = utf8_dir_verify(utfnm);
1387 if (status != NFS4_OK) {
1388 if (utfnm->utf8string_len != 2 ||
1389 utfnm->utf8string_val[0] != '.' ||
1390 utfnm->utf8string_val[1] != '.') {
1391 *cs->statusp = resp->status = status;
1392 goto out;
1393 }
1394 }
1395
1396 nm = utf8_to_str(utfnm, &len, NULL);
1397 if (nm == NULL) {
1398 *cs->statusp = resp->status = NFS4ERR_INVAL;
1399 goto out;
1400 }
1401
1402 if (len > MAXNAMELEN) {
1403 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1404 kmem_free(nm, len);
1405 goto out;
1406 }
1407
1408 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1409 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1410 MAXPATHLEN + 1);
1411
1412 if (name == NULL) {
1413 *cs->statusp = resp->status = NFS4ERR_INVAL;
1414 kmem_free(nm, len);
1415 goto out;
1416 }
1417
1418 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1419
1420 if (resp->status == NFS4_OK && rfs4_has_session(cs)) {
1421 /*
1422 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3
1423 *
1424 * 2.6.3.1.1.8
1425 * SECINFO and SECINFO_NO_NAME consume the current
1426 * filehandle (note that this is a change from NFSv4.0).
1427 *
1428 * 18.29.3
1429 * On success, the current filehandle is consumed (see
1430 * Section 2.6.3.1.1.8), and if the next operation after
1431 * SECINFO tries to use the current filehandle, that
1432 * operation will fail with the status
1433 * NFS4ERR_NOFILEHANDLE.
1434 */
1435 VN_RELE(cs->vp);
1436 cs->vp = NULL;
1437 }
1438
1439 if (name != nm)
1440 kmem_free(name, MAXPATHLEN + 1);
1441 kmem_free(nm, len);
1442
1443 out:
1444 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1445 SECINFO4res *, resp);
1446 }
1447
1448 /*
1449 * Free SECINFO result.
1450 */
1451 /* ARGSUSED */
1452 static void
rfs4_op_secinfo_free(nfs_resop4 * resop)1453 rfs4_op_secinfo_free(nfs_resop4 *resop)
1454 {
1455 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1456 int count, i;
1457 secinfo4 *resok_val;
1458
1459 /* If this is not an Ok result, nothing to free. */
1460 if (resp->status != NFS4_OK) {
1461 return;
1462 }
1463
1464 count = resp->SECINFO4resok_len;
1465 resok_val = resp->SECINFO4resok_val;
1466
1467 for (i = 0; i < count; i++) {
1468 if (resok_val[i].flavor == RPCSEC_GSS) {
1469 rpcsec_gss_info *info;
1470
1471 info = &resok_val[i].flavor_info;
1472 kmem_free(info->oid.sec_oid4_val,
1473 info->oid.sec_oid4_len);
1474 }
1475 }
1476 kmem_free(resok_val, count * sizeof (secinfo4));
1477 resp->SECINFO4resok_len = 0;
1478 resp->SECINFO4resok_val = NULL;
1479 }
1480
1481 /* ARGSUSED */
1482 static void
rfs4_op_access(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1483 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1484 struct compound_state *cs)
1485 {
1486 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1487 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1488 int error;
1489 vnode_t *vp;
1490 struct vattr va;
1491 int checkwriteperm;
1492 cred_t *cr = cs->cr;
1493 bslabel_t *clabel, *slabel;
1494 ts_label_t *tslabel;
1495 boolean_t admin_low_client;
1496
1497 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1498 ACCESS4args *, args);
1499
1500 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1501 if (cs->access == CS_ACCESS_DENIED) {
1502 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1503 goto out;
1504 }
1505 #endif
1506 if (cs->vp == NULL) {
1507 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1508 goto out;
1509 }
1510
1511 ASSERT(cr != NULL);
1512
1513 vp = cs->vp;
1514
1515 /*
1516 * If the file system is exported read only, it is not appropriate
1517 * to check write permissions for regular files and directories.
1518 * Special files are interpreted by the client, so the underlying
1519 * permissions are sent back to the client for interpretation.
1520 */
1521 if (rdonly4(req, cs) &&
1522 (vp->v_type == VREG || vp->v_type == VDIR))
1523 checkwriteperm = 0;
1524 else
1525 checkwriteperm = 1;
1526
1527 /*
1528 * XXX
1529 * We need the mode so that we can correctly determine access
1530 * permissions relative to a mandatory lock file. Access to
1531 * mandatory lock files is denied on the server, so it might
1532 * as well be reflected to the server during the open.
1533 */
1534 va.va_mask = AT_MODE;
1535 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1536 if (error) {
1537 *cs->statusp = resp->status = puterrno4(error);
1538 goto out;
1539 }
1540 resp->access = 0;
1541 resp->supported = 0;
1542
1543 if (is_system_labeled()) {
1544 ASSERT(req->rq_label != NULL);
1545 clabel = req->rq_label;
1546 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1547 "got client label from request(1)",
1548 struct svc_req *, req);
1549 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1550 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1551 *cs->statusp = resp->status = puterrno4(EACCES);
1552 goto out;
1553 }
1554 slabel = label2bslabel(tslabel);
1555 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1556 char *, "got server label(1) for vp(2)",
1557 bslabel_t *, slabel, vnode_t *, vp);
1558
1559 admin_low_client = B_FALSE;
1560 } else
1561 admin_low_client = B_TRUE;
1562 }
1563
1564 if (args->access & ACCESS4_READ) {
1565 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1566 if (!error && !MANDLOCK(vp, va.va_mode) &&
1567 (!is_system_labeled() || admin_low_client ||
1568 bldominates(clabel, slabel)))
1569 resp->access |= ACCESS4_READ;
1570 resp->supported |= ACCESS4_READ;
1571 }
1572 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1573 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1574 if (!error && (!is_system_labeled() || admin_low_client ||
1575 bldominates(clabel, slabel)))
1576 resp->access |= ACCESS4_LOOKUP;
1577 resp->supported |= ACCESS4_LOOKUP;
1578 }
1579 if (checkwriteperm &&
1580 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1581 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1582 if (!error && !MANDLOCK(vp, va.va_mode) &&
1583 (!is_system_labeled() || admin_low_client ||
1584 blequal(clabel, slabel)))
1585 resp->access |=
1586 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1587 resp->supported |=
1588 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1589 }
1590
1591 if (checkwriteperm &&
1592 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1593 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1594 if (!error && (!is_system_labeled() || admin_low_client ||
1595 blequal(clabel, slabel)))
1596 resp->access |= ACCESS4_DELETE;
1597 resp->supported |= ACCESS4_DELETE;
1598 }
1599 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1600 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1601 if (!error && !MANDLOCK(vp, va.va_mode) &&
1602 (!is_system_labeled() || admin_low_client ||
1603 bldominates(clabel, slabel)))
1604 resp->access |= ACCESS4_EXECUTE;
1605 resp->supported |= ACCESS4_EXECUTE;
1606 }
1607
1608 if (is_system_labeled() && !admin_low_client)
1609 label_rele(tslabel);
1610
1611 *cs->statusp = resp->status = NFS4_OK;
1612 out:
1613 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1614 ACCESS4res *, resp);
1615 }
1616
1617 /* ARGSUSED */
1618 static void
rfs4_op_commit(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1619 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1620 struct compound_state *cs)
1621 {
1622 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1623 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1624 int error;
1625 vnode_t *vp = cs->vp;
1626 cred_t *cr = cs->cr;
1627 vattr_t va;
1628 nfs4_srv_t *nsrv4;
1629
1630 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1631 COMMIT4args *, args);
1632
1633 if (vp == NULL) {
1634 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1635 goto out;
1636 }
1637 if (cs->access == CS_ACCESS_DENIED) {
1638 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1639 goto out;
1640 }
1641
1642 if (args->offset + args->count < args->offset) {
1643 *cs->statusp = resp->status = NFS4ERR_INVAL;
1644 goto out;
1645 }
1646
1647 va.va_mask = AT_UID;
1648 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1649
1650 /*
1651 * If we can't get the attributes, then we can't do the
1652 * right access checking. So, we'll fail the request.
1653 */
1654 if (error) {
1655 *cs->statusp = resp->status = puterrno4(error);
1656 goto out;
1657 }
1658 if (rdonly4(req, cs)) {
1659 *cs->statusp = resp->status = NFS4ERR_ROFS;
1660 goto out;
1661 }
1662
1663 if (vp->v_type != VREG) {
1664 if (vp->v_type == VDIR)
1665 resp->status = NFS4ERR_ISDIR;
1666 else
1667 resp->status = NFS4ERR_INVAL;
1668 *cs->statusp = resp->status;
1669 goto out;
1670 }
1671
1672 if (crgetuid(cr) != va.va_uid &&
1673 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1674 *cs->statusp = resp->status = puterrno4(error);
1675 goto out;
1676 }
1677
1678 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1679
1680 if (error) {
1681 *cs->statusp = resp->status = puterrno4(error);
1682 goto out;
1683 }
1684
1685 nsrv4 = nfs4_get_srv();
1686 *cs->statusp = resp->status = NFS4_OK;
1687 resp->writeverf = nsrv4->write4verf;
1688 out:
1689 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1690 COMMIT4res *, resp);
1691 }
1692
1693 /*
1694 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1695 * was completed. It does the nfsv4 create for special files.
1696 */
1697 /* ARGSUSED */
1698 static vnode_t *
do_rfs4_op_mknod(CREATE4args * args,CREATE4res * resp,struct svc_req * req,struct compound_state * cs,vattr_t * vap,char * nm)1699 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1700 struct compound_state *cs, vattr_t *vap, char *nm)
1701 {
1702 int error;
1703 cred_t *cr = cs->cr;
1704 vnode_t *dvp = cs->vp;
1705 vnode_t *vp = NULL;
1706 int mode;
1707 enum vcexcl excl;
1708
1709 switch (args->type) {
1710 case NF4CHR:
1711 case NF4BLK:
1712 if (secpolicy_sys_devices(cr) != 0) {
1713 *cs->statusp = resp->status = NFS4ERR_PERM;
1714 return (NULL);
1715 }
1716 if (args->type == NF4CHR)
1717 vap->va_type = VCHR;
1718 else
1719 vap->va_type = VBLK;
1720 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1721 args->ftype4_u.devdata.specdata2);
1722 vap->va_mask |= AT_RDEV;
1723 break;
1724 case NF4SOCK:
1725 vap->va_type = VSOCK;
1726 break;
1727 case NF4FIFO:
1728 vap->va_type = VFIFO;
1729 break;
1730 default:
1731 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1732 return (NULL);
1733 }
1734
1735 /*
1736 * Must specify the mode.
1737 */
1738 if (!(vap->va_mask & AT_MODE)) {
1739 *cs->statusp = resp->status = NFS4ERR_INVAL;
1740 return (NULL);
1741 }
1742
1743 excl = EXCL;
1744
1745 mode = 0;
1746
1747 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1748 if (error) {
1749 *cs->statusp = resp->status = puterrno4(error);
1750 return (NULL);
1751 }
1752 return (vp);
1753 }
1754
/*
 * nfsv4 create is used to create non-regular files.  For regular files,
 * use nfsv4 open.
 *
 * The object named args->objname is created in the directory given by
 * the current filehandle (cs->vp).  Accepted types are NF4LNK, NF4BLK,
 * NF4CHR, NF4SOCK, NF4FIFO and NF4DIR; anything else yields
 * NFS4ERR_BADTYPE.
 *
 * On success:
 *	- cs->vp and cs->fh are switched to the new object (the hold on
 *	  the old cs->vp is released),
 *	- resp->cinfo carries the directory's before/after change values
 *	  and whether the change was observed to be atomic,
 *	- resp->attrset lists the createattrs that were set.
 * On failure resp->attrset is zero and the status is stored in both
 * resp->status and *cs->statusp.
 *
 * Every exit taken after nm/name were allocated frees them, and every
 * exit taken after nfs4_ntov_table_init() calls nfs4_ntov_table_free().
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * bva: directory attrs before the create; iva: directly after the
	 * create; iva2: re-check after VOP_LOOKUP (symlink case only);
	 * ava: after the directory fsync.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this client.  The result is compared
	 * against nm before being freed everywhere below, so
	 * nfscmd_convname() apparently may hand back nm itself when no
	 * conversion is needed -- TODO confirm against its definition.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	/* Decode createattrs; the resulting vattr is sarg.vap. */
	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* A create that supplies no vattr-backed attribute is rejected. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/*
	 * Create the object.  "error" is still 0 here (from the
	 * successful VOP_GETATTR above); the default case below does not
	 * assign it and relies on that.
	 */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* Same conversion as for the object name. */
		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* The link text is no longer needed once the call returns. */
		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/* Look the new symlink up to obtain its vnode in vp. */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* On failure the helper has already stored the status. */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	/*
	 * NOTE(review): on the success path resp->status has not been
	 * assigned by this function before this test, so the test relies
	 * on it being NFS4_OK on entry -- confirm the caller hands in a
	 * zeroed result.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/*
	 * Build the new current filehandle.  A failure is recorded in the
	 * status but processing continues so the fsync below still runs;
	 * vp is released after it.
	 */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}
	/* Replace the current filehandle's vnode with the new object. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}
2138
2139 /*ARGSUSED*/
2140 static void
rfs4_op_delegpurge(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2141 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2142 struct compound_state *cs)
2143 {
2144 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
2145 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
2146
2147 rfs4_op_inval(argop, resop, req, cs);
2148
2149 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
2150 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
2151 }
2152
2153 /*ARGSUSED*/
2154 static void
rfs4_op_delegreturn(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2155 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2156 struct compound_state *cs)
2157 {
2158 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2159 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2160 rfs4_deleg_state_t *dsp;
2161 nfsstat4 status;
2162
2163 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2164 DELEGRETURN4args *, args);
2165
2166 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2167 resp->status = *cs->statusp = status;
2168 if (status != NFS4_OK)
2169 goto out;
2170
2171 /* Ensure specified filehandle matches */
2172 if (cs->vp != dsp->rds_finfo->rf_vp) {
2173 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2174 } else
2175 rfs4_return_deleg(dsp, FALSE);
2176
2177 rfs4_update_lease(dsp->rds_client);
2178
2179 rfs4_deleg_state_rele(dsp);
2180 out:
2181 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2182 DELEGRETURN4res *, resp);
2183 }
2184
2185 /*
2186 * Check to see if a given "flavor" is an explicitly shared flavor.
2187 * The assumption of this routine is the "flavor" is already a valid
2188 * flavor in the secinfo list of "exi".
2189 *
2190 * e.g.
2191 * # share -o sec=flavor1 /export
2192 * # share -o sec=flavor2 /export/home
2193 *
2194 * flavor2 is not an explicitly shared flavor for /export,
2195 * however it is in the secinfo list for /export thru the
2196 * server namespace setup.
2197 */
2198 int
is_exported_sec(int flavor,struct exportinfo * exi)2199 is_exported_sec(int flavor, struct exportinfo *exi)
2200 {
2201 int i;
2202 struct secinfo *sp;
2203
2204 sp = exi->exi_export.ex_secinfo;
2205 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2206 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2207 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2208 return (SEC_REF_EXPORTED(&sp[i]));
2209 }
2210 }
2211
2212 /* Should not reach this point based on the assumption */
2213 return (0);
2214 }
2215
2216 /*
2217 * Check if the security flavor used in the request matches what is
2218 * required at the export point or at the root pseudo node (exi_root).
2219 *
2220 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2221 *
2222 */
2223 static int
secinfo_match_or_authnone(struct compound_state * cs)2224 secinfo_match_or_authnone(struct compound_state *cs)
2225 {
2226 int i;
2227 struct secinfo *sp;
2228
2229 /*
2230 * Check cs->nfsflavor (from the request) against
2231 * the current export data in cs->exi.
2232 */
2233 sp = cs->exi->exi_export.ex_secinfo;
2234 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2235 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2236 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2237 return (1);
2238 }
2239
2240 return (0);
2241 }
2242
2243 /*
2244 * Check the access authority for the client and return the correct error.
2245 */
2246 nfsstat4
call_checkauth4(struct compound_state * cs,struct svc_req * req)2247 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2248 {
2249 int authres;
2250
2251 /*
2252 * First, check if the security flavor used in the request
2253 * are among the flavors set in the server namespace.
2254 */
2255 if (!secinfo_match_or_authnone(cs)) {
2256 *cs->statusp = NFS4ERR_WRONGSEC;
2257 return (*cs->statusp);
2258 }
2259
2260 authres = checkauth4(cs, req);
2261
2262 if (authres > 0) {
2263 *cs->statusp = NFS4_OK;
2264 if (! (cs->access & CS_ACCESS_LIMITED))
2265 cs->access = CS_ACCESS_OK;
2266 } else if (authres == 0) {
2267 *cs->statusp = NFS4ERR_ACCESS;
2268 } else if (authres == -2) {
2269 *cs->statusp = NFS4ERR_WRONGSEC;
2270 } else {
2271 *cs->statusp = NFS4ERR_DELAY;
2272 }
2273 return (*cs->statusp);
2274 }
2275
2276 /*
2277 * bitmap4_to_attrmask is called by getattr and readdir.
2278 * It sets up the vattr mask and determines whether vfsstat call is needed
2279 * based on the input bitmap.
2280 * Returns nfsv4 status.
2281 */
2282 static nfsstat4
bitmap4_to_attrmask(bitmap4 breq,struct nfs4_svgetit_arg * sargp)2283 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2284 {
2285 int i;
2286 uint_t va_mask;
2287 struct statvfs64 *sbp = sargp->sbp;
2288
2289 sargp->sbp = NULL;
2290 sargp->flag = 0;
2291 sargp->rdattr_error = NFS4_OK;
2292 sargp->mntdfid_set = FALSE;
2293 if (sargp->cs->vp)
2294 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2295 FH4_ATTRDIR | FH4_NAMEDATTR);
2296 else
2297 sargp->xattr = 0;
2298
2299 /*
2300 * Set rdattr_error_req to true if return error per
2301 * failed entry rather than fail the readdir.
2302 */
2303 if (breq & FATTR4_RDATTR_ERROR_MASK)
2304 sargp->rdattr_error_req = 1;
2305 else
2306 sargp->rdattr_error_req = 0;
2307
2308 /*
2309 * generate the va_mask
2310 * Handle the easy cases first
2311 */
2312 switch (breq) {
2313 case NFS4_NTOV_ATTR_MASK:
2314 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2315 return (NFS4_OK);
2316
2317 case NFS4_FS_ATTR_MASK:
2318 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2319 sargp->sbp = sbp;
2320 return (NFS4_OK);
2321
2322 case NFS4_NTOV_ATTR_CACHE_MASK:
2323 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2324 return (NFS4_OK);
2325
2326 case FATTR4_LEASE_TIME_MASK:
2327 sargp->vap->va_mask = 0;
2328 return (NFS4_OK);
2329
2330 default:
2331 va_mask = 0;
2332 for (i = 0; i < nfs4_ntov_map_size; i++) {
2333 if ((breq & nfs4_ntov_map[i].fbit) &&
2334 nfs4_ntov_map[i].vbit)
2335 va_mask |= nfs4_ntov_map[i].vbit;
2336 }
2337
2338 /*
2339 * Check is vfsstat is needed
2340 */
2341 if (breq & NFS4_FS_ATTR_MASK)
2342 sargp->sbp = sbp;
2343
2344 sargp->vap->va_mask = va_mask;
2345 return (NFS4_OK);
2346 }
2347 /* NOTREACHED */
2348 }
2349
2350 /*
2351 * bitmap4_get_sysattrs is called by getattr and readdir.
2352 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2353 * Returns nfsv4 status.
2354 */
2355 static nfsstat4
bitmap4_get_sysattrs(struct nfs4_svgetit_arg * sargp)2356 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2357 {
2358 int error;
2359 struct compound_state *cs = sargp->cs;
2360 vnode_t *vp = cs->vp;
2361
2362 if (sargp->sbp != NULL) {
2363 error = VFS_STATVFS(vp->v_vfsp, sargp->sbp);
2364 if (error != 0) {
2365 sargp->sbp = NULL; /* to identify error */
2366 return (puterrno4(error));
2367 }
2368 }
2369
2370 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2371 }
2372
2373 static void
nfs4_ntov_table_init(struct nfs4_ntov_table * ntovp)2374 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2375 {
2376 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2377 KM_SLEEP);
2378 ntovp->attrcnt = 0;
2379 ntovp->vfsstat = FALSE;
2380 }
2381
2382 static void
nfs4_ntov_table_free(struct nfs4_ntov_table * ntovp,struct nfs4_svgetit_arg * sargp)2383 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2384 struct nfs4_svgetit_arg *sargp)
2385 {
2386 int i;
2387 union nfs4_attr_u *na;
2388 uint8_t *amap;
2389
2390 /*
2391 * XXX Should do the same checks for whether the bit is set
2392 */
2393 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2394 i < ntovp->attrcnt; i++, na++, amap++) {
2395 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2396 NFS4ATTR_FREEIT, sargp, na);
2397 }
2398 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2399 /*
2400 * xdr_free for getattr will be done later
2401 */
2402 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2403 i < ntovp->attrcnt; i++, na++, amap++) {
2404 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2405 }
2406 }
2407 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2408 }
2409
2410 /*
2411 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2412 */
2413 static nfsstat4
do_rfs4_op_getattr(bitmap4 breq,fattr4 * fattrp,struct nfs4_svgetit_arg * sargp)2414 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2415 struct nfs4_svgetit_arg *sargp)
2416 {
2417 int error = 0;
2418 int i, k;
2419 struct nfs4_ntov_table ntov;
2420 XDR xdr;
2421 ulong_t xdr_size;
2422 char *xdr_attrs;
2423 nfsstat4 status = NFS4_OK;
2424 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2425 union nfs4_attr_u *na;
2426 uint8_t *amap;
2427
2428 sargp->op = NFS4ATTR_GETIT;
2429 sargp->flag = 0;
2430
2431 fattrp->attrmask = 0;
2432 /* if no bits requested, then return empty fattr4 */
2433 if (breq == 0) {
2434 fattrp->attrlist4_len = 0;
2435 fattrp->attrlist4 = NULL;
2436 return (NFS4_OK);
2437 }
2438
2439 /*
2440 * return NFS4ERR_INVAL when client requests write-only attrs
2441 */
2442 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2443 return (NFS4ERR_INVAL);
2444
2445 nfs4_ntov_table_init(&ntov);
2446 na = ntov.na;
2447 amap = ntov.amap;
2448
2449 /*
2450 * Now loop to get or verify the attrs
2451 */
2452 for (i = 0; i < nfs4_ntov_map_size; i++) {
2453 if (breq & nfs4_ntov_map[i].fbit) {
2454 if ((*nfs4_ntov_map[i].sv_getit)(
2455 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2456
2457 error = (*nfs4_ntov_map[i].sv_getit)(
2458 NFS4ATTR_GETIT, sargp, na);
2459
2460 /*
2461 * Possible error values:
2462 * >0 if sv_getit failed to
2463 * get the attr; 0 if succeeded;
2464 * <0 if rdattr_error and the
2465 * attribute cannot be returned.
2466 */
2467 if (error && !(sargp->rdattr_error_req))
2468 goto done;
2469 /*
2470 * If error then just for entry
2471 */
2472 if (error == 0) {
2473 fattrp->attrmask |=
2474 nfs4_ntov_map[i].fbit;
2475 *amap++ =
2476 (uint8_t)nfs4_ntov_map[i].nval;
2477 na++;
2478 (ntov.attrcnt)++;
2479 } else if ((error > 0) &&
2480 (sargp->rdattr_error == NFS4_OK)) {
2481 sargp->rdattr_error = puterrno4(error);
2482 }
2483 error = 0;
2484 }
2485 }
2486 }
2487
2488 /*
2489 * If rdattr_error was set after the return value for it was assigned,
2490 * update it.
2491 */
2492 if (prev_rdattr_error != sargp->rdattr_error) {
2493 na = ntov.na;
2494 amap = ntov.amap;
2495 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2496 k = *amap;
2497 if (k < FATTR4_RDATTR_ERROR) {
2498 continue;
2499 }
2500 if ((k == FATTR4_RDATTR_ERROR) &&
2501 ((*nfs4_ntov_map[k].sv_getit)(
2502 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2503
2504 (void) (*nfs4_ntov_map[k].sv_getit)(
2505 NFS4ATTR_GETIT, sargp, na);
2506 }
2507 break;
2508 }
2509 }
2510
2511 xdr_size = 0;
2512 na = ntov.na;
2513 amap = ntov.amap;
2514 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2515 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2516 }
2517
2518 fattrp->attrlist4_len = xdr_size;
2519 if (xdr_size) {
2520 /* freed by rfs4_op_getattr_free() */
2521 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2522
2523 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2524
2525 na = ntov.na;
2526 amap = ntov.amap;
2527 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2528 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2529 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2530 int, *amap);
2531 status = NFS4ERR_SERVERFAULT;
2532 break;
2533 }
2534 }
2535 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2536 } else {
2537 fattrp->attrlist4 = NULL;
2538 }
2539 done:
2540
2541 nfs4_ntov_table_free(&ntov, sargp);
2542
2543 if (error != 0)
2544 status = puterrno4(error);
2545
2546 return (status);
2547 }
2548
2549 /* ARGSUSED */
2550 static void
rfs4_op_getattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2551 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2552 struct compound_state *cs)
2553 {
2554 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2555 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2556 struct nfs4_svgetit_arg sarg;
2557 struct statvfs64 sb;
2558 nfsstat4 status;
2559
2560 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2561 GETATTR4args *, args);
2562
2563 if (cs->vp == NULL) {
2564 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2565 goto out;
2566 }
2567
2568 if (cs->access == CS_ACCESS_DENIED) {
2569 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2570 goto out;
2571 }
2572
2573 sarg.sbp = &sb;
2574 sarg.cs = cs;
2575 sarg.is_referral = B_FALSE;
2576
2577 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2578 if (status == NFS4_OK) {
2579
2580 status = bitmap4_get_sysattrs(&sarg);
2581 if (status == NFS4_OK) {
2582
2583 /* Is this a referral? */
2584 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2585 /* Older V4 Solaris client sees a link */
2586 if (client_is_downrev(req))
2587 sarg.vap->va_type = VLNK;
2588 else
2589 sarg.is_referral = B_TRUE;
2590 }
2591
2592 status = do_rfs4_op_getattr(args->attr_request,
2593 &resp->obj_attributes, &sarg);
2594 }
2595 }
2596 *cs->statusp = resp->status = status;
2597 out:
2598 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2599 GETATTR4res *, resp);
2600 }
2601
2602 static void
rfs4_op_getattr_free(nfs_resop4 * resop)2603 rfs4_op_getattr_free(nfs_resop4 *resop)
2604 {
2605 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2606
2607 nfs4_fattr4_free(&resp->obj_attributes);
2608 }
2609
2610 /* ARGSUSED */
2611 static void
rfs4_op_getfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2612 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2613 struct compound_state *cs)
2614 {
2615 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2616
2617 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2618
2619 if (cs->vp == NULL) {
2620 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2621 goto out;
2622 }
2623 if (cs->access == CS_ACCESS_DENIED) {
2624 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2625 goto out;
2626 }
2627
2628 /* check for reparse point at the share point */
2629 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2630 /* it's all bad */
2631 cs->exi->exi_moved = 1;
2632 *cs->statusp = resp->status = NFS4ERR_MOVED;
2633 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2634 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2635 return;
2636 }
2637
2638 /* check for reparse point at vp */
2639 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2640 /* it's not all bad */
2641 *cs->statusp = resp->status = NFS4ERR_MOVED;
2642 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2643 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2644 return;
2645 }
2646
2647 resp->object.nfs_fh4_val =
2648 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2649 nfs_fh4_copy(&cs->fh, &resp->object);
2650 *cs->statusp = resp->status = NFS4_OK;
2651 out:
2652 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2653 GETFH4res *, resp);
2654 }
2655
2656 static void
rfs4_op_getfh_free(nfs_resop4 * resop)2657 rfs4_op_getfh_free(nfs_resop4 *resop)
2658 {
2659 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2660
2661 if (resp->status == NFS4_OK &&
2662 resp->object.nfs_fh4_val != NULL) {
2663 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2664 resp->object.nfs_fh4_val = NULL;
2665 resp->object.nfs_fh4_len = 0;
2666 }
2667 }
2668
2669 /*
2670 * illegal: args: void
2671 * res : status (NFS4ERR_OP_ILLEGAL)
2672 */
2673 /* ARGSUSED */
2674 static void
rfs4_op_illegal(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2675 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2676 struct svc_req *req, struct compound_state *cs)
2677 {
2678 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2679
2680 resop->resop = OP_ILLEGAL;
2681 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2682 }
2683
2684 /* ARGSUSED */
2685 static void
rfs4_op_notsup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2686 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2687 struct compound_state *cs)
2688 {
2689 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
2690 }
2691
2692 /*
2693 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2694 * res: status. If success - CURRENT_FH unchanged, return change_info
2695 */
2696 /* ARGSUSED */
2697 static void
rfs4_op_link(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2698 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2699 struct compound_state *cs)
2700 {
2701 LINK4args *args = &argop->nfs_argop4_u.oplink;
2702 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2703 int error;
2704 vnode_t *vp;
2705 vnode_t *dvp;
2706 struct vattr bdva, idva, adva;
2707 char *nm;
2708 uint_t len;
2709 struct sockaddr *ca;
2710 char *name = NULL;
2711 nfsstat4 status;
2712
2713 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2714 LINK4args *, args);
2715
2716 /* SAVED_FH: source object */
2717 vp = cs->saved_vp;
2718 if (vp == NULL) {
2719 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2720 goto out;
2721 }
2722
2723 /* CURRENT_FH: target directory */
2724 dvp = cs->vp;
2725 if (dvp == NULL) {
2726 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2727 goto out;
2728 }
2729
2730 /*
2731 * If there is a non-shared filesystem mounted on this vnode,
2732 * do not allow to link any file in this directory.
2733 */
2734 if (vn_ismntpt(dvp)) {
2735 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2736 goto out;
2737 }
2738
2739 if (cs->access == CS_ACCESS_DENIED) {
2740 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2741 goto out;
2742 }
2743
2744 /* Check source object's type validity */
2745 if (vp->v_type == VDIR) {
2746 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2747 goto out;
2748 }
2749
2750 /* Check target directory's type */
2751 if (dvp->v_type != VDIR) {
2752 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2753 goto out;
2754 }
2755
2756 if (cs->saved_exi != cs->exi) {
2757 *cs->statusp = resp->status = NFS4ERR_XDEV;
2758 goto out;
2759 }
2760
2761 status = utf8_dir_verify(&args->newname);
2762 if (status != NFS4_OK) {
2763 *cs->statusp = resp->status = status;
2764 goto out;
2765 }
2766
2767 nm = utf8_to_fn(&args->newname, &len, NULL);
2768 if (nm == NULL) {
2769 *cs->statusp = resp->status = NFS4ERR_INVAL;
2770 goto out;
2771 }
2772
2773 if (len > MAXNAMELEN) {
2774 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2775 kmem_free(nm, len);
2776 goto out;
2777 }
2778
2779 if (rdonly4(req, cs)) {
2780 *cs->statusp = resp->status = NFS4ERR_ROFS;
2781 kmem_free(nm, len);
2782 goto out;
2783 }
2784
2785 /* Get "before" change value */
2786 bdva.va_mask = AT_CTIME|AT_SEQ;
2787 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2788 if (error) {
2789 *cs->statusp = resp->status = puterrno4(error);
2790 kmem_free(nm, len);
2791 goto out;
2792 }
2793
2794 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2795 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2796 MAXPATHLEN + 1);
2797
2798 if (name == NULL) {
2799 *cs->statusp = resp->status = NFS4ERR_INVAL;
2800 kmem_free(nm, len);
2801 goto out;
2802 }
2803
2804 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2805
2806 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2807
2808 if (nm != name)
2809 kmem_free(name, MAXPATHLEN + 1);
2810 kmem_free(nm, len);
2811
2812 /*
2813 * Get the initial "after" sequence number, if it fails, set to zero
2814 */
2815 idva.va_mask = AT_SEQ;
2816 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2817 idva.va_seq = 0;
2818
2819 /*
2820 * Force modified data and metadata out to stable storage.
2821 */
2822 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2823 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2824
2825 if (error) {
2826 *cs->statusp = resp->status = puterrno4(error);
2827 goto out;
2828 }
2829
2830 /*
2831 * Get "after" change value, if it fails, simply return the
2832 * before value.
2833 */
2834 adva.va_mask = AT_CTIME|AT_SEQ;
2835 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2836 adva.va_ctime = bdva.va_ctime;
2837 adva.va_seq = 0;
2838 }
2839
2840 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2841
2842 /*
2843 * The cinfo.atomic = TRUE only if we have
2844 * non-zero va_seq's, and it has incremented by exactly one
2845 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2846 */
2847 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2848 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2849 resp->cinfo.atomic = TRUE;
2850 else
2851 resp->cinfo.atomic = FALSE;
2852
2853 *cs->statusp = resp->status = NFS4_OK;
2854 out:
2855 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2856 LINK4res *, resp);
2857 }
2858
2859 /*
2860 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2861 */
2862
2863 /* ARGSUSED */
2864 static nfsstat4
do_rfs4_op_lookup(char * nm,struct svc_req * req,struct compound_state * cs)2865 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2866 {
2867 int error;
2868 int different_export = 0;
2869 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2870 struct exportinfo *exi = NULL, *pre_exi = NULL;
2871 nfsstat4 stat;
2872 fid_t fid;
2873 int attrdir, dotdot, walk;
2874 bool_t is_newvp = FALSE;
2875
2876 if (cs->vp->v_flag & V_XATTRDIR) {
2877 attrdir = 1;
2878 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2879 } else {
2880 attrdir = 0;
2881 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2882 }
2883
2884 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2885
2886 /*
2887 * If dotdotting, then need to check whether it's
2888 * above the root of a filesystem, or above an
2889 * export point.
2890 */
2891 if (dotdot) {
2892 vnode_t *zone_rootvp;
2893
2894 ASSERT(cs->exi != NULL);
2895 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2896 /*
2897 * If dotdotting at the root of a filesystem, then
2898 * need to traverse back to the mounted-on filesystem
2899 * and do the dotdot lookup there.
2900 */
2901 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2902
2903 /*
2904 * If at the system root, then can
2905 * go up no further.
2906 */
2907 if (VN_CMP(cs->vp, zone_rootvp))
2908 return (puterrno4(ENOENT));
2909
2910 /*
2911 * Traverse back to the mounted-on filesystem
2912 */
2913 cs->vp = untraverse(cs->vp, zone_rootvp);
2914
2915 /*
2916 * Set the different_export flag so we remember
2917 * to pick up a new exportinfo entry for
2918 * this new filesystem.
2919 */
2920 different_export = 1;
2921 } else {
2922
2923 /*
2924 * If dotdotting above an export point then set
2925 * the different_export to get new export info.
2926 */
2927 different_export = nfs_exported(cs->exi, cs->vp);
2928 }
2929 }
2930
2931 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2932 NULL, NULL, NULL);
2933 if (error)
2934 return (puterrno4(error));
2935
2936 /*
2937 * If the vnode is in a pseudo filesystem, check whether it is visible.
2938 *
2939 * XXX if the vnode is a symlink and it is not visible in
2940 * a pseudo filesystem, return ENOENT (not following symlink).
2941 * V4 client can not mount such symlink. This is a regression
2942 * from V2/V3.
2943 *
2944 * In the same exported filesystem, if the security flavor used
2945 * is not an explicitly shared flavor, limit the view to the visible
2946 * list entries only. This is not a WRONGSEC case because it's already
2947 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2948 */
2949 if (!different_export &&
2950 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2951 cs->access & CS_ACCESS_LIMITED)) {
2952 if (! nfs_visible(cs->exi, vp, &different_export)) {
2953 VN_RELE(vp);
2954 return (puterrno4(ENOENT));
2955 }
2956 }
2957
2958 /*
2959 * If it's a mountpoint, then traverse it.
2960 */
2961 if (vn_ismntpt(vp)) {
2962 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2963 pre_tvp = vp; /* save pre-traversed vnode */
2964
2965 /*
2966 * hold pre_tvp to counteract rele by traverse. We will
2967 * need pre_tvp below if checkexport4 fails
2968 */
2969 VN_HOLD(pre_tvp);
2970 if ((error = traverse(&vp)) != 0) {
2971 VN_RELE(vp);
2972 VN_RELE(pre_tvp);
2973 return (puterrno4(error));
2974 }
2975 different_export = 1;
2976 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2977 /*
2978 * The vfsp comparison is to handle the case where
2979 * a LOFS mount is shared. lo_lookup traverses mount points,
2980 * and NFS is unaware of local fs transistions because
2981 * v_vfsmountedhere isn't set. For this special LOFS case,
2982 * the dir and the obj returned by lookup will have different
2983 * vfs ptrs.
2984 */
2985 different_export = 1;
2986 }
2987
2988 if (different_export) {
2989
2990 bzero(&fid, sizeof (fid));
2991 fid.fid_len = MAXFIDSZ;
2992 error = vop_fid_pseudo(vp, &fid);
2993 if (error) {
2994 VN_RELE(vp);
2995 if (pre_tvp)
2996 VN_RELE(pre_tvp);
2997 return (puterrno4(error));
2998 }
2999
3000 if (dotdot)
3001 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
3002 else
3003 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
3004
3005 if (exi == NULL) {
3006 if (pre_tvp) {
3007 /*
3008 * If this vnode is a mounted-on vnode,
3009 * but the mounted-on file system is not
3010 * exported, send back the filehandle for
3011 * the mounted-on vnode, not the root of
3012 * the mounted-on file system.
3013 */
3014 VN_RELE(vp);
3015 vp = pre_tvp;
3016 exi = pre_exi;
3017 } else {
3018 VN_RELE(vp);
3019 return (puterrno4(EACCES));
3020 }
3021 } else if (pre_tvp) {
3022 /* we're done with pre_tvp now. release extra hold */
3023 VN_RELE(pre_tvp);
3024 }
3025
3026 cs->exi = exi;
3027
3028 /*
3029 * Now we do a checkauth4. The reason is that
3030 * this client/user may not have access to the new
3031 * exported file system, and if they do,
3032 * the client/user may be mapped to a different uid.
3033 *
3034 * We start with a new cr, because the checkauth4 done
3035 * in the PUT*FH operation over wrote the cred's uid,
3036 * gid, etc, and we want the real thing before calling
3037 * checkauth4()
3038 */
3039 crfree(cs->cr);
3040 cs->cr = crdup(cs->basecr);
3041
3042 oldvp = cs->vp;
3043 cs->vp = vp;
3044 is_newvp = TRUE;
3045
3046 stat = call_checkauth4(cs, req);
3047 if (stat != NFS4_OK) {
3048 VN_RELE(cs->vp);
3049 cs->vp = oldvp;
3050 return (stat);
3051 }
3052 }
3053
3054 /*
3055 * After various NFS checks, do a label check on the path
3056 * component. The label on this path should either be the
3057 * global zone's label or a zone's label. We are only
3058 * interested in the zone's label because exported files
3059 * in global zone is accessible (though read-only) to
3060 * clients. The exportability/visibility check is already
3061 * done before reaching this code.
3062 */
3063 if (is_system_labeled()) {
3064 bslabel_t *clabel;
3065
3066 ASSERT(req->rq_label != NULL);
3067 clabel = req->rq_label;
3068 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
3069 "got client label from request(1)", struct svc_req *, req);
3070
3071 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3072 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3073 cs->exi)) {
3074 error = EACCES;
3075 goto err_out;
3076 }
3077 } else {
3078 /*
3079 * We grant access to admin_low label clients
3080 * only if the client is trusted, i.e. also
3081 * running Solaris Trusted Extension.
3082 */
3083 struct sockaddr *ca;
3084 int addr_type;
3085 void *ipaddr;
3086 tsol_tpc_t *tp;
3087
3088 ca = (struct sockaddr *)svc_getrpccaller(
3089 req->rq_xprt)->buf;
3090 if (ca->sa_family == AF_INET) {
3091 addr_type = IPV4_VERSION;
3092 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
3093 } else if (ca->sa_family == AF_INET6) {
3094 addr_type = IPV6_VERSION;
3095 ipaddr = &((struct sockaddr_in6 *)
3096 ca)->sin6_addr;
3097 }
3098 tp = find_tpc(ipaddr, addr_type, B_FALSE);
3099 if (tp == NULL || tp->tpc_tp.tp_doi !=
3100 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
3101 SUN_CIPSO) {
3102 if (tp != NULL)
3103 TPC_RELE(tp);
3104 error = EACCES;
3105 goto err_out;
3106 }
3107 TPC_RELE(tp);
3108 }
3109 }
3110
3111 error = makefh4(&cs->fh, vp, cs->exi);
3112
3113 err_out:
3114 if (error) {
3115 if (is_newvp) {
3116 VN_RELE(cs->vp);
3117 cs->vp = oldvp;
3118 } else
3119 VN_RELE(vp);
3120 return (puterrno4(error));
3121 }
3122
3123 if (!is_newvp) {
3124 if (cs->vp)
3125 VN_RELE(cs->vp);
3126 cs->vp = vp;
3127 } else if (oldvp)
3128 VN_RELE(oldvp);
3129
3130 /*
3131 * if did lookup on attrdir and didn't lookup .., set named
3132 * attr fh flag
3133 */
3134 if (attrdir && ! dotdot)
3135 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
3136
3137 /* Assume false for now, open proc will set this */
3138 cs->mandlock = FALSE;
3139
3140 return (NFS4_OK);
3141 }
3142
3143 /* ARGSUSED */
3144 static void
rfs4_op_lookup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3145 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3146 struct compound_state *cs)
3147 {
3148 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
3149 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
3150 char *nm;
3151 uint_t len;
3152 struct sockaddr *ca;
3153 char *name = NULL;
3154 nfsstat4 status;
3155
3156 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
3157 LOOKUP4args *, args);
3158
3159 if (cs->vp == NULL) {
3160 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161 goto out;
3162 }
3163
3164 if (cs->vp->v_type == VLNK) {
3165 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
3166 goto out;
3167 }
3168
3169 if (cs->vp->v_type != VDIR) {
3170 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3171 goto out;
3172 }
3173
3174 status = utf8_dir_verify(&args->objname);
3175 if (status != NFS4_OK) {
3176 *cs->statusp = resp->status = status;
3177 goto out;
3178 }
3179
3180 nm = utf8_to_str(&args->objname, &len, NULL);
3181 if (nm == NULL) {
3182 *cs->statusp = resp->status = NFS4ERR_INVAL;
3183 goto out;
3184 }
3185
3186 if (len > MAXNAMELEN) {
3187 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3188 kmem_free(nm, len);
3189 goto out;
3190 }
3191
3192 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3193 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3194 MAXPATHLEN + 1);
3195
3196 if (name == NULL) {
3197 *cs->statusp = resp->status = NFS4ERR_INVAL;
3198 kmem_free(nm, len);
3199 goto out;
3200 }
3201
3202 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3203
3204 if (name != nm)
3205 kmem_free(name, MAXPATHLEN + 1);
3206 kmem_free(nm, len);
3207
3208 out:
3209 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3210 LOOKUP4res *, resp);
3211 }
3212
3213 /* ARGSUSED */
3214 static void
rfs4_op_lookupp(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3215 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3216 struct compound_state *cs)
3217 {
3218 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3219
3220 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3221
3222 if (cs->vp == NULL) {
3223 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3224 goto out;
3225 }
3226
3227 if (cs->vp->v_type != VDIR) {
3228 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3229 goto out;
3230 }
3231
3232 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3233
3234 /*
3235 * From NFSV4 Specification, LOOKUPP should not check for
3236 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3237 */
3238 if (resp->status == NFS4ERR_WRONGSEC) {
3239 *cs->statusp = resp->status = NFS4_OK;
3240 }
3241
3242 out:
3243 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3244 LOOKUPP4res *, resp);
3245 }
3246
3247
3248 /*ARGSUSED2*/
3249 static void
rfs4_op_openattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3250 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3251 struct compound_state *cs)
3252 {
3253 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3254 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3255 vnode_t *avp = NULL;
3256 int lookup_flags = LOOKUP_XATTR, error;
3257 int exp_ro = 0;
3258
3259 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3260 OPENATTR4args *, args);
3261
3262 if (cs->vp == NULL) {
3263 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3264 goto out;
3265 }
3266
3267 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3268 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3269 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3270 goto out;
3271 }
3272
3273 /*
3274 * If file system supports passing ACE mask to VOP_ACCESS then
3275 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3276 */
3277
3278 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3279 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3280 V_ACE_MASK, cs->cr, NULL);
3281 else
3282 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3283 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3284 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3285
3286 if (error) {
3287 *cs->statusp = resp->status = puterrno4(EACCES);
3288 goto out;
3289 }
3290
3291 /*
3292 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3293 * the file system is exported read-only -- regardless of
3294 * createdir flag. Otherwise the attrdir would be created
3295 * (assuming server fs isn't mounted readonly locally). If
3296 * VOP_LOOKUP returns ENOENT in this case, the error will
3297 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3298 * because specfs has no VOP_LOOKUP op, so the macro would
3299 * return ENOSYS. EINVAL is returned by all (current)
3300 * Solaris file system implementations when any of their
3301 * restrictions are violated (xattr(dir) can't have xattrdir).
3302 * Returning NOTSUPP is more appropriate in this case
3303 * because the object will never be able to have an attrdir.
3304 */
3305 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3306 lookup_flags |= CREATE_XATTR_DIR;
3307
3308 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3309 NULL, NULL, NULL);
3310
3311 if (error) {
3312 if (error == ENOENT && args->createdir && exp_ro)
3313 *cs->statusp = resp->status = puterrno4(EROFS);
3314 else if (error == EINVAL || error == ENOSYS)
3315 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3316 else
3317 *cs->statusp = resp->status = puterrno4(error);
3318 goto out;
3319 }
3320
3321 ASSERT(avp->v_flag & V_XATTRDIR);
3322
3323 error = makefh4(&cs->fh, avp, cs->exi);
3324
3325 if (error) {
3326 VN_RELE(avp);
3327 *cs->statusp = resp->status = puterrno4(error);
3328 goto out;
3329 }
3330
3331 VN_RELE(cs->vp);
3332 cs->vp = avp;
3333
3334 /*
3335 * There is no requirement for an attrdir fh flag
3336 * because the attrdir has a vnode flag to distinguish
3337 * it from regular (non-xattr) directories. The
3338 * FH4_ATTRDIR flag is set for future sanity checks.
3339 */
3340 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3341 *cs->statusp = resp->status = NFS4_OK;
3342
3343 out:
3344 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3345 OPENATTR4res *, resp);
3346 }
3347
3348 static int
do_io(int direction,vnode_t * vp,struct uio * uio,int ioflag,cred_t * cred,caller_context_t * ct)3349 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3350 caller_context_t *ct)
3351 {
3352 int error;
3353 int i;
3354 clock_t delaytime;
3355
3356 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3357
3358 /*
3359 * Don't block on mandatory locks. If this routine returns
3360 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3361 */
3362 uio->uio_fmode = FNONBLOCK;
3363
3364 for (i = 0; i < rfs4_maxlock_tries; i++) {
3365
3366
3367 if (direction == FREAD) {
3368 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3369 error = VOP_READ(vp, uio, ioflag, cred, ct);
3370 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3371 } else {
3372 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3373 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3374 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3375 }
3376
3377 if (error != EAGAIN)
3378 break;
3379
3380 if (i < rfs4_maxlock_tries - 1) {
3381 delay(delaytime);
3382 delaytime *= 2;
3383 }
3384 }
3385
3386 return (error);
3387 }
3388
3389 /* ARGSUSED */
3390 static void
rfs4_op_read(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3391 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3392 struct compound_state *cs)
3393 {
3394 READ4args *args = &argop->nfs_argop4_u.opread;
3395 READ4res *resp = &resop->nfs_resop4_u.opread;
3396 int error;
3397 int verror;
3398 vnode_t *vp;
3399 struct vattr va;
3400 struct iovec iov, *iovp = NULL;
3401 int iovcnt;
3402 struct uio uio;
3403 u_offset_t offset;
3404 bool_t *deleg = &cs->deleg;
3405 nfsstat4 stat;
3406 int in_crit = 0;
3407 mblk_t *mp = NULL;
3408 int alloc_err = 0;
3409 int rdma_used = 0;
3410 int loaned_buffers;
3411 caller_context_t ct;
3412 struct uio *uiop;
3413
3414 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3415 READ4args, args);
3416
3417 vp = cs->vp;
3418 if (vp == NULL) {
3419 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3420 goto out;
3421 }
3422 if (cs->access == CS_ACCESS_DENIED) {
3423 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3424 goto out;
3425 }
3426
3427 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3428 deleg, TRUE, &ct, cs)) != NFS4_OK) {
3429 *cs->statusp = resp->status = stat;
3430 goto out;
3431 }
3432
3433 /*
3434 * Enter the critical region before calling VOP_RWLOCK
3435 * to avoid a deadlock with write requests.
3436 */
3437 if (nbl_need_check(vp)) {
3438 nbl_start_crit(vp, RW_READER);
3439 in_crit = 1;
3440 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3441 &ct)) {
3442 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3443 goto out;
3444 }
3445 }
3446
3447 if (args->wlist) {
3448 if (args->count > clist_len(args->wlist)) {
3449 *cs->statusp = resp->status = NFS4ERR_INVAL;
3450 goto out;
3451 }
3452 rdma_used = 1;
3453 }
3454
3455 /* use loaned buffers for TCP */
3456 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3457
3458 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3459 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3460
3461 /*
3462 * If we can't get the attributes, then we can't do the
3463 * right access checking. So, we'll fail the request.
3464 */
3465 if (verror) {
3466 *cs->statusp = resp->status = puterrno4(verror);
3467 goto out;
3468 }
3469
3470 if (vp->v_type != VREG) {
3471 *cs->statusp = resp->status =
3472 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3473 goto out;
3474 }
3475
3476 if (crgetuid(cs->cr) != va.va_uid &&
3477 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3478 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3479 *cs->statusp = resp->status = puterrno4(error);
3480 goto out;
3481 }
3482
3483 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3484 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3485 goto out;
3486 }
3487
3488 offset = args->offset;
3489 if (offset >= va.va_size) {
3490 *cs->statusp = resp->status = NFS4_OK;
3491 resp->eof = TRUE;
3492 resp->data_len = 0;
3493 resp->data_val = NULL;
3494 resp->mblk = NULL;
3495 /* RDMA */
3496 resp->wlist = args->wlist;
3497 resp->wlist_len = resp->data_len;
3498 *cs->statusp = resp->status = NFS4_OK;
3499 if (resp->wlist)
3500 clist_zero_len(resp->wlist);
3501 goto out;
3502 }
3503
3504 if (args->count == 0) {
3505 *cs->statusp = resp->status = NFS4_OK;
3506 resp->eof = FALSE;
3507 resp->data_len = 0;
3508 resp->data_val = NULL;
3509 resp->mblk = NULL;
3510 /* RDMA */
3511 resp->wlist = args->wlist;
3512 resp->wlist_len = resp->data_len;
3513 if (resp->wlist)
3514 clist_zero_len(resp->wlist);
3515 goto out;
3516 }
3517
3518 /*
3519 * Do not allocate memory more than maximum allowed
3520 * transfer size
3521 */
3522 if (args->count > rfs4_tsize(req))
3523 args->count = rfs4_tsize(req);
3524
3525 if (loaned_buffers) {
3526 uiop = (uio_t *)rfs_setup_xuio(vp);
3527 ASSERT(uiop != NULL);
3528 uiop->uio_segflg = UIO_SYSSPACE;
3529 uiop->uio_loffset = args->offset;
3530 uiop->uio_resid = args->count;
3531
3532 /* Jump to do the read if successful */
3533 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3534 /*
3535 * Need to hold the vnode until after VOP_RETZCBUF()
3536 * is called.
3537 */
3538 VN_HOLD(vp);
3539 goto doio_read;
3540 }
3541
3542 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3543 uiop->uio_loffset, int, uiop->uio_resid);
3544
3545 uiop->uio_extflg = 0;
3546
3547 /* failure to setup for zero copy */
3548 rfs_free_xuio((void *)uiop);
3549 loaned_buffers = 0;
3550 }
3551
3552 /*
3553 * If returning data via RDMA Write, then grab the chunk list. If we
3554 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3555 */
3556 if (rdma_used) {
3557 mp = NULL;
3558 (void) rdma_get_wchunk(req, &iov, args->wlist);
3559 uio.uio_iov = &iov;
3560 uio.uio_iovcnt = 1;
3561 } else {
3562 /*
3563 * mp will contain the data to be sent out in the read reply.
3564 * It will be freed after the reply has been sent.
3565 */
3566 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3567 ASSERT(mp != NULL);
3568 ASSERT(alloc_err == 0);
3569 uio.uio_iov = iovp;
3570 uio.uio_iovcnt = iovcnt;
3571 }
3572
3573 uio.uio_segflg = UIO_SYSSPACE;
3574 uio.uio_extflg = UIO_COPY_CACHED;
3575 uio.uio_loffset = args->offset;
3576 uio.uio_resid = args->count;
3577 uiop = &uio;
3578
3579 doio_read:
3580 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3581
3582 va.va_mask = AT_SIZE;
3583 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3584
3585 if (error) {
3586 if (mp)
3587 freemsg(mp);
3588 *cs->statusp = resp->status = puterrno4(error);
3589 goto out;
3590 }
3591
3592 /* make mblk using zc buffers */
3593 if (loaned_buffers) {
3594 mp = uio_to_mblk(uiop);
3595 ASSERT(mp != NULL);
3596 }
3597
3598 *cs->statusp = resp->status = NFS4_OK;
3599
3600 ASSERT(uiop->uio_resid >= 0);
3601 resp->data_len = args->count - uiop->uio_resid;
3602 if (mp) {
3603 resp->data_val = (char *)mp->b_datap->db_base;
3604 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3605 } else {
3606 resp->data_val = (caddr_t)iov.iov_base;
3607 }
3608
3609 resp->mblk = mp;
3610
3611 if (!verror && offset + resp->data_len == va.va_size)
3612 resp->eof = TRUE;
3613 else
3614 resp->eof = FALSE;
3615
3616 if (rdma_used) {
3617 if (!rdma_setup_read_data4(args, resp)) {
3618 *cs->statusp = resp->status = NFS4ERR_INVAL;
3619 }
3620 } else {
3621 resp->wlist = NULL;
3622 }
3623
3624 out:
3625 if (in_crit)
3626 nbl_end_crit(vp);
3627
3628 if (iovp != NULL)
3629 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3630
3631 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3632 READ4res *, resp);
3633 }
3634
3635 static void
rfs4_op_read_free(nfs_resop4 * resop)3636 rfs4_op_read_free(nfs_resop4 *resop)
3637 {
3638 READ4res *resp = &resop->nfs_resop4_u.opread;
3639
3640 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3641 freemsg(resp->mblk);
3642 resp->mblk = NULL;
3643 resp->data_val = NULL;
3644 resp->data_len = 0;
3645 }
3646 }
3647
3648 static void
rfs4_op_readdir_free(nfs_resop4 * resop)3649 rfs4_op_readdir_free(nfs_resop4 * resop)
3650 {
3651 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3652
3653 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3654 freeb(resp->mblk);
3655 resp->mblk = NULL;
3656 resp->data_len = 0;
3657 }
3658 }
3659
3660
3661 /* ARGSUSED */
3662 static void
rfs4_op_putpubfh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3663 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3664 struct compound_state *cs)
3665 {
3666 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3667 int error;
3668 vnode_t *vp;
3669 struct exportinfo *exi, *sav_exi;
3670 nfs_fh4_fmt_t *fh_fmtp;
3671 nfs_export_t *ne = nfs_get_export();
3672
3673 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3674
3675 if (cs->vp) {
3676 VN_RELE(cs->vp);
3677 cs->vp = NULL;
3678 }
3679
3680 if (cs->cr)
3681 crfree(cs->cr);
3682
3683 cs->cr = crdup(cs->basecr);
3684
3685 vp = ne->exi_public->exi_vp;
3686 if (vp == NULL) {
3687 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3688 goto out;
3689 }
3690
3691 error = makefh4(&cs->fh, vp, ne->exi_public);
3692 if (error != 0) {
3693 *cs->statusp = resp->status = puterrno4(error);
3694 goto out;
3695 }
3696 sav_exi = cs->exi;
3697 if (ne->exi_public == ne->exi_root) {
3698 /*
3699 * No filesystem is actually shared public, so we default
3700 * to exi_root. In this case, we must check whether root
3701 * is exported.
3702 */
3703 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3704
3705 /*
3706 * if root filesystem is exported, the exportinfo struct that we
3707 * should use is what checkexport4 returns, because root_exi is
3708 * actually a mostly empty struct.
3709 */
3710 exi = checkexport4(&fh_fmtp->fh4_fsid,
3711 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3712 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3713 } else {
3714 /*
3715 * it's a properly shared filesystem
3716 */
3717 cs->exi = ne->exi_public;
3718 }
3719
3720 if (is_system_labeled()) {
3721 bslabel_t *clabel;
3722
3723 ASSERT(req->rq_label != NULL);
3724 clabel = req->rq_label;
3725 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3726 "got client label from request(1)",
3727 struct svc_req *, req);
3728 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3729 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3730 cs->exi)) {
3731 *cs->statusp = resp->status =
3732 NFS4ERR_SERVERFAULT;
3733 goto out;
3734 }
3735 }
3736 }
3737
3738 VN_HOLD(vp);
3739 cs->vp = vp;
3740
3741 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3742 VN_RELE(cs->vp);
3743 cs->vp = NULL;
3744 cs->exi = sav_exi;
3745 goto out;
3746 }
3747
3748 *cs->statusp = resp->status = NFS4_OK;
3749 out:
3750 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3751 PUTPUBFH4res *, resp);
3752 }
3753
3754 /*
3755 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3756 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3757 * or joe have restrictive search permissions, then we shouldn't let
3758 * the client get a file handle. This is easy to enforce. However, we
3759 * don't know what security flavor should be used until we resolve the
3760 * path name. Another complication is uid mapping. If root is
3761 * the user, then it will be mapped to the anonymous user by default,
3762 * but we won't know that till we've resolved the path name. And we won't
3763 * know what the anonymous user is.
3764 * Luckily, SECINFO is specified to take a full filename.
3765 * So what we will have to in rfs4_op_lookup is check that flavor of
3766 * the target object matches that of the request, and if root was the
3767 * caller, check for the root= and anon= options, and if necessary,
3768 * repeat the lookup using the right cred_t. But that's not done yet.
3769 */
3770 /* ARGSUSED */
3771 static void
rfs4_op_putfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3772 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3773 struct compound_state *cs)
3774 {
3775 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3776 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3777 nfs_fh4_fmt_t *fh_fmtp;
3778
3779 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3780 PUTFH4args *, args);
3781
3782 if (cs->vp) {
3783 VN_RELE(cs->vp);
3784 cs->vp = NULL;
3785 }
3786
3787 if (cs->cr) {
3788 crfree(cs->cr);
3789 cs->cr = NULL;
3790 }
3791
3792
3793 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3794 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3795 goto out;
3796 }
3797
3798 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3799 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3800 NULL);
3801
3802 if (cs->exi == NULL) {
3803 *cs->statusp = resp->status = NFS4ERR_STALE;
3804 goto out;
3805 }
3806
3807 cs->cr = crdup(cs->basecr);
3808
3809 ASSERT(cs->cr != NULL);
3810
3811 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3812 *cs->statusp = resp->status;
3813 goto out;
3814 }
3815
3816 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3817 VN_RELE(cs->vp);
3818 cs->vp = NULL;
3819 goto out;
3820 }
3821
3822 nfs_fh4_copy(&args->object, &cs->fh);
3823 *cs->statusp = resp->status = NFS4_OK;
3824 cs->deleg = FALSE;
3825
3826 out:
3827 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3828 PUTFH4res *, resp);
3829 }
3830
3831 /* ARGSUSED */
3832 static void
rfs4_op_putrootfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3833 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3834 struct compound_state *cs)
3835 {
3836 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3837 int error;
3838 fid_t fid;
3839 struct exportinfo *exi, *sav_exi;
3840
3841 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3842
3843 if (cs->vp) {
3844 VN_RELE(cs->vp);
3845 cs->vp = NULL;
3846 }
3847
3848 if (cs->cr)
3849 crfree(cs->cr);
3850
3851 cs->cr = crdup(cs->basecr);
3852
3853 /*
3854 * Using rootdir, the system root vnode,
3855 * get its fid.
3856 */
3857 bzero(&fid, sizeof (fid));
3858 fid.fid_len = MAXFIDSZ;
3859 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3860 if (error != 0) {
3861 *cs->statusp = resp->status = puterrno4(error);
3862 goto out;
3863 }
3864
3865 /*
3866 * Then use the root fsid & fid it to find out if it's exported
3867 *
3868 * If the server root isn't exported directly, then
3869 * it should at least be a pseudo export based on
3870 * one or more exports further down in the server's
3871 * file tree.
3872 */
3873 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3874 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3875 NFS4_DEBUG(rfs4_debug,
3876 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3877 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3878 goto out;
3879 }
3880
3881 /*
3882 * Now make a filehandle based on the root
3883 * export and root vnode.
3884 */
3885 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3886 if (error != 0) {
3887 *cs->statusp = resp->status = puterrno4(error);
3888 goto out;
3889 }
3890
3891 sav_exi = cs->exi;
3892 cs->exi = exi;
3893
3894 VN_HOLD(ZONE_ROOTVP());
3895 cs->vp = ZONE_ROOTVP();
3896
3897 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3898 VN_RELE(cs->vp);
3899 cs->vp = NULL;
3900 cs->exi = sav_exi;
3901 goto out;
3902 }
3903
3904 *cs->statusp = resp->status = NFS4_OK;
3905 cs->deleg = FALSE;
3906 out:
3907 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3908 PUTROOTFH4res *, resp);
3909 }
3910
3911 /*
3912 * readlink: args: CURRENT_FH.
3913 * res: status. If success - CURRENT_FH unchanged, return linktext.
3914 */
3915
3916 /* ARGSUSED */
3917 static void
rfs4_op_readlink(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3918 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3919 struct compound_state *cs)
3920 {
3921 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3922 int error;
3923 vnode_t *vp;
3924 struct iovec iov;
3925 struct vattr va;
3926 struct uio uio;
3927 char *data;
3928 struct sockaddr *ca;
3929 char *name = NULL;
3930 int is_referral;
3931
3932 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3933
3934 /* CURRENT_FH: directory */
3935 vp = cs->vp;
3936 if (vp == NULL) {
3937 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3938 goto out;
3939 }
3940
3941 if (cs->access == CS_ACCESS_DENIED) {
3942 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3943 goto out;
3944 }
3945
3946 /* Is it a referral? */
3947 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3948
3949 is_referral = 1;
3950
3951 } else {
3952
3953 is_referral = 0;
3954
3955 if (vp->v_type == VDIR) {
3956 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3957 goto out;
3958 }
3959
3960 if (vp->v_type != VLNK) {
3961 *cs->statusp = resp->status = NFS4ERR_INVAL;
3962 goto out;
3963 }
3964
3965 }
3966
3967 va.va_mask = AT_MODE;
3968 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3969 if (error) {
3970 *cs->statusp = resp->status = puterrno4(error);
3971 goto out;
3972 }
3973
3974 if (MANDLOCK(vp, va.va_mode)) {
3975 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3976 goto out;
3977 }
3978
3979 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3980
3981 if (is_referral) {
3982 char *s;
3983 size_t strsz;
3984 kstat_named_t *stat =
3985 cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3986
3987 /* Get an artificial symlink based on a referral */
3988 s = build_symlink(vp, cs->cr, &strsz);
3989 stat[NFS_REFERLINKS].value.ui64++;
3990 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3991 vnode_t *, vp, char *, s);
3992 if (s == NULL)
3993 error = EINVAL;
3994 else {
3995 error = 0;
3996 (void) strlcpy(data, s, MAXPATHLEN + 1);
3997 kmem_free(s, strsz);
3998 }
3999
4000 } else {
4001
4002 iov.iov_base = data;
4003 iov.iov_len = MAXPATHLEN;
4004 uio.uio_iov = &iov;
4005 uio.uio_iovcnt = 1;
4006 uio.uio_segflg = UIO_SYSSPACE;
4007 uio.uio_extflg = UIO_COPY_CACHED;
4008 uio.uio_loffset = 0;
4009 uio.uio_resid = MAXPATHLEN;
4010
4011 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
4012
4013 if (!error)
4014 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
4015 }
4016
4017 if (error) {
4018 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4019 *cs->statusp = resp->status = puterrno4(error);
4020 goto out;
4021 }
4022
4023 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4024 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
4025 MAXPATHLEN + 1);
4026
4027 if (name == NULL) {
4028 /*
4029 * Even though the conversion failed, we return
4030 * something. We just don't translate it.
4031 */
4032 name = data;
4033 }
4034
4035 /*
4036 * treat link name as data
4037 */
4038 (void) str_to_utf8(name, (utf8string *)&resp->link);
4039
4040 if (name != data)
4041 kmem_free(name, MAXPATHLEN + 1);
4042 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4043 *cs->statusp = resp->status = NFS4_OK;
4044
4045 out:
4046 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
4047 READLINK4res *, resp);
4048 }
4049
4050 static void
rfs4_op_readlink_free(nfs_resop4 * resop)4051 rfs4_op_readlink_free(nfs_resop4 *resop)
4052 {
4053 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
4054 utf8string *symlink = (utf8string *)&resp->link;
4055
4056 if (symlink->utf8string_val) {
4057 UTF8STRING_FREE(*symlink)
4058 }
4059 }
4060
4061 /*
4062 * release_lockowner:
4063 * Release any state associated with the supplied
4064 * lockowner. Note if any lo_state is holding locks we will not
4065 * rele that lo_state and thus the lockowner will not be destroyed.
4066 * A client using lock after the lock owner stateid has been released
4067 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
4068 * to reissue the lock with new_lock_owner set to TRUE.
4069 * args: lock_owner
4070 * res: status
4071 */
4072 /* ARGSUSED */
4073 static void
rfs4_op_release_lockowner(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4074 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
4075 struct svc_req *req, struct compound_state *cs)
4076 {
4077 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
4078 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
4079 rfs4_lockowner_t *lo;
4080 rfs4_openowner_t *oo;
4081 rfs4_state_t *sp;
4082 rfs4_lo_state_t *lsp;
4083 rfs4_client_t *cp;
4084 bool_t create = FALSE;
4085 locklist_t *llist;
4086 sysid_t sysid;
4087
4088 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
4089 cs, RELEASE_LOCKOWNER4args *, ap);
4090
4091 /* Make sure there is a clientid around for this request */
4092 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
4093
4094 if (cp == NULL) {
4095 *cs->statusp = resp->status =
4096 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4097 goto out;
4098 }
4099 rfs4_client_rele(cp);
4100
4101 lo = rfs4_findlockowner(&ap->lock_owner, &create);
4102 if (lo == NULL) {
4103 *cs->statusp = resp->status = NFS4_OK;
4104 goto out;
4105 }
4106 ASSERT(lo->rl_client != NULL);
4107
4108 /*
4109 * Check for EXPIRED client. If so will reap state with in a lease
4110 * period or on next set_clientid_confirm step
4111 */
4112 if (rfs4_lease_expired(lo->rl_client)) {
4113 rfs4_lockowner_rele(lo);
4114 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4115 goto out;
4116 }
4117
4118 /*
4119 * If no sysid has been assigned, then no locks exist; just return.
4120 */
4121 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4122 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4123 rfs4_lockowner_rele(lo);
4124 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4125 goto out;
4126 }
4127
4128 sysid = lo->rl_client->rc_sysidt;
4129 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4130
4131 /*
4132 * Mark the lockowner invalid.
4133 */
4134 rfs4_dbe_hide(lo->rl_dbe);
4135
4136 /*
4137 * sysid-pid pair should now not be used since the lockowner is
4138 * invalid. If the client were to instantiate the lockowner again
4139 * it would be assigned a new pid. Thus we can get the list of
4140 * current locks.
4141 */
4142
4143 llist = flk_get_active_locks(sysid, lo->rl_pid);
4144 /* If we are still holding locks fail */
4145 if (llist != NULL) {
4146
4147 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4148
4149 flk_free_locklist(llist);
4150 /*
4151 * We need to unhide the lockowner so the client can
4152 * try it again. The bad thing here is if the client
4153 * has a logic error that took it here in the first place
4154 * they probably have lost accounting of the locks that it
4155 * is holding. So we may have dangling state until the
4156 * open owner state is reaped via close. One scenario
4157 * that could possibly occur is that the client has
4158 * sent the unlock request(s) in separate threads
4159 * and has not waited for the replies before sending the
4160 * RELEASE_LOCKOWNER request. Presumably, it would expect
4161 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4162 * reissuing the request.
4163 */
4164 rfs4_dbe_unhide(lo->rl_dbe);
4165 rfs4_lockowner_rele(lo);
4166 goto out;
4167 }
4168
4169 /*
4170 * For the corresponding client we need to check each open
4171 * owner for any opens that have lockowner state associated
4172 * with this lockowner.
4173 */
4174
4175 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4176 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4177 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4178
4179 rfs4_dbe_lock(oo->ro_dbe);
4180 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4181 sp = list_next(&oo->ro_statelist, sp)) {
4182
4183 rfs4_dbe_lock(sp->rs_dbe);
4184 for (lsp = list_head(&sp->rs_lostatelist);
4185 lsp != NULL;
4186 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4187 if (lsp->rls_locker == lo) {
4188 rfs4_dbe_lock(lsp->rls_dbe);
4189 rfs4_dbe_invalidate(lsp->rls_dbe);
4190 rfs4_dbe_unlock(lsp->rls_dbe);
4191 }
4192 }
4193 rfs4_dbe_unlock(sp->rs_dbe);
4194 }
4195 rfs4_dbe_unlock(oo->ro_dbe);
4196 }
4197 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4198
4199 rfs4_lockowner_rele(lo);
4200
4201 *cs->statusp = resp->status = NFS4_OK;
4202
4203 out:
4204 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4205 cs, RELEASE_LOCKOWNER4res *, resp);
4206 }
4207
4208 /*
4209 * short utility function to lookup a file and recall the delegation
4210 */
4211 static rfs4_file_t *
rfs4_lookup_and_findfile(vnode_t * dvp,char * nm,vnode_t ** vpp,int * lkup_error,cred_t * cr)4212 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4213 int *lkup_error, cred_t *cr)
4214 {
4215 vnode_t *vp;
4216 rfs4_file_t *fp = NULL;
4217 bool_t fcreate = FALSE;
4218 int error;
4219
4220 if (vpp)
4221 *vpp = NULL;
4222
4223 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4224 NULL)) == 0) {
4225 if (vp->v_type == VREG)
4226 fp = rfs4_findfile(vp, NULL, &fcreate);
4227 if (vpp)
4228 *vpp = vp;
4229 else
4230 VN_RELE(vp);
4231 }
4232
4233 if (lkup_error)
4234 *lkup_error = error;
4235
4236 return (fp);
4237 }
4238
4239 /*
4240 * remove: args: CURRENT_FH: directory; name.
4241 * res: status. If success - CURRENT_FH unchanged, return change_info
4242 * for directory.
4243 */
4244 /* ARGSUSED */
4245 static void
rfs4_op_remove(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4246 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4247 struct compound_state *cs)
4248 {
4249 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4250 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4251 int error;
4252 vnode_t *dvp, *vp;
4253 struct vattr bdva, idva, adva;
4254 char *nm;
4255 uint_t len;
4256 rfs4_file_t *fp;
4257 int in_crit = 0;
4258 bslabel_t *clabel;
4259 struct sockaddr *ca;
4260 char *name = NULL;
4261 nfsstat4 status;
4262
4263 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4264 REMOVE4args *, args);
4265
4266 /* CURRENT_FH: directory */
4267 dvp = cs->vp;
4268 if (dvp == NULL) {
4269 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4270 goto out;
4271 }
4272
4273 if (cs->access == CS_ACCESS_DENIED) {
4274 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4275 goto out;
4276 }
4277
4278 /*
4279 * If there is an unshared filesystem mounted on this vnode,
4280 * Do not allow to remove anything in this directory.
4281 */
4282 if (vn_ismntpt(dvp)) {
4283 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4284 goto out;
4285 }
4286
4287 if (dvp->v_type != VDIR) {
4288 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4289 goto out;
4290 }
4291
4292 status = utf8_dir_verify(&args->target);
4293 if (status != NFS4_OK) {
4294 *cs->statusp = resp->status = status;
4295 goto out;
4296 }
4297
4298 /*
4299 * Lookup the file so that we can check if it's a directory
4300 */
4301 nm = utf8_to_fn(&args->target, &len, NULL);
4302 if (nm == NULL) {
4303 *cs->statusp = resp->status = NFS4ERR_INVAL;
4304 goto out;
4305 }
4306
4307 if (len > MAXNAMELEN) {
4308 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4309 kmem_free(nm, len);
4310 goto out;
4311 }
4312
4313 if (rdonly4(req, cs)) {
4314 *cs->statusp = resp->status = NFS4ERR_ROFS;
4315 kmem_free(nm, len);
4316 goto out;
4317 }
4318
4319 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4320 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4321 MAXPATHLEN + 1);
4322
4323 if (name == NULL) {
4324 *cs->statusp = resp->status = NFS4ERR_INVAL;
4325 kmem_free(nm, len);
4326 goto out;
4327 }
4328
4329 /*
4330 * Lookup the file to determine type and while we are see if
4331 * there is a file struct around and check for delegation.
4332 * We don't need to acquire va_seq before this lookup, if
4333 * it causes an update, cinfo.before will not match, which will
4334 * trigger a cache flush even if atomic is TRUE.
4335 */
4336 fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr);
4337 if (fp != NULL) {
4338 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4339 NULL)) {
4340 VN_RELE(vp);
4341 rfs4_file_rele(fp);
4342 *cs->statusp = resp->status = NFS4ERR_DELAY;
4343 if (nm != name)
4344 kmem_free(name, MAXPATHLEN + 1);
4345 kmem_free(nm, len);
4346 goto out;
4347 }
4348 }
4349
4350 /* Didn't find anything to remove */
4351 if (vp == NULL) {
4352 *cs->statusp = resp->status = error;
4353 if (nm != name)
4354 kmem_free(name, MAXPATHLEN + 1);
4355 kmem_free(nm, len);
4356 goto out;
4357 }
4358
4359 if (nbl_need_check(vp)) {
4360 nbl_start_crit(vp, RW_READER);
4361 in_crit = 1;
4362 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4363 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4364 if (nm != name)
4365 kmem_free(name, MAXPATHLEN + 1);
4366 kmem_free(nm, len);
4367 nbl_end_crit(vp);
4368 VN_RELE(vp);
4369 if (fp) {
4370 rfs4_clear_dont_grant(fp);
4371 rfs4_file_rele(fp);
4372 }
4373 goto out;
4374 }
4375 }
4376
4377 /* check label before allowing removal */
4378 if (is_system_labeled()) {
4379 ASSERT(req->rq_label != NULL);
4380 clabel = req->rq_label;
4381 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4382 "got client label from request(1)",
4383 struct svc_req *, req);
4384 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4385 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4386 cs->exi)) {
4387 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4388 if (name != nm)
4389 kmem_free(name, MAXPATHLEN + 1);
4390 kmem_free(nm, len);
4391 if (in_crit)
4392 nbl_end_crit(vp);
4393 VN_RELE(vp);
4394 if (fp) {
4395 rfs4_clear_dont_grant(fp);
4396 rfs4_file_rele(fp);
4397 }
4398 goto out;
4399 }
4400 }
4401 }
4402
4403 /* Get dir "before" change value */
4404 bdva.va_mask = AT_CTIME|AT_SEQ;
4405 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4406 if (error) {
4407 *cs->statusp = resp->status = puterrno4(error);
4408 if (nm != name)
4409 kmem_free(name, MAXPATHLEN + 1);
4410 kmem_free(nm, len);
4411 if (in_crit)
4412 nbl_end_crit(vp);
4413 VN_RELE(vp);
4414 if (fp) {
4415 rfs4_clear_dont_grant(fp);
4416 rfs4_file_rele(fp);
4417 }
4418 goto out;
4419 }
4420 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4421
4422 /* Actually do the REMOVE operation */
4423 if (vp->v_type == VDIR) {
4424 /*
4425 * Can't remove a directory that has a mounted-on filesystem.
4426 */
4427 if (vn_ismntpt(vp)) {
4428 error = EACCES;
4429 } else {
4430 /*
4431 * System V defines rmdir to return EEXIST,
4432 * not ENOTEMPTY, if the directory is not
4433 * empty. A System V NFS server needs to map
4434 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4435 * transmit over the wire.
4436 */
4437 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4438 NULL, 0)) == EEXIST)
4439 error = ENOTEMPTY;
4440 }
4441 } else {
4442 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4443 fp != NULL) {
4444 struct vattr va;
4445 vnode_t *tvp;
4446
4447 rfs4_dbe_lock(fp->rf_dbe);
4448 tvp = fp->rf_vp;
4449 if (tvp)
4450 VN_HOLD(tvp);
4451 rfs4_dbe_unlock(fp->rf_dbe);
4452
4453 if (tvp) {
4454 /*
4455 * This is va_seq safe because we are not
4456 * manipulating dvp.
4457 */
4458 va.va_mask = AT_NLINK;
4459 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4460 va.va_nlink == 0) {
4461 /* Remove state on file remove */
4462 if (in_crit) {
4463 nbl_end_crit(vp);
4464 in_crit = 0;
4465 }
4466 rfs4_close_all_state(fp);
4467 }
4468 VN_RELE(tvp);
4469 }
4470 }
4471 }
4472
4473 if (in_crit)
4474 nbl_end_crit(vp);
4475 VN_RELE(vp);
4476
4477 if (fp) {
4478 rfs4_clear_dont_grant(fp);
4479 rfs4_file_rele(fp);
4480 }
4481 if (nm != name)
4482 kmem_free(name, MAXPATHLEN + 1);
4483 kmem_free(nm, len);
4484
4485 if (error) {
4486 *cs->statusp = resp->status = puterrno4(error);
4487 goto out;
4488 }
4489
4490 /*
4491 * Get the initial "after" sequence number, if it fails, set to zero
4492 */
4493 idva.va_mask = AT_SEQ;
4494 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4495 idva.va_seq = 0;
4496
4497 /*
4498 * Force modified data and metadata out to stable storage.
4499 */
4500 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4501
4502 /*
4503 * Get "after" change value, if it fails, simply return the
4504 * before value.
4505 */
4506 adva.va_mask = AT_CTIME|AT_SEQ;
4507 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4508 adva.va_ctime = bdva.va_ctime;
4509 adva.va_seq = 0;
4510 }
4511
4512 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4513
4514 /*
4515 * The cinfo.atomic = TRUE only if we have
4516 * non-zero va_seq's, and it has incremented by exactly one
4517 * during the VOP_REMOVE/RMDIR and it didn't change during
4518 * the VOP_FSYNC.
4519 */
4520 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4521 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4522 resp->cinfo.atomic = TRUE;
4523 else
4524 resp->cinfo.atomic = FALSE;
4525
4526 *cs->statusp = resp->status = NFS4_OK;
4527
4528 out:
4529 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4530 REMOVE4res *, resp);
4531 }
4532
4533 /*
4534 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4535 * oldname and newname.
4536 * res: status. If success - CURRENT_FH unchanged, return change_info
4537 * for both from and target directories.
4538 */
4539 /* ARGSUSED */
4540 static void
rfs4_op_rename(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4541 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4542 struct compound_state *cs)
4543 {
4544 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4545 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4546 int error;
4547 vnode_t *odvp;
4548 vnode_t *ndvp;
4549 vnode_t *srcvp, *targvp, *tvp;
4550 struct vattr obdva, oidva, oadva;
4551 struct vattr nbdva, nidva, nadva;
4552 char *onm, *nnm;
4553 uint_t olen, nlen;
4554 rfs4_file_t *fp, *sfp;
4555 int in_crit_src, in_crit_targ;
4556 int fp_rele_grant_hold, sfp_rele_grant_hold;
4557 int unlinked;
4558 bslabel_t *clabel;
4559 struct sockaddr *ca;
4560 char *converted_onm = NULL;
4561 char *converted_nnm = NULL;
4562 nfsstat4 status;
4563
4564 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4565 RENAME4args *, args);
4566
4567 fp = sfp = NULL;
4568 srcvp = targvp = tvp = NULL;
4569 in_crit_src = in_crit_targ = 0;
4570 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4571 unlinked = 0;
4572
4573 /* CURRENT_FH: target directory */
4574 ndvp = cs->vp;
4575 if (ndvp == NULL) {
4576 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4577 goto out;
4578 }
4579
4580 /* SAVED_FH: from directory */
4581 odvp = cs->saved_vp;
4582 if (odvp == NULL) {
4583 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4584 goto out;
4585 }
4586
4587 if (cs->access == CS_ACCESS_DENIED) {
4588 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4589 goto out;
4590 }
4591
4592 /*
4593 * If there is an unshared filesystem mounted on this vnode,
4594 * do not allow to rename objects in this directory.
4595 */
4596 if (vn_ismntpt(odvp)) {
4597 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4598 goto out;
4599 }
4600
4601 /*
4602 * If there is an unshared filesystem mounted on this vnode,
4603 * do not allow to rename to this directory.
4604 */
4605 if (vn_ismntpt(ndvp)) {
4606 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4607 goto out;
4608 }
4609
4610 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4611 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4612 goto out;
4613 }
4614
4615 if (cs->saved_exi != cs->exi) {
4616 *cs->statusp = resp->status = NFS4ERR_XDEV;
4617 goto out;
4618 }
4619
4620 status = utf8_dir_verify(&args->oldname);
4621 if (status != NFS4_OK) {
4622 *cs->statusp = resp->status = status;
4623 goto out;
4624 }
4625
4626 status = utf8_dir_verify(&args->newname);
4627 if (status != NFS4_OK) {
4628 *cs->statusp = resp->status = status;
4629 goto out;
4630 }
4631
4632 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4633 if (onm == NULL) {
4634 *cs->statusp = resp->status = NFS4ERR_INVAL;
4635 goto out;
4636 }
4637 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4638 nlen = MAXPATHLEN + 1;
4639 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4640 nlen);
4641
4642 if (converted_onm == NULL) {
4643 *cs->statusp = resp->status = NFS4ERR_INVAL;
4644 kmem_free(onm, olen);
4645 goto out;
4646 }
4647
4648 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4649 if (nnm == NULL) {
4650 *cs->statusp = resp->status = NFS4ERR_INVAL;
4651 if (onm != converted_onm)
4652 kmem_free(converted_onm, MAXPATHLEN + 1);
4653 kmem_free(onm, olen);
4654 goto out;
4655 }
4656 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4657 MAXPATHLEN + 1);
4658
4659 if (converted_nnm == NULL) {
4660 *cs->statusp = resp->status = NFS4ERR_INVAL;
4661 kmem_free(nnm, nlen);
4662 nnm = NULL;
4663 if (onm != converted_onm)
4664 kmem_free(converted_onm, MAXPATHLEN + 1);
4665 kmem_free(onm, olen);
4666 goto out;
4667 }
4668
4669
4670 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4671 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4672 kmem_free(onm, olen);
4673 kmem_free(nnm, nlen);
4674 goto out;
4675 }
4676
4677
4678 if (rdonly4(req, cs)) {
4679 *cs->statusp = resp->status = NFS4ERR_ROFS;
4680 if (onm != converted_onm)
4681 kmem_free(converted_onm, MAXPATHLEN + 1);
4682 kmem_free(onm, olen);
4683 if (nnm != converted_nnm)
4684 kmem_free(converted_nnm, MAXPATHLEN + 1);
4685 kmem_free(nnm, nlen);
4686 goto out;
4687 }
4688
4689 /* check label of the target dir */
4690 if (is_system_labeled()) {
4691 ASSERT(req->rq_label != NULL);
4692 clabel = req->rq_label;
4693 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4694 "got client label from request(1)",
4695 struct svc_req *, req);
4696 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4697 if (!do_rfs_label_check(clabel, ndvp,
4698 EQUALITY_CHECK, cs->exi)) {
4699 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4700 goto err_out;
4701 }
4702 }
4703 }
4704
4705 /*
4706 * Is the source a file and have a delegation?
4707 * We don't need to acquire va_seq before these lookups, if
4708 * it causes an update, cinfo.before will not match, which will
4709 * trigger a cache flush even if atomic is TRUE.
4710 */
4711 sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4712 &error, cs->cr);
4713 if (sfp != NULL) {
4714 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4715 NULL)) {
4716 *cs->statusp = resp->status = NFS4ERR_DELAY;
4717 goto err_out;
4718 }
4719 }
4720
4721 if (srcvp == NULL) {
4722 *cs->statusp = resp->status = puterrno4(error);
4723 if (onm != converted_onm)
4724 kmem_free(converted_onm, MAXPATHLEN + 1);
4725 kmem_free(onm, olen);
4726 if (nnm != converted_nnm)
4727 kmem_free(converted_nnm, MAXPATHLEN + 1);
4728 kmem_free(nnm, nlen);
4729 goto out;
4730 }
4731
4732 sfp_rele_grant_hold = 1;
4733
4734 /* Does the destination exist and a file and have a delegation? */
4735 fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL,
4736 cs->cr);
4737 if (fp != NULL) {
4738 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4739 NULL)) {
4740 *cs->statusp = resp->status = NFS4ERR_DELAY;
4741 goto err_out;
4742 }
4743 }
4744 fp_rele_grant_hold = 1;
4745
4746 /* Check for NBMAND lock on both source and target */
4747 if (nbl_need_check(srcvp)) {
4748 nbl_start_crit(srcvp, RW_READER);
4749 in_crit_src = 1;
4750 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4751 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4752 goto err_out;
4753 }
4754 }
4755
4756 if (targvp && nbl_need_check(targvp)) {
4757 nbl_start_crit(targvp, RW_READER);
4758 in_crit_targ = 1;
4759 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4760 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4761 goto err_out;
4762 }
4763 }
4764
4765 /* Get source "before" change value */
4766 obdva.va_mask = AT_CTIME|AT_SEQ;
4767 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4768 if (!error) {
4769 nbdva.va_mask = AT_CTIME|AT_SEQ;
4770 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4771 }
4772 if (error) {
4773 *cs->statusp = resp->status = puterrno4(error);
4774 goto err_out;
4775 }
4776
4777 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4778 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4779
4780 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4781 NULL, 0);
4782
4783 /*
4784 * If target existed and was unlinked by VOP_RENAME, state will need
4785 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4786 * any necessary nbl_end_crit on srcvp and tgtvp.
4787 */
4788 if (error == 0 && fp != NULL) {
4789 rfs4_dbe_lock(fp->rf_dbe);
4790 tvp = fp->rf_vp;
4791 if (tvp)
4792 VN_HOLD(tvp);
4793 rfs4_dbe_unlock(fp->rf_dbe);
4794
4795 if (tvp) {
4796 struct vattr va;
4797 va.va_mask = AT_NLINK;
4798
4799 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4800 va.va_nlink == 0) {
4801 unlinked = 1;
4802
4803 /* DEBUG data */
4804 if ((srcvp == targvp) || (tvp != targvp)) {
4805 cmn_err(CE_WARN, "rfs4_op_rename: "
4806 "srcvp %p, targvp: %p, tvp: %p",
4807 (void *)srcvp, (void *)targvp,
4808 (void *)tvp);
4809 }
4810 } else {
4811 VN_RELE(tvp);
4812 }
4813 }
4814 }
4815 if (error == 0)
4816 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4817
4818 if (in_crit_src)
4819 nbl_end_crit(srcvp);
4820 if (srcvp)
4821 VN_RELE(srcvp);
4822 if (in_crit_targ)
4823 nbl_end_crit(targvp);
4824 if (targvp)
4825 VN_RELE(targvp);
4826
4827 if (unlinked) {
4828 ASSERT(fp != NULL);
4829 ASSERT(tvp != NULL);
4830
4831 /* DEBUG data */
4832 if (RW_READ_HELD(&tvp->v_nbllock)) {
4833 cmn_err(CE_WARN, "rfs4_op_rename: "
4834 "RW_READ_HELD(%p)", (void *)tvp);
4835 }
4836
4837 /* The file is gone and so should the state */
4838 rfs4_close_all_state(fp);
4839 VN_RELE(tvp);
4840 }
4841
4842 if (sfp) {
4843 rfs4_clear_dont_grant(sfp);
4844 rfs4_file_rele(sfp);
4845 }
4846 if (fp) {
4847 rfs4_clear_dont_grant(fp);
4848 rfs4_file_rele(fp);
4849 }
4850
4851 if (converted_onm != onm)
4852 kmem_free(converted_onm, MAXPATHLEN + 1);
4853 kmem_free(onm, olen);
4854 if (converted_nnm != nnm)
4855 kmem_free(converted_nnm, MAXPATHLEN + 1);
4856 kmem_free(nnm, nlen);
4857
4858 /*
4859 * Get the initial "after" sequence number, if it fails, set to zero
4860 */
4861 oidva.va_mask = AT_SEQ;
4862 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4863 oidva.va_seq = 0;
4864
4865 nidva.va_mask = AT_SEQ;
4866 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4867 nidva.va_seq = 0;
4868
4869 /*
4870 * Force modified data and metadata out to stable storage.
4871 */
4872 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4873 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4874
4875 if (error) {
4876 *cs->statusp = resp->status = puterrno4(error);
4877 goto out;
4878 }
4879
4880 /*
4881 * Get "after" change values, if it fails, simply return the
4882 * before value.
4883 */
4884 oadva.va_mask = AT_CTIME|AT_SEQ;
4885 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4886 oadva.va_ctime = obdva.va_ctime;
4887 oadva.va_seq = 0;
4888 }
4889
4890 nadva.va_mask = AT_CTIME|AT_SEQ;
4891 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4892 nadva.va_ctime = nbdva.va_ctime;
4893 nadva.va_seq = 0;
4894 }
4895
4896 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4897 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4898
4899 /*
4900 * The cinfo.atomic = TRUE only if we have
4901 * non-zero va_seq's, and it has incremented by exactly one
4902 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4903 */
4904 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4905 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4906 resp->source_cinfo.atomic = TRUE;
4907 else
4908 resp->source_cinfo.atomic = FALSE;
4909
4910 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4911 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4912 resp->target_cinfo.atomic = TRUE;
4913 else
4914 resp->target_cinfo.atomic = FALSE;
4915
4916 #ifdef VOLATILE_FH_TEST
4917 {
4918 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4919
4920 /*
4921 * Add the renamed file handle to the volatile rename list
4922 */
4923 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4924 /* file handles may expire on rename */
4925 vnode_t *vp;
4926
4927 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4928 /*
4929 * Already know that nnm will be a valid string
4930 */
4931 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4932 NULL, NULL, NULL);
4933 kmem_free(nnm, nlen);
4934 if (!error) {
4935 add_volrnm_fh(cs->exi, vp);
4936 VN_RELE(vp);
4937 }
4938 }
4939 }
4940 #endif /* VOLATILE_FH_TEST */
4941
4942 *cs->statusp = resp->status = NFS4_OK;
4943 out:
4944 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4945 RENAME4res *, resp);
4946 return;
4947
4948 err_out:
4949 if (onm != converted_onm)
4950 kmem_free(converted_onm, MAXPATHLEN + 1);
4951 if (onm != NULL)
4952 kmem_free(onm, olen);
4953 if (nnm != converted_nnm)
4954 kmem_free(converted_nnm, MAXPATHLEN + 1);
4955 if (nnm != NULL)
4956 kmem_free(nnm, nlen);
4957
4958 if (in_crit_src) nbl_end_crit(srcvp);
4959 if (in_crit_targ) nbl_end_crit(targvp);
4960 if (targvp) VN_RELE(targvp);
4961 if (srcvp) VN_RELE(srcvp);
4962 if (sfp) {
4963 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4964 rfs4_file_rele(sfp);
4965 }
4966 if (fp) {
4967 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4968 rfs4_file_rele(fp);
4969 }
4970
4971 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4972 RENAME4res *, resp);
4973 }
4974
4975 /* ARGSUSED */
4976 static void
rfs4_op_renew(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4977 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4978 struct compound_state *cs)
4979 {
4980 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4981 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4982 rfs4_client_t *cp;
4983
4984 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4985 RENEW4args *, args);
4986
4987 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4988 *cs->statusp = resp->status =
4989 rfs4_check_clientid(&args->clientid, 0);
4990 goto out;
4991 }
4992
4993 if (rfs4_lease_expired(cp)) {
4994 rfs4_client_rele(cp);
4995 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4996 goto out;
4997 }
4998
4999 rfs4_update_lease(cp);
5000
5001 mutex_enter(cp->rc_cbinfo.cb_lock);
5002 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
5003 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
5004 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
5005 } else {
5006 *cs->statusp = resp->status = NFS4_OK;
5007 }
5008 mutex_exit(cp->rc_cbinfo.cb_lock);
5009
5010 rfs4_client_rele(cp);
5011
5012 out:
5013 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
5014 RENEW4res *, resp);
5015 }
5016
5017 /* ARGSUSED */
5018 static void
rfs4_op_restorefh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5019 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
5020 struct compound_state *cs)
5021 {
5022 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
5023
5024 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
5025
5026 /* No need to check cs->access - we are not accessing any object */
5027 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
5028 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
5029 goto out;
5030 }
5031 if (cs->vp != NULL) {
5032 VN_RELE(cs->vp);
5033 }
5034 cs->vp = cs->saved_vp;
5035 cs->saved_vp = NULL;
5036 cs->exi = cs->saved_exi;
5037 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
5038 *cs->statusp = resp->status = NFS4_OK;
5039 cs->deleg = FALSE;
5040
5041 out:
5042 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
5043 RESTOREFH4res *, resp);
5044 }
5045
5046 /* ARGSUSED */
5047 static void
rfs4_op_savefh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5048 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5049 struct compound_state *cs)
5050 {
5051 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
5052
5053 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
5054
5055 /* No need to check cs->access - we are not accessing any object */
5056 if (cs->vp == NULL) {
5057 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5058 goto out;
5059 }
5060 if (cs->saved_vp != NULL) {
5061 VN_RELE(cs->saved_vp);
5062 }
5063 cs->saved_vp = cs->vp;
5064 VN_HOLD(cs->saved_vp);
5065 cs->saved_exi = cs->exi;
5066 /*
5067 * since SAVEFH is fairly rare, don't alloc space for its fh
5068 * unless necessary.
5069 */
5070 if (cs->saved_fh.nfs_fh4_val == NULL) {
5071 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
5072 }
5073 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
5074 *cs->statusp = resp->status = NFS4_OK;
5075
5076 out:
5077 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
5078 SAVEFH4res *, resp);
5079 }
5080
5081 /*
5082 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
5083 * return the bitmap of attrs that were set successfully. It is also
5084 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
5085 * always be called only after rfs4_do_set_attrs().
5086 *
5087 * Verify that the attributes are same as the expected ones. sargp->vap
5088 * and sargp->sbp contain the input attributes as translated from fattr4.
5089 *
5090 * This function verifies only the attrs that correspond to a vattr or
5091 * vfsstat struct. That is because of the extra step needed to get the
5092 * corresponding system structs. Other attributes have already been set or
5093 * verified by do_rfs4_set_attrs.
5094 *
5095 * Return 0 if all attrs match, -1 if some don't, error if error processing.
5096 */
5097 static int
rfs4_verify_attr(struct nfs4_svgetit_arg * sargp,bitmap4 * resp,struct nfs4_ntov_table * ntovp)5098 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5099 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5100 {
5101 int error, ret_error = 0;
5102 int i, k;
5103 uint_t sva_mask = sargp->vap->va_mask;
5104 uint_t vbit;
5105 union nfs4_attr_u *na;
5106 uint8_t *amap;
5107 bool_t getsb = ntovp->vfsstat;
5108
5109 if (sva_mask != 0) {
5110 /*
5111 * Okay to overwrite sargp->vap because we verify based
5112 * on the incoming values.
5113 */
5114 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5115 sargp->cs->cr, NULL);
5116 if (ret_error) {
5117 if (resp == NULL)
5118 return (ret_error);
5119 /*
5120 * Must return bitmap of successful attrs
5121 */
5122 sva_mask = 0; /* to prevent checking vap later */
5123 } else {
5124 /*
5125 * Some file systems clobber va_mask. it is probably
5126 * wrong of them to do so, nonethless we practice
5127 * defensive coding.
5128 * See bug id 4276830.
5129 */
5130 sargp->vap->va_mask = sva_mask;
5131 }
5132 }
5133
5134 if (getsb) {
5135 /*
5136 * Now get the superblock and loop on the bitmap, as there is
5137 * no simple way of translating from superblock to bitmap4.
5138 */
5139 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5140 if (ret_error) {
5141 if (resp == NULL)
5142 goto errout;
5143 getsb = FALSE;
5144 }
5145 }
5146
5147 /*
5148 * Now loop and verify each attribute which getattr returned
5149 * whether it's the same as the input.
5150 */
5151 if (resp == NULL && !getsb && (sva_mask == 0))
5152 goto errout;
5153
5154 na = ntovp->na;
5155 amap = ntovp->amap;
5156 k = 0;
5157 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5158 k = *amap;
5159 ASSERT(nfs4_ntov_map[k].nval == k);
5160 vbit = nfs4_ntov_map[k].vbit;
5161
5162 /*
5163 * If vattr attribute but VOP_GETATTR failed, or it's
5164 * superblock attribute but VFS_STATVFS failed, skip
5165 */
5166 if (vbit) {
5167 if ((vbit & sva_mask) == 0)
5168 continue;
5169 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5170 continue;
5171 }
5172 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5173 if (resp != NULL) {
5174 if (error)
5175 ret_error = -1; /* not all match */
5176 else /* update response bitmap */
5177 *resp |= nfs4_ntov_map[k].fbit;
5178 continue;
5179 }
5180 if (error) {
5181 ret_error = -1; /* not all match */
5182 break;
5183 }
5184 }
5185 errout:
5186 return (ret_error);
5187 }
5188
5189 /*
5190 * Decode the attribute to be set/verified. If the attr requires a sys op
5191 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5192 * call the sv_getit function for it, because the sys op hasn't yet been done.
5193 * Return 0 for success, error code if failed.
5194 *
5195 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5196 */
5197 static int
decode_fattr4_attr(nfs4_attr_cmd_t cmd,struct nfs4_svgetit_arg * sargp,int k,XDR * xdrp,bitmap4 * resp_bval,union nfs4_attr_u * nap)5198 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5199 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5200 {
5201 int error = 0;
5202 bool_t set_later;
5203
5204 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5205
5206 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5207 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5208 /*
5209 * don't verify yet if a vattr or sb dependent attr,
5210 * because we don't have their sys values yet.
5211 * Will be done later.
5212 */
5213 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5214 /*
5215 * ACLs are a special case, since setting the MODE
5216 * conflicts with setting the ACL. We delay setting
5217 * the ACL until all other attributes have been set.
5218 * The ACL gets set in do_rfs4_op_setattr().
5219 */
5220 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5221 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5222 sargp, nap);
5223 if (error) {
5224 xdr_free(nfs4_ntov_map[k].xfunc,
5225 (caddr_t)nap);
5226 }
5227 }
5228 }
5229 } else {
5230 #ifdef DEBUG
5231 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5232 "decoding attribute %d\n", k);
5233 #endif
5234 error = EINVAL;
5235 }
5236 if (!error && resp_bval && !set_later) {
5237 *resp_bval |= nfs4_ntov_map[k].fbit;
5238 }
5239
5240 return (error);
5241 }
5242
5243 /*
5244 * Set vattr based on incoming fattr4 attrs - used by setattr.
5245 * Set response mask. Ignore any values that are not writable vattr attrs.
5246 */
5247 static nfsstat4
do_rfs4_set_attrs(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,struct nfs4_svgetit_arg * sargp,struct nfs4_ntov_table * ntovp,nfs4_attr_cmd_t cmd)5248 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5249 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5250 nfs4_attr_cmd_t cmd)
5251 {
5252 int error = 0;
5253 int i;
5254 char *attrs = fattrp->attrlist4;
5255 uint32_t attrslen = fattrp->attrlist4_len;
5256 XDR xdr;
5257 nfsstat4 status = NFS4_OK;
5258 vnode_t *vp = cs->vp;
5259 union nfs4_attr_u *na;
5260 uint8_t *amap;
5261
5262 #ifndef lint
5263 /*
5264 * Make sure that maximum attribute number can be expressed as an
5265 * 8 bit quantity.
5266 */
5267 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5268 #endif
5269
5270 if (vp == NULL) {
5271 if (resp)
5272 *resp = 0;
5273 return (NFS4ERR_NOFILEHANDLE);
5274 }
5275 if (cs->access == CS_ACCESS_DENIED) {
5276 if (resp)
5277 *resp = 0;
5278 return (NFS4ERR_ACCESS);
5279 }
5280
5281 sargp->op = cmd;
5282 sargp->cs = cs;
5283 sargp->flag = 0; /* may be set later */
5284 sargp->vap->va_mask = 0;
5285 sargp->rdattr_error = NFS4_OK;
5286 sargp->rdattr_error_req = FALSE;
5287 /* sargp->sbp is set by the caller */
5288
5289 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5290
5291 na = ntovp->na;
5292 amap = ntovp->amap;
5293
5294 /*
5295 * The following loop iterates on the nfs4_ntov_map checking
5296 * if the fbit is set in the requested bitmap.
5297 * If set then we process the arguments using the
5298 * rfs4_fattr4 conversion functions to populate the setattr
5299 * vattr and va_mask. Any settable attrs that are not using vattr
5300 * will be set in this loop.
5301 */
5302 for (i = 0; i < nfs4_ntov_map_size; i++) {
5303 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5304 continue;
5305 }
5306 /*
5307 * If setattr, must be a writable attr.
5308 * If verify/nverify, must be a readable attr.
5309 */
5310 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5311 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5312 /*
5313 * Client tries to set/verify an
5314 * unsupported attribute, tries to set
5315 * a read only attr or verify a write
5316 * only one - error!
5317 */
5318 break;
5319 }
5320 /*
5321 * Decode the attribute to set/verify
5322 */
5323 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5324 &xdr, resp ? resp : NULL, na);
5325 if (error)
5326 break;
5327 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5328 na++;
5329 (ntovp->attrcnt)++;
5330 if (nfs4_ntov_map[i].vfsstat)
5331 ntovp->vfsstat = TRUE;
5332 }
5333
5334 if (error != 0)
5335 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5336 puterrno4(error));
5337 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5338 return (status);
5339 }
5340
5341 static nfsstat4
do_rfs4_op_setattr(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,stateid4 * stateid)5342 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5343 stateid4 *stateid)
5344 {
5345 int error = 0;
5346 struct nfs4_svgetit_arg sarg;
5347 bool_t trunc;
5348
5349 nfsstat4 status = NFS4_OK;
5350 cred_t *cr = cs->cr;
5351 vnode_t *vp = cs->vp;
5352 struct nfs4_ntov_table ntov;
5353 struct statvfs64 sb;
5354 struct vattr bva;
5355 struct flock64 bf;
5356 int in_crit = 0;
5357 uint_t saved_mask = 0;
5358 caller_context_t ct;
5359
5360 *resp = 0;
5361 sarg.sbp = &sb;
5362 sarg.is_referral = B_FALSE;
5363 nfs4_ntov_table_init(&ntov);
5364 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5365 NFS4ATTR_SETIT);
5366 if (status != NFS4_OK) {
5367 /*
5368 * failed set attrs
5369 */
5370 goto done;
5371 }
5372
5373 if ((sarg.vap->va_mask == 0) &&
5374 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5375 /*
5376 * no further work to be done
5377 */
5378 goto done;
5379 }
5380
5381 /*
5382 * If we got a request to set the ACL and the MODE, only
5383 * allow changing VSUID, VSGID, and VSVTX. Attempting
5384 * to change any other bits, along with setting an ACL,
5385 * gives NFS4ERR_INVAL.
5386 */
5387 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5388 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5389 vattr_t va;
5390
5391 va.va_mask = AT_MODE;
5392 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5393 if (error) {
5394 status = puterrno4(error);
5395 goto done;
5396 }
5397 if ((sarg.vap->va_mode ^ va.va_mode) &
5398 ~(VSUID | VSGID | VSVTX)) {
5399 status = NFS4ERR_INVAL;
5400 goto done;
5401 }
5402 }
5403
5404 /* Check stateid only if size has been set */
5405 if (sarg.vap->va_mask & AT_SIZE) {
5406 trunc = (sarg.vap->va_size == 0);
5407 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5408 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs);
5409 if (status != NFS4_OK)
5410 goto done;
5411 } else {
5412 ct.cc_sysid = 0;
5413 ct.cc_pid = 0;
5414 ct.cc_caller_id = nfs4_srv_caller_id;
5415 ct.cc_flags = CC_DONTBLOCK;
5416 }
5417
5418 /* XXX start of possible race with delegations */
5419
5420 /*
5421 * We need to specially handle size changes because it is
5422 * possible for the client to create a file with read-only
5423 * modes, but with the file opened for writing. If the client
5424 * then tries to set the file size, e.g. ftruncate(3C),
5425 * fcntl(F_FREESP), the normal access checking done in
5426 * VOP_SETATTR would prevent the client from doing it even though
5427 * it should be allowed to do so. To get around this, we do the
5428 * access checking for ourselves and use VOP_SPACE which doesn't
5429 * do the access checking.
5430 * Also the client should not be allowed to change the file
5431 * size if there is a conflicting non-blocking mandatory lock in
5432 * the region of the change.
5433 */
5434 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5435 u_offset_t offset;
5436 ssize_t length;
5437
5438 /*
5439 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5440 * before returning, sarg.vap->va_mask is used to
5441 * generate the setattr reply bitmap. We also clear
5442 * AT_SIZE below before calling VOP_SPACE. For both
5443 * of these cases, the va_mask needs to be saved here
5444 * and restored after calling VOP_SETATTR.
5445 */
5446 saved_mask = sarg.vap->va_mask;
5447
5448 /*
5449 * Check any possible conflict due to NBMAND locks.
5450 * Get into critical region before VOP_GETATTR, so the
5451 * size attribute is valid when checking conflicts.
5452 */
5453 if (nbl_need_check(vp)) {
5454 nbl_start_crit(vp, RW_READER);
5455 in_crit = 1;
5456 }
5457
5458 bva.va_mask = AT_UID|AT_SIZE;
5459 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5460 if (error != 0) {
5461 status = puterrno4(error);
5462 goto done;
5463 }
5464
5465 if (in_crit) {
5466 if (sarg.vap->va_size < bva.va_size) {
5467 offset = sarg.vap->va_size;
5468 length = bva.va_size - sarg.vap->va_size;
5469 } else {
5470 offset = bva.va_size;
5471 length = sarg.vap->va_size - bva.va_size;
5472 }
5473 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5474 &ct)) {
5475 status = NFS4ERR_LOCKED;
5476 goto done;
5477 }
5478 }
5479
5480 if (crgetuid(cr) == bva.va_uid) {
5481 sarg.vap->va_mask &= ~AT_SIZE;
5482 bf.l_type = F_WRLCK;
5483 bf.l_whence = 0;
5484 bf.l_start = (off64_t)sarg.vap->va_size;
5485 bf.l_len = 0;
5486 bf.l_sysid = 0;
5487 bf.l_pid = 0;
5488 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5489 (offset_t)sarg.vap->va_size, cr, &ct);
5490 }
5491 }
5492
5493 if (!error && sarg.vap->va_mask != 0)
5494 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5495
5496 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5497 if (saved_mask & AT_SIZE)
5498 sarg.vap->va_mask |= AT_SIZE;
5499
5500 /*
5501 * If an ACL was being set, it has been delayed until now,
5502 * in order to set the mode (via the VOP_SETATTR() above) first.
5503 */
5504 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5505 int i;
5506
5507 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5508 if (ntov.amap[i] == FATTR4_ACL)
5509 break;
5510 if (i < NFS4_MAXNUM_ATTRS) {
5511 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5512 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5513 if (error == 0) {
5514 *resp |= FATTR4_ACL_MASK;
5515 } else if (error == ENOTSUP) {
5516 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5517 status = NFS4ERR_ATTRNOTSUPP;
5518 goto done;
5519 }
5520 } else {
5521 NFS4_DEBUG(rfs4_debug,
5522 (CE_NOTE, "do_rfs4_op_setattr: "
5523 "unable to find ACL in fattr4"));
5524 error = EINVAL;
5525 }
5526 }
5527
5528 if (error) {
5529 /* check if a monitor detected a delegation conflict */
5530 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5531 status = NFS4ERR_DELAY;
5532 else
5533 status = puterrno4(error);
5534
5535 /*
5536 * Set the response bitmap when setattr failed.
5537 * If VOP_SETATTR partially succeeded, test by doing a
5538 * VOP_GETATTR on the object and comparing the data
5539 * to the setattr arguments.
5540 */
5541 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5542 } else {
5543 /*
5544 * Force modified metadata out to stable storage.
5545 */
5546 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5547 /*
5548 * Set response bitmap
5549 */
5550 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5551 }
5552
5553 /* Return early and already have a NFSv4 error */
5554 done:
5555 /*
5556 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5557 * conversion sets both readable and writeable NFS4 attrs
5558 * for AT_MTIME and AT_ATIME. The line below masks out
5559 * unrequested attrs from the setattr result bitmap. This
5560 * is placed after the done: label to catch the ATTRNOTSUP
5561 * case.
5562 */
5563 *resp &= fattrp->attrmask;
5564
5565 if (in_crit)
5566 nbl_end_crit(vp);
5567
5568 nfs4_ntov_table_free(&ntov, &sarg);
5569
5570 return (status);
5571 }
5572
5573 /* ARGSUSED */
5574 static void
rfs4_op_setattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5575 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5576 struct compound_state *cs)
5577 {
5578 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5579 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5580 bslabel_t *clabel;
5581
5582 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5583 SETATTR4args *, args);
5584
5585 if (cs->vp == NULL) {
5586 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5587 goto out;
5588 }
5589
5590 /*
5591 * If there is an unshared filesystem mounted on this vnode,
5592 * do not allow to setattr on this vnode.
5593 */
5594 if (vn_ismntpt(cs->vp)) {
5595 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5596 goto out;
5597 }
5598
5599 resp->attrsset = 0;
5600
5601 if (rdonly4(req, cs)) {
5602 *cs->statusp = resp->status = NFS4ERR_ROFS;
5603 goto out;
5604 }
5605
5606 /* check label before setting attributes */
5607 if (is_system_labeled()) {
5608 ASSERT(req->rq_label != NULL);
5609 clabel = req->rq_label;
5610 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5611 "got client label from request(1)",
5612 struct svc_req *, req);
5613 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5614 if (!do_rfs_label_check(clabel, cs->vp,
5615 EQUALITY_CHECK, cs->exi)) {
5616 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5617 goto out;
5618 }
5619 }
5620 }
5621
5622 *cs->statusp = resp->status =
5623 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5624 &args->stateid);
5625
5626 out:
5627 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5628 SETATTR4res *, resp);
5629 }
5630
5631 /* ARGSUSED */
5632 static void
rfs4_op_verify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5633 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5634 struct compound_state *cs)
5635 {
5636 /*
5637 * verify and nverify are exactly the same, except that nverify
5638 * succeeds when some argument changed, and verify succeeds when
5639 * when none changed.
5640 */
5641
5642 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5643 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5644
5645 int error;
5646 struct nfs4_svgetit_arg sarg;
5647 struct statvfs64 sb;
5648 struct nfs4_ntov_table ntov;
5649
5650 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5651 VERIFY4args *, args);
5652
5653 if (cs->vp == NULL) {
5654 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5655 goto out;
5656 }
5657
5658 sarg.sbp = &sb;
5659 sarg.is_referral = B_FALSE;
5660 nfs4_ntov_table_init(&ntov);
5661 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5662 &sarg, &ntov, NFS4ATTR_VERIT);
5663 if (resp->status != NFS4_OK) {
5664 /*
5665 * do_rfs4_set_attrs will try to verify systemwide attrs,
5666 * so could return -1 for "no match".
5667 */
5668 if (resp->status == -1)
5669 resp->status = NFS4ERR_NOT_SAME;
5670 goto done;
5671 }
5672 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5673 switch (error) {
5674 case 0:
5675 resp->status = NFS4_OK;
5676 break;
5677 case -1:
5678 resp->status = NFS4ERR_NOT_SAME;
5679 break;
5680 default:
5681 resp->status = puterrno4(error);
5682 break;
5683 }
5684 done:
5685 *cs->statusp = resp->status;
5686 nfs4_ntov_table_free(&ntov, &sarg);
5687 out:
5688 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5689 VERIFY4res *, resp);
5690 }
5691
5692 /* ARGSUSED */
5693 static void
rfs4_op_nverify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5694 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5695 struct compound_state *cs)
5696 {
5697 /*
5698 * verify and nverify are exactly the same, except that nverify
5699 * succeeds when some argument changed, and verify succeeds when
5700 * when none changed.
5701 */
5702
5703 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5704 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5705
5706 int error;
5707 struct nfs4_svgetit_arg sarg;
5708 struct statvfs64 sb;
5709 struct nfs4_ntov_table ntov;
5710
5711 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5712 NVERIFY4args *, args);
5713
5714 if (cs->vp == NULL) {
5715 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5716 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5717 NVERIFY4res *, resp);
5718 return;
5719 }
5720 sarg.sbp = &sb;
5721 sarg.is_referral = B_FALSE;
5722 nfs4_ntov_table_init(&ntov);
5723 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5724 &sarg, &ntov, NFS4ATTR_VERIT);
5725 if (resp->status != NFS4_OK) {
5726 /*
5727 * do_rfs4_set_attrs will try to verify systemwide attrs,
5728 * so could return -1 for "no match".
5729 */
5730 if (resp->status == -1)
5731 resp->status = NFS4_OK;
5732 goto done;
5733 }
5734 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5735 switch (error) {
5736 case 0:
5737 resp->status = NFS4ERR_SAME;
5738 break;
5739 case -1:
5740 resp->status = NFS4_OK;
5741 break;
5742 default:
5743 resp->status = puterrno4(error);
5744 break;
5745 }
5746 done:
5747 *cs->statusp = resp->status;
5748 nfs4_ntov_table_free(&ntov, &sarg);
5749
5750 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5751 NVERIFY4res *, resp);
5752 }
5753
/*
 * MAX_IOVECS is the size of the on-stack iovec array in rfs4_op_write();
 * an mblk chain that needs more entries gets a kmem_alloc()ed array.
 */
5754 /*
5755 * XXX - This should live in an NFS header file.
5756 */
5757 #define MAX_IOVECS 12
5758
/*
 * rfs4_op_write: WRITE operation handler.
 *
 * Validates the request (current filehandle, compound access state,
 * stateid, NBMAND lock conflicts, read-only export, file type, write
 * permission, mandatory locking), builds a uio describing the payload
 * and hands it to do_io(FWRITE, ...).
 *
 * The payload is taken from, in order of preference: the mblk chain
 * (args->mblk), the chunk list (args->rlist) or the flat buffer
 * (args->data_val).
 *
 * On return resp->status and *cs->statusp hold the NFSv4 status; on
 * success resp->count, resp->committed and resp->writeverf are filled in.
 * Every exit goes through the out: label so that an NBMAND critical
 * region entered here is always left again.
 */
5759 /* ARGSUSED */
5760 static void
rfs4_op_write(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5761 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5762 struct compound_state *cs)
5763 {
5764 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5765 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5766 int error;
5767 vnode_t *vp;
5768 struct vattr bva;
5769 u_offset_t rlimit;
5770 struct uio uio;
5771 struct iovec iov[MAX_IOVECS];
5772 struct iovec *iovp;
5773 int iovcnt;
5774 int ioflag;
5775 cred_t *savecred, *cr;
5776 bool_t *deleg = &cs->deleg;
5777 nfsstat4 stat;
5778 int in_crit = 0;
5779 caller_context_t ct;
5780 nfs4_srv_t *nsrv4;
5781
5782 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5783 WRITE4args *, args);
5784
5785 vp = cs->vp;
5786 if (vp == NULL) {
5787 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5788 goto out;
5789 }
/* The compound state already records that access is denied. */
5790 if (cs->access == CS_ACCESS_DENIED) {
5791 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5792 goto out;
5793 }
5794
5795 cr = cs->cr;
5796
/* The stateid check also receives &ct, which is used by the calls below. */
5797 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5798 deleg, TRUE, &ct, cs)) != NFS4_OK) {
5799 *cs->statusp = resp->status = stat;
5800 goto out;
5801 }
5802
5803 /*
5804 * We have to enter the critical region before calling VOP_RWLOCK
5805 * to avoid a deadlock with ufs.
5806 */
5807 if (nbl_need_check(vp)) {
5808 nbl_start_crit(vp, RW_READER);
5809 in_crit = 1;
5810 if (nbl_conflict(vp, NBL_WRITE,
5811 args->offset, args->data_len, 0, &ct)) {
5812 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5813 goto out;
5814 }
5815 }
5816
/* Mode and owner are needed for the MANDLOCK and owner checks below. */
5817 bva.va_mask = AT_MODE | AT_UID;
5818 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5819
5820 /*
5821 * If we can't get the attributes, then we can't do the
5822 * right access checking. So, we'll fail the request.
5823 */
5824 if (error) {
5825 *cs->statusp = resp->status = puterrno4(error);
5826 goto out;
5827 }
5828
5829 if (rdonly4(req, cs)) {
5830 *cs->statusp = resp->status = NFS4ERR_ROFS;
5831 goto out;
5832 }
5833
/* Only regular files can be written; directories get NFS4ERR_ISDIR. */
5834 if (vp->v_type != VREG) {
5835 *cs->statusp = resp->status =
5836 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5837 goto out;
5838 }
5839
/*
 * The owner of the file skips the VOP_ACCESS() write check; every other
 * caller must pass it.
 */
5840 if (crgetuid(cr) != bva.va_uid &&
5841 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5842 *cs->statusp = resp->status = puterrno4(error);
5843 goto out;
5844 }
5845
5846 if (MANDLOCK(vp, bva.va_mode)) {
5847 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5848 goto out;
5849 }
5850
5851 nsrv4 = nfs4_get_srv();
/*
 * A zero-length write succeeds without any I/O; the stability level the
 * client asked for is echoed back as the committed level.
 */
5852 if (args->data_len == 0) {
5853 *cs->statusp = resp->status = NFS4_OK;
5854 resp->count = 0;
5855 resp->committed = args->stable;
5856 resp->writeverf = nsrv4->write4verf;
5857 goto out;
5858 }
5859
5860 if (args->mblk != NULL) {
5861 mblk_t *m;
5862 uint_t bytes, round_len;
5863
/*
 * Count the mblks needed to cover the payload, rounded up to a whole
 * XDR unit; one iovec is used per mblk.
 */
5864 iovcnt = 0;
5865 bytes = 0;
5866 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5867 for (m = args->mblk;
5868 m != NULL && bytes < round_len;
5869 m = m->b_cont) {
5870 iovcnt++;
5871 bytes += MBLKL(m);
5872 }
5873 #ifdef DEBUG
5874 /* should have ended on an mblk boundary */
5875 if (bytes != round_len) {
5876 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5877 bytes, round_len, args->data_len);
5878 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5879 (void *)args->mblk, (void *)m);
5880 ASSERT(bytes == round_len);
5881 }
5882 #endif
/*
 * Use the on-stack array when it is big enough; otherwise allocate one.
 * An allocated array is recognised later by iovp != iov and freed.
 */
5883 if (iovcnt <= MAX_IOVECS) {
5884 iovp = iov;
5885 } else {
5886 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5887 }
5888 mblk_to_iov(args->mblk, iovcnt, iovp);
5889 } else if (args->rlist != NULL) {
/* Payload is addressed through the chunk list: a single iovec. */
5890 iovcnt = 1;
5891 iovp = iov;
5892 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5893 iovp->iov_len = args->data_len;
5894 } else {
/* Payload is a flat buffer: a single iovec. */
5895 iovcnt = 1;
5896 iovp = iov;
5897 iovp->iov_base = args->data_val;
5898 iovp->iov_len = args->data_len;
5899 }
5900
5901 uio.uio_iov = iovp;
5902 uio.uio_iovcnt = iovcnt;
5903
5904 uio.uio_segflg = UIO_SYSSPACE;
5905 uio.uio_extflg = UIO_COPY_DEFAULT;
5906 uio.uio_loffset = args->offset;
5907 uio.uio_resid = args->data_len;
/*
 * Shorten the transfer so that it does not run past the file size limit
 * taken from curproc->p_fsz_ctl.
 * NOTE(review): if args->offset is larger than uio_llimit the subtraction
 * below presumably wraps (u_offset_t looks unsigned) and no clamping
 * happens - confirm this is handled further down the write path.
 */
5908 uio.uio_llimit = curproc->p_fsz_ctl;
5909 rlimit = uio.uio_llimit - args->offset;
5910 if (rlimit < (u_offset_t)uio.uio_resid)
5911 uio.uio_resid = (int)rlimit;
5912
/*
 * Translate the requested stability level into do_io() flags.  An
 * unknown level is rejected with NFS4ERR_INVAL after releasing an
 * allocated iovec array.
 */
5913 if (args->stable == UNSTABLE4)
5914 ioflag = 0;
5915 else if (args->stable == FILE_SYNC4)
5916 ioflag = FSYNC;
5917 else if (args->stable == DATA_SYNC4)
5918 ioflag = FDSYNC;
5919 else {
5920 if (iovp != iov)
5921 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5922 *cs->statusp = resp->status = NFS4ERR_INVAL;
5923 goto out;
5924 }
5925
5926 /*
5927 * We're changing creds because VM may fault and we need
5928 * the cred of the current thread to be used if quota
5929 * checking is enabled.
5930 */
5931 savecred = curthread->t_cred;
5932 curthread->t_cred = cr;
5933 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5934 curthread->t_cred = savecred;
5935
5936 if (iovp != iov)
5937 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5938
5939 if (error) {
5940 *cs->statusp = resp->status = puterrno4(error);
5941 goto out;
5942 }
5943
5944 *cs->statusp = resp->status = NFS4_OK;
/* Bytes written = bytes requested minus what is left in the uio. */
5945 resp->count = args->data_len - uio.uio_resid;
5946
/* Any synchronous request (FSYNC or FDSYNC) is reported as FILE_SYNC4. */
5947 if (ioflag == 0)
5948 resp->committed = UNSTABLE4;
5949 else
5950 resp->committed = FILE_SYNC4;
5951
5952 resp->writeverf = nsrv4->write4verf;
5953
5954 out:
/* Leave the NBMAND critical region if it was entered above. */
5955 if (in_crit)
5956 nbl_end_crit(vp);
5957
5958 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5959 WRITE4res *, resp);
5960 }
5961
5962 static inline int
rfs4_opnum_in_range(const compound_state_t * cs,int opnum)5963 rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
5964 {
5965 if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
5966 return (0);
5967 else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
5968 return (0);
5969 else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
5970 return (0);
5971 else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
5972 return (0);
5973 return (1);
5974 }
5975
/*
 * rfs4_compound: execute an NFSv4 COMPOUND request.
 *
 *   args - the request: tag, minorversion and the array of operations
 *   resp - the reply; its tag and results array are allocated here
 *   cs   - compound state; statusp, req, minorversion, op_len, basecr
 *          and op_pos are filled in by this function
 *   req  - the RPC request
 *   rv   - optional; set to 0 on entry and to 1 when sec_svc_getcred()
 *          fails, in which case svcerr_badcred() has been called
 *
 * Operations are dispatched in order through rfsv4disptab[].  Processing
 * stops after the first operation that leaves a status other than
 * NFS4_OK in *cs->statusp, or after an operation number that is out of
 * range; the results array is then shrunk to the operations actually run.
 */
5976 void
rfs4_compound(COMPOUND4args * args,COMPOUND4res * resp,compound_state_t * cs,struct svc_req * req,int * rv)5977 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
5978 struct svc_req *req, int *rv)
5979 {
5980 uint_t i;
5981 cred_t *cr;
5982 nfs4_srv_t *nsrv4;
5983 nfs_export_t *ne = nfs_get_export();
5984
5985 if (rv != NULL)
5986 *rv = 0;
5987 /*
5988 * Form a reply tag by copying over the request tag.
5989 */
5990 resp->tag.utf8string_len = args->tag.utf8string_len;
5991 if (args->tag.utf8string_len != 0) {
5992 resp->tag.utf8string_val =
5993 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5994 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5995 resp->tag.utf8string_len);
5996 } else {
5997 resp->tag.utf8string_val = NULL;
5998 }
5999
6000 cs->statusp = &resp->status;
6001 cs->req = req;
6002 cs->minorversion = args->minorversion;
6003 resp->array = NULL;
6004 resp->array_len = 0;
6005
/* A compound without operations succeeds with an empty results array. */
6006 if (args->array_len == 0) {
6007 resp->status = NFS4_OK;
6008 return;
6009 }
6010
6011 cr = svc_xprt_cred(req->rq_xprt);
6012 ASSERT(cr != NULL);
6013
/*
 * Credential retrieval failed: fire both the start and done probes,
 * report a bad credential on the transport and flag it through rv.
 */
6014 if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
6015 DTRACE_NFSV4_2(compound__start, struct compound_state *,
6016 cs, COMPOUND4args *, args);
6017 DTRACE_NFSV4_2(compound__done, struct compound_state *,
6018 cs, COMPOUND4res *, resp);
6019 svcerr_badcred(req->rq_xprt);
6020 if (rv != NULL)
6021 *rv = 1;
6022 return;
6023 }
6024
/* One zeroed result slot per requested operation. */
6025 resp->array_len = args->array_len;
6026 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
6027 KM_SLEEP);
6028
6029 cs->op_len = args->array_len;
6030 cs->basecr = cr;
6031 nsrv4 = nfs4_get_srv();
6032
6033 DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
6034 COMPOUND4args *, args);
6035
6036 /*
6037 * For now, NFS4 compound processing must be protected by
6038 * exported_lock because it can access more than one exportinfo
6039 * per compound and share/unshare can now change multiple
6040 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
6041 * per proc (excluding public exinfo), and exi_count design
6042 * is sufficient to protect concurrent execution of NFS2/3
6043 * ops along with unexport. This lock will be removed as
6044 * part of the NFSv4 phase 2 namespace redesign work.
6045 */
6046 rw_enter(&ne->exported_lock, RW_READER);
6047
6048 /*
6049 * If this is the first compound we've seen, we need to start all
6050 * new instances' grace periods.
6051 */
6052 if (nsrv4->seen_first_compound == 0) {
6053 rfs4_grace_start_new(nsrv4);
6054 /*
6055 * This must be set after rfs4_grace_start_new(), otherwise
6056 * another thread could proceed past here before the former
6057 * is finished.
6058 */
6059 nsrv4->seen_first_compound = 1;
6060 }
6061
/* Run the operations in order for as long as cs->cont stays set. */
6062 for (i = 0; i < args->array_len && cs->cont; i++) {
6063 nfs_argop4 *argop;
6064 nfs_resop4 *resop;
6065 uint_t op;
6066 kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
6067
6068 argop = &args->array[i];
6069 resop = &resp->array[i];
6070 resop->resop = argop->argop;
6071 op = (uint_t)resop->resop;
6072
6073 cs->op_pos = i;
/*
 * The operation must have a dispatch table entry and be valid for the
 * minor version of this compound.
 */
6074 if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
6075 /*
6076 * Count the individual ops here; NULL and COMPOUND
6077 * are counted in common_dispatch()
6078 */
6079 stat[op].value.ui64++;
6080
6081 NFS4_DEBUG(rfs4_debug > 1,
6082 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
6083 (*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
6084 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
6085 rfs4_op_string[op], *cs->statusp));
6086 if (*cs->statusp != NFS4_OK)
6087 cs->cont = FALSE;
6088 } else {
6089 /*
6090 * This is effectively dead code since XDR code
6091 * will have already returned BADXDR if op doesn't
6092 * decode to legal value. This only done for a
6093 * day when XDR code doesn't verify v4 opcodes.
6094 */
6095 op = OP_ILLEGAL;
6096 stat[OP_ILLEGAL_IDX].value.ui64++;
6097
6098 rfs4_op_illegal(argop, resop, req, cs);
6099 cs->cont = FALSE;
6100 }
6101
6102 /*
6103 * If not at last op, and if we are to stop, then
6104 * compact the results array.
6105 */
/*
 * The first i + 1 results are copied into a right-sized allocation so
 * that resp->array_len again matches the size of resp->array, which is
 * what rfs4_compound_free() uses when freeing it.
 */
6106 if ((i + 1) < args->array_len && !cs->cont) {
6107 nfs_resop4 *new_res = kmem_alloc(
6108 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
6109 bcopy(resp->array,
6110 new_res, (i+1) * sizeof (nfs_resop4));
6111 kmem_free(resp->array,
6112 args->array_len * sizeof (nfs_resop4));
6113
6114 resp->array_len = i + 1;
6115 resp->array = new_res;
6116 }
6117 }
6118
6119 rw_exit(&ne->exported_lock);
6120
6121 DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
6122 COMPOUND4res *, resp);
6123
6124 /*
6125 * done with this compound request, free the label
6126 */
6127
6128 if (req->rq_label != NULL) {
6129 kmem_free(req->rq_label, sizeof (bslabel_t));
6130 req->rq_label = NULL;
6131 }
6132 }
6133
6134 /*
6135 * XXX because of what appears to be duplicate calls to rfs4_compound_free
6136 * XXX zero out the tag and array values. Need to investigate why the
6137 * XXX calls occur, but at least prevent the panic for now.
6138 */
6139 void
rfs4_compound_free(COMPOUND4res * resp)6140 rfs4_compound_free(COMPOUND4res *resp)
6141 {
6142 uint_t i;
6143
6144 if (resp->tag.utf8string_val) {
6145 UTF8STRING_FREE(resp->tag)
6146 }
6147
6148 for (i = 0; i < resp->array_len; i++) {
6149 nfs_resop4 *resop;
6150 uint_t op;
6151
6152 resop = &resp->array[i];
6153 op = (uint_t)resop->resop;
6154 if (op < rfsv4disp_cnt) {
6155 (*rfsv4disptab[op].dis_resfree)(resop);
6156 }
6157 }
6158 if (resp->array != NULL) {
6159 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6160 }
6161 }
6162
6163 /*
6164 * Process the value of the compound request rpc flags, as a bit-AND
6165 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6166 */
6167 void
rfs4_compound_flagproc(COMPOUND4args * args,int * flagp)6168 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6169 {
6170 int i;
6171 int flag = RPC_ALL;
6172
6173 for (i = 0; flag && i < args->array_len; i++) {
6174 uint_t op;
6175
6176 op = (uint_t)args->array[i].argop;
6177
6178 if (op < rfsv4disp_cnt)
6179 flag &= rfsv4disptab[op].dis_flags;
6180 else
6181 flag = 0;
6182 }
6183 *flagp = flag;
6184 }
6185
6186 nfsstat4
rfs4_client_sysid(rfs4_client_t * cp,sysid_t * sp)6187 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6188 {
6189 nfsstat4 e;
6190
6191 rfs4_dbe_lock(cp->rc_dbe);
6192
6193 if (cp->rc_sysidt != LM_NOSYSID) {
6194 *sp = cp->rc_sysidt;
6195 e = NFS4_OK;
6196
6197 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6198 *sp = cp->rc_sysidt;
6199 e = NFS4_OK;
6200
6201 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6202 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6203 } else
6204 e = NFS4ERR_DELAY;
6205
6206 rfs4_dbe_unlock(cp->rc_dbe);
6207 return (e);
6208 }
6209
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: log a one-line description of a record lock request
 * with cmn_err(CE_NOTE, ...).
 *
 *   str       - prefix for the message
 *   operation - lock command; F_GETLK, F_SETLK and F_SETLK_NBMAND are
 *               named, anything else is printed as F_UNKNOWN
 *   flk       - the lock being described; l_whence is asserted to be 0
 *
 * A zero l_len is printed as all ones (~0LL).
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	switch (operation) {
	case F_GETLK:
		op = "F_GETLK";
		break;
	case F_SETLK:
		op = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		op = "F_SETLK_NBMAND";
		break;
	default:
		op = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		type = "F_UNLCK";
		break;
	case F_RDLCK:
		type = "F_RDLCK";
		break;
	case F_WRLCK:
		type = "F_WRLCK";
		break;
	default:
		type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do { } while (0) so that the macro is a single statement
 * and cannot capture an `else' that follows it at the call site.
 */
#define	LOCK_PRINT(d, s, t, f)	\
	do { if (d) lock_print(s, t, f); } while (0)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6246
6247 /*ARGSUSED*/
6248 static bool_t
creds_ok(cred_set_t * cr_set,struct svc_req * req,struct compound_state * cs)6249 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs)
6250 {
6251 return (TRUE);
6252 }
6253
6254 /*
6255 * Look up the pathname using the vp in cs as the directory vnode.
6256 * cs->vp will be the vnode for the file on success
6257 */
6258
6259 static nfsstat4
rfs4_lookup(component4 * component,struct svc_req * req,struct compound_state * cs)6260 rfs4_lookup(component4 *component, struct svc_req *req,
6261 struct compound_state *cs)
6262 {
6263 char *nm;
6264 uint32_t len;
6265 nfsstat4 status;
6266 struct sockaddr *ca;
6267 char *name;
6268
6269 if (cs->vp == NULL) {
6270 return (NFS4ERR_NOFILEHANDLE);
6271 }
6272 if (cs->vp->v_type != VDIR) {
6273 return (NFS4ERR_NOTDIR);
6274 }
6275
6276 status = utf8_dir_verify(component);
6277 if (status != NFS4_OK)
6278 return (status);
6279
6280 nm = utf8_to_fn(component, &len, NULL);
6281 if (nm == NULL) {
6282 return (NFS4ERR_INVAL);
6283 }
6284
6285 if (len > MAXNAMELEN) {
6286 kmem_free(nm, len);
6287 return (NFS4ERR_NAMETOOLONG);
6288 }
6289
6290 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6291 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6292 MAXPATHLEN + 1);
6293
6294 if (name == NULL) {
6295 kmem_free(nm, len);
6296 return (NFS4ERR_INVAL);
6297 }
6298
6299 status = do_rfs4_op_lookup(name, req, cs);
6300
6301 if (name != nm)
6302 kmem_free(name, MAXPATHLEN + 1);
6303
6304 kmem_free(nm, len);
6305
6306 return (status);
6307 }
6308
6309 static nfsstat4
rfs4_lookupfile(component4 * component,struct svc_req * req,struct compound_state * cs,uint32_t access,change_info4 * cinfo)6310 rfs4_lookupfile(component4 *component, struct svc_req *req,
6311 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6312 {
6313 nfsstat4 status;
6314 vnode_t *dvp = cs->vp;
6315 vattr_t bva, ava, fva;
6316 int error;
6317
6318 /* Get "before" change value */
6319 bva.va_mask = AT_CTIME|AT_SEQ;
6320 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6321 if (error)
6322 return (puterrno4(error));
6323
6324 /* rfs4_lookup may VN_RELE directory */
6325 VN_HOLD(dvp);
6326
6327 status = rfs4_lookup(component, req, cs);
6328 if (status != NFS4_OK) {
6329 VN_RELE(dvp);
6330 return (status);
6331 }
6332
6333 /*
6334 * Get "after" change value, if it fails, simply return the
6335 * before value.
6336 */
6337 ava.va_mask = AT_CTIME|AT_SEQ;
6338 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6339 ava.va_ctime = bva.va_ctime;
6340 ava.va_seq = 0;
6341 }
6342 VN_RELE(dvp);
6343
6344 /*
6345 * Validate the file is a file
6346 */
6347 fva.va_mask = AT_TYPE|AT_MODE;
6348 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6349 if (error)
6350 return (puterrno4(error));
6351
6352 if (fva.va_type != VREG) {
6353 if (fva.va_type == VDIR)
6354 return (NFS4ERR_ISDIR);
6355 if (fva.va_type == VLNK)
6356 return (NFS4ERR_SYMLINK);
6357 return (NFS4ERR_INVAL);
6358 }
6359
6360 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6361 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6362
6363 /*
6364 * It is undefined if VOP_LOOKUP will change va_seq, so
6365 * cinfo.atomic = TRUE only if we have
6366 * non-zero va_seq's, and they have not changed.
6367 */
6368 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6369 cinfo->atomic = TRUE;
6370 else
6371 cinfo->atomic = FALSE;
6372
6373 /* Check for mandatory locking */
6374 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6375 return (check_open_access(access, cs, req));
6376 }
6377
6378 static nfsstat4
create_vnode(vnode_t * dvp,char * nm,vattr_t * vap,createmode4 mode,cred_t * cr,vnode_t ** vpp,bool_t * created)6379 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6380 cred_t *cr, vnode_t **vpp, bool_t *created)
6381 {
6382 int error;
6383 nfsstat4 status = NFS4_OK;
6384 vattr_t va;
6385
6386 tryagain:
6387
6388 /*
6389 * The file open mode used is VWRITE. If the client needs
6390 * some other semantic, then it should do the access checking
6391 * itself. It would have been nice to have the file open mode
6392 * passed as part of the arguments.
6393 */
6394
6395 *created = TRUE;
6396 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6397
6398 if (error) {
6399 *created = FALSE;
6400
6401 /*
6402 * If we got something other than file already exists
6403 * then just return this error. Otherwise, we got
6404 * EEXIST. If we were doing a GUARDED create, then
6405 * just return this error. Otherwise, we need to
6406 * make sure that this wasn't a duplicate of an
6407 * exclusive create request.
6408 *
6409 * The assumption is made that a non-exclusive create
6410 * request will never return EEXIST.
6411 */
6412
6413 if (error != EEXIST || mode == GUARDED4) {
6414 status = puterrno4(error);
6415 return (status);
6416 }
6417 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6418 NULL, NULL, NULL);
6419
6420 if (error) {
6421 /*
6422 * We couldn't find the file that we thought that
6423 * we just created. So, we'll just try creating
6424 * it again.
6425 */
6426 if (error == ENOENT)
6427 goto tryagain;
6428
6429 status = puterrno4(error);
6430 return (status);
6431 }
6432
6433 if (mode == UNCHECKED4) {
6434 /* existing object must be regular file */
6435 if ((*vpp)->v_type != VREG) {
6436 if ((*vpp)->v_type == VDIR)
6437 status = NFS4ERR_ISDIR;
6438 else if ((*vpp)->v_type == VLNK)
6439 status = NFS4ERR_SYMLINK;
6440 else
6441 status = NFS4ERR_INVAL;
6442 VN_RELE(*vpp);
6443 return (status);
6444 }
6445
6446 return (NFS4_OK);
6447 }
6448
6449 /* Check for duplicate request */
6450 va.va_mask = AT_MTIME;
6451 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6452 if (!error) {
6453 /* We found the file */
6454 const timestruc_t *mtime = &vap->va_mtime;
6455
6456 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6457 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6458 /* but its not our creation */
6459 VN_RELE(*vpp);
6460 return (NFS4ERR_EXIST);
6461 }
6462 *created = TRUE; /* retrans of create == created */
6463 return (NFS4_OK);
6464 }
6465 VN_RELE(*vpp);
6466 return (NFS4ERR_EXIST);
6467 }
6468
6469 return (NFS4_OK);
6470 }
6471
6472 static nfsstat4
check_open_access(uint32_t access,struct compound_state * cs,struct svc_req * req)6473 check_open_access(uint32_t access, struct compound_state *cs,
6474 struct svc_req *req)
6475 {
6476 int error;
6477 vnode_t *vp;
6478 bool_t readonly;
6479 cred_t *cr = cs->cr;
6480
6481 /* For now we don't allow mandatory locking as per V2/V3 */
6482 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6483 return (NFS4ERR_ACCESS);
6484 }
6485
6486 vp = cs->vp;
6487 ASSERT(cr != NULL && vp->v_type == VREG);
6488
6489 /*
6490 * If the file system is exported read only and we are trying
6491 * to open for write, then return NFS4ERR_ROFS
6492 */
6493
6494 readonly = rdonly4(req, cs);
6495
6496 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6497 return (NFS4ERR_ROFS);
6498
6499 if (access & OPEN4_SHARE_ACCESS_READ) {
6500 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6501 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6502 return (NFS4ERR_ACCESS);
6503 }
6504 }
6505
6506 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6507 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6508 if (error)
6509 return (NFS4ERR_ACCESS);
6510 }
6511
6512 return (NFS4_OK);
6513 }
6514
6515 static void
rfs4_verifier_to_mtime(verifier4 v,timestruc_t * mtime)6516 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime)
6517 {
6518 timespec32_t *time = (timespec32_t *)&v;
6519
6520 /*
6521 * Ensure no time overflows. Assumes underlying
6522 * filesystem supports at least 32 bits.
6523 * Truncate nsec to usec resolution to allow valid
6524 * compares even if the underlying filesystem truncates.
6525 */
6526 mtime->tv_sec = time->tv_sec % TIME32_MAX;
6527 mtime->tv_nsec = (time->tv_nsec / 1000) * 1000;
6528 }
6529
6530 static nfsstat4
rfs4_createfile(OPEN4args * args,struct svc_req * req,struct compound_state * cs,change_info4 * cinfo,bitmap4 * attrset,clientid4 clientid)6531 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6532 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6533 {
6534 struct nfs4_svgetit_arg sarg;
6535 struct nfs4_ntov_table ntov;
6536
6537 bool_t ntov_table_init = FALSE;
6538 struct statvfs64 sb;
6539 nfsstat4 status;
6540 vnode_t *vp;
6541 vattr_t bva, ava, iva, cva, *vap;
6542 vnode_t *dvp;
6543 char *nm = NULL;
6544 uint_t buflen;
6545 bool_t created;
6546 bool_t setsize = FALSE;
6547 len_t reqsize;
6548 int error;
6549 bool_t trunc;
6550 caller_context_t ct;
6551 component4 *component;
6552 bslabel_t *clabel;
6553 struct sockaddr *ca;
6554 char *name = NULL;
6555 fattr4 *fattr = NULL;
6556
6557 ASSERT(*attrset == 0);
6558
6559 sarg.sbp = &sb;
6560 sarg.is_referral = B_FALSE;
6561
6562 dvp = cs->vp;
6563
6564 /* Check if the file system is read only */
6565 if (rdonly4(req, cs))
6566 return (NFS4ERR_ROFS);
6567
6568 /* check the label of including directory */
6569 if (is_system_labeled()) {
6570 ASSERT(req->rq_label != NULL);
6571 clabel = req->rq_label;
6572 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6573 "got client label from request(1)",
6574 struct svc_req *, req);
6575 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6576 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6577 cs->exi)) {
6578 return (NFS4ERR_ACCESS);
6579 }
6580 }
6581 }
6582
6583 if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) &&
6584 dvp->v_flag & V_XATTRDIR) {
6585 /* prohibit EXCL create of named attributes */
6586 return (NFS4ERR_INVAL);
6587 }
6588
6589 /*
6590 * Get the last component of path name in nm. cs will reference
6591 * the including directory on success.
6592 */
6593 component = &args->claim.open_claim4_u.file;
6594 status = utf8_dir_verify(component);
6595 if (status != NFS4_OK)
6596 return (status);
6597
6598 nm = utf8_to_fn(component, &buflen, NULL);
6599
6600 if (nm == NULL)
6601 return (NFS4ERR_RESOURCE);
6602
6603 if (buflen > MAXNAMELEN) {
6604 kmem_free(nm, buflen);
6605 return (NFS4ERR_NAMETOOLONG);
6606 }
6607
6608 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6609 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6610 if (error) {
6611 kmem_free(nm, buflen);
6612 return (puterrno4(error));
6613 }
6614
6615 if (bva.va_type != VDIR) {
6616 kmem_free(nm, buflen);
6617 return (NFS4ERR_NOTDIR);
6618 }
6619
6620 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6621
6622 switch (args->mode) {
6623 case GUARDED4:
6624 /*FALLTHROUGH*/
6625 case UNCHECKED4:
6626 case EXCLUSIVE4_1:
6627 nfs4_ntov_table_init(&ntov);
6628 ntov_table_init = TRUE;
6629
6630 if (args->mode == EXCLUSIVE4_1)
6631 fattr = &args->createhow4_u.ch_createboth.cva_attrs;
6632 else
6633 fattr = &args->createhow4_u.createattrs;
6634
6635 status = do_rfs4_set_attrs(attrset,
6636 fattr,
6637 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6638
6639 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6640 sarg.vap->va_type != VREG) {
6641 if (sarg.vap->va_type == VDIR)
6642 status = NFS4ERR_ISDIR;
6643 else if (sarg.vap->va_type == VLNK)
6644 status = NFS4ERR_SYMLINK;
6645 else
6646 status = NFS4ERR_INVAL;
6647 }
6648
6649 if (status != NFS4_OK) {
6650 kmem_free(nm, buflen);
6651 nfs4_ntov_table_free(&ntov, &sarg);
6652 *attrset = 0;
6653 return (status);
6654 }
6655
6656 vap = sarg.vap;
6657 vap->va_type = VREG;
6658 vap->va_mask |= AT_TYPE;
6659
6660 if ((vap->va_mask & AT_MODE) == 0) {
6661 vap->va_mask |= AT_MODE;
6662 vap->va_mode = (mode_t)0600;
6663 }
6664
6665 if (vap->va_mask & AT_SIZE) {
6666
6667 /* Disallow create with a non-zero size */
6668
6669 if ((reqsize = sarg.vap->va_size) != 0) {
6670 kmem_free(nm, buflen);
6671 nfs4_ntov_table_free(&ntov, &sarg);
6672 *attrset = 0;
6673 return (NFS4ERR_INVAL);
6674 }
6675 setsize = TRUE;
6676 }
6677 if (args->mode == EXCLUSIVE4_1) {
6678 rfs4_verifier_to_mtime(
6679 args->createhow4_u.ch_createboth.cva_verf,
6680 &vap->va_mtime);
6681 /* attrset will be set later */
6682 fattr->attrmask |= FATTR4_TIME_MODIFY_MASK;
6683 vap->va_mask |= AT_MTIME;
6684 }
6685 break;
6686
6687 case EXCLUSIVE4:
6688 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6689 cva.va_type = VREG;
6690 cva.va_mode = (mode_t)0;
6691
6692 rfs4_verifier_to_mtime(args->createhow4_u.createverf,
6693 &cva.va_mtime);
6694
6695 vap = &cva;
6696
6697 /*
6698 * For EXCL create, attrset is set to the server attr
6699 * used to cache the client's verifier.
6700 */
6701 *attrset = FATTR4_TIME_MODIFY_MASK;
6702 break;
6703 }
6704
6705 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6706 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6707 MAXPATHLEN + 1);
6708
6709 if (name == NULL) {
6710 kmem_free(nm, buflen);
6711 return (NFS4ERR_SERVERFAULT);
6712 }
6713
6714 status = create_vnode(dvp, name, vap, args->mode,
6715 cs->cr, &vp, &created);
6716 if (nm != name)
6717 kmem_free(name, MAXPATHLEN + 1);
6718 kmem_free(nm, buflen);
6719
6720 if (status != NFS4_OK) {
6721 if (ntov_table_init)
6722 nfs4_ntov_table_free(&ntov, &sarg);
6723 *attrset = 0;
6724 return (status);
6725 }
6726
6727 trunc = (setsize && !created);
6728
6729 if (args->mode != EXCLUSIVE4) {
6730 bitmap4 createmask = fattr->attrmask;
6731
6732 /*
6733 * True verification that object was created with correct
6734 * attrs is impossible. The attrs could have been changed
6735 * immediately after object creation. If attributes did
6736 * not verify, the only recourse for the server is to
6737 * destroy the object. Maybe if some attrs (like gid)
6738 * are set incorrectly, the object should be destroyed;
6739 * however, seems bad as a default policy. Do we really
6740 * want to destroy an object over one of the times not
6741 * verifying correctly? For these reasons, the server
6742 * currently sets bits in attrset for createattrs
6743 * that were set; however, no verification is done.
6744 *
6745 * vmask_to_nmask accounts for vattr bits set on create
6746 * [do_rfs4_set_attrs() only sets resp bits for
6747 * non-vattr/vfs bits.]
6748 * Mask off any bits we set by default so as not to return
6749 * more attrset bits than were requested in createattrs
6750 */
6751 if (created) {
6752 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6753 *attrset &= createmask;
6754 } else {
6755 /*
6756 * We did not create the vnode (we tried but it
6757 * already existed). In this case, the only createattr
6758 * that the spec allows the server to set is size,
6759 * and even then, it can only be set if it is 0.
6760 */
6761 *attrset = 0;
6762 if (trunc)
6763 *attrset = FATTR4_SIZE_MASK;
6764 }
6765 }
6766 if (ntov_table_init)
6767 nfs4_ntov_table_free(&ntov, &sarg);
6768
6769 /*
6770 * Get the initial "after" sequence number, if it fails,
6771 * set to zero, time to before.
6772 */
6773 iva.va_mask = AT_CTIME|AT_SEQ;
6774 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6775 iva.va_seq = 0;
6776 iva.va_ctime = bva.va_ctime;
6777 }
6778
6779 /*
6780 * create_vnode attempts to create the file exclusive,
6781 * if it already exists the VOP_CREATE will fail and
6782 * may not increase va_seq. It is atomic if
6783 * we haven't changed the directory, but if it has changed
6784 * we don't know what changed it.
6785 */
6786 if (!created) {
6787 if (bva.va_seq && iva.va_seq &&
6788 bva.va_seq == iva.va_seq)
6789 cinfo->atomic = TRUE;
6790 else
6791 cinfo->atomic = FALSE;
6792 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6793 } else {
6794 /*
6795 * The entry was created, we need to sync the
6796 * directory metadata.
6797 */
6798 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6799
6800 /*
6801 * Get "after" change value, if it fails, simply return the
6802 * before value.
6803 */
6804 ava.va_mask = AT_CTIME|AT_SEQ;
6805 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6806 ava.va_ctime = bva.va_ctime;
6807 ava.va_seq = 0;
6808 }
6809
6810 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6811
6812 /*
6813 * The cinfo->atomic = TRUE only if we have
6814 * non-zero va_seq's, and it has incremented by exactly one
6815 * during the create_vnode and it didn't
6816 * change during the VOP_FSYNC.
6817 */
6818 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6819 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6820 cinfo->atomic = TRUE;
6821 else
6822 cinfo->atomic = FALSE;
6823 }
6824
6825 /* Check for mandatory locking and that the size gets set. */
6826 cva.va_mask = AT_MODE;
6827 if (setsize)
6828 cva.va_mask |= AT_SIZE;
6829
6830 /* Assume the worst */
6831 cs->mandlock = TRUE;
6832
6833 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6834 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6835
6836 /*
6837 * Truncate the file if necessary; this would be
6838 * the case for create over an existing file.
6839 */
6840
6841 if (trunc) {
6842 int in_crit = 0;
6843 rfs4_file_t *fp;
6844 nfs4_srv_t *nsrv4;
6845 bool_t create = FALSE;
6846
6847 /*
6848 * We are writing over an existing file.
6849 * Check to see if we need to recall a delegation.
6850 */
6851 nsrv4 = nfs4_get_srv();
6852 rfs4_hold_deleg_policy(nsrv4);
6853 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6854 if (rfs4_check_delegated_byfp(FWRITE, fp,
6855 (reqsize == 0), FALSE, FALSE, &clientid)) {
6856 rfs4_file_rele(fp);
6857 rfs4_rele_deleg_policy(nsrv4);
6858 VN_RELE(vp);
6859 *attrset = 0;
6860 return (NFS4ERR_DELAY);
6861 }
6862 rfs4_file_rele(fp);
6863 }
6864 rfs4_rele_deleg_policy(nsrv4);
6865
6866 if (nbl_need_check(vp)) {
6867 in_crit = 1;
6868
6869 ASSERT(reqsize == 0);
6870
6871 nbl_start_crit(vp, RW_READER);
6872 if (nbl_conflict(vp, NBL_WRITE, 0,
6873 cva.va_size, 0, NULL)) {
6874 in_crit = 0;
6875 nbl_end_crit(vp);
6876 VN_RELE(vp);
6877 *attrset = 0;
6878 return (NFS4ERR_ACCESS);
6879 }
6880 }
6881 ct.cc_sysid = 0;
6882 ct.cc_pid = 0;
6883 ct.cc_caller_id = nfs4_srv_caller_id;
6884 ct.cc_flags = CC_DONTBLOCK;
6885
6886 cva.va_mask = AT_SIZE;
6887 cva.va_size = reqsize;
6888 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6889 if (in_crit)
6890 nbl_end_crit(vp);
6891 }
6892 }
6893
6894 error = makefh4(&cs->fh, vp, cs->exi);
6895
6896 /*
6897 * Force modified data and metadata out to stable storage.
6898 */
6899 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6900
6901 if (error) {
6902 VN_RELE(vp);
6903 *attrset = 0;
6904 return (puterrno4(error));
6905 }
6906
6907 /* if parent dir is attrdir, set namedattr fh flag */
6908 if (dvp->v_flag & V_XATTRDIR)
6909 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6910
6911 if (cs->vp)
6912 VN_RELE(cs->vp);
6913
6914 cs->vp = vp;
6915
6916 /*
6917 * if we did not create the file, we will need to check
6918 * the access bits on the file
6919 */
6920
6921 if (!created) {
6922 if (setsize)
6923 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6924 status = check_open_access(args->share_access, cs, req);
6925 if (status != NFS4_OK)
6926 *attrset = 0;
6927 }
6928 return (status);
6929 }
6930
6931 /*ARGSUSED*/
6932 static void
rfs4_do_open(struct compound_state * cs,struct svc_req * req,rfs4_openowner_t * oo,delegreq_t deleg,uint32_t access,uint32_t deny,OPEN4res * resp,int deleg_cur)6933 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6934 rfs4_openowner_t *oo, delegreq_t deleg,
6935 uint32_t access, uint32_t deny,
6936 OPEN4res *resp, int deleg_cur)
6937 {
6938 /* XXX Currently not using req */
6939 rfs4_state_t *sp;
6940 rfs4_file_t *fp;
6941 bool_t screate = TRUE;
6942 bool_t fcreate = TRUE;
6943 uint32_t open_a, share_a;
6944 uint32_t open_d, share_d;
6945 rfs4_deleg_state_t *dsp;
6946 sysid_t sysid;
6947 nfsstat4 status;
6948 caller_context_t ct;
6949 int fflags = 0;
6950 int recall = 0;
6951 int err;
6952 int first_open;
6953
6954 /* get the file struct and hold a lock on it during initial open */
6955 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6956 if (fp == NULL) {
6957 resp->status = NFS4ERR_RESOURCE;
6958 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6959 return;
6960 }
6961
6962 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6963 if (sp == NULL) {
6964 resp->status = NFS4ERR_RESOURCE;
6965 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6966 /* No need to keep any reference */
6967 rw_exit(&fp->rf_file_rwlock);
6968 rfs4_file_rele(fp);
6969 return;
6970 }
6971
6972 /* try to get the sysid before continuing */
6973 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6974 resp->status = status;
6975 rfs4_file_rele(fp);
6976 /* Not a fully formed open; "close" it */
6977 if (screate == TRUE)
6978 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6979 rfs4_state_rele(sp);
6980 return;
6981 }
6982
6983 /* Calculate the fflags for this OPEN. */
6984 if (access & OPEN4_SHARE_ACCESS_READ)
6985 fflags |= FREAD;
6986 if (access & OPEN4_SHARE_ACCESS_WRITE)
6987 fflags |= FWRITE;
6988
6989 rfs4_dbe_lock(sp->rs_dbe);
6990
6991 /*
6992 * Calculate the new deny and access mode that this open is adding to
6993 * the file for this open owner;
6994 */
6995 open_d = (deny & ~sp->rs_open_deny);
6996 open_a = (access & ~sp->rs_open_access);
6997
6998 /*
6999 * Calculate the new share access and share deny modes that this open
7000 * is adding to the file for this open owner;
7001 */
7002 share_a = (access & ~sp->rs_share_access);
7003 share_d = (deny & ~sp->rs_share_deny);
7004
7005 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
7006
7007 /*
7008 * Check to see the client has already sent an open for this
7009 * open owner on this file with the same share/deny modes.
7010 * If so, we don't need to check for a conflict and we don't
7011 * need to add another shrlock. If not, then we need to
7012 * check for conflicts in deny and access before checking for
7013 * conflicts in delegation. We don't want to recall a
7014 * delegation based on an open that will eventually fail based
7015 * on shares modes.
7016 */
7017
7018 if (share_a || share_d) {
7019 if ((err = rfs4_share(sp, access, deny)) != 0) {
7020 rfs4_dbe_unlock(sp->rs_dbe);
7021 resp->status = err;
7022
7023 rfs4_file_rele(fp);
7024 /* Not a fully formed open; "close" it */
7025 if (screate == TRUE)
7026 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7027 rfs4_state_rele(sp);
7028 return;
7029 }
7030 }
7031
7032 rfs4_dbe_lock(fp->rf_dbe);
7033
7034 /*
7035 * Check to see if this file is delegated and if so, if a
7036 * recall needs to be done.
7037 */
7038 if (rfs4_check_recall(sp, access)) {
7039 rfs4_dbe_unlock(fp->rf_dbe);
7040 rfs4_dbe_unlock(sp->rs_dbe);
7041 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
7042 delay(NFS4_DELEGATION_CONFLICT_DELAY);
7043 rfs4_dbe_lock(sp->rs_dbe);
7044
7045 /* if state closed while lock was dropped */
7046 if (sp->rs_closed) {
7047 if (share_a || share_d)
7048 (void) rfs4_unshare(sp);
7049 rfs4_dbe_unlock(sp->rs_dbe);
7050 rfs4_file_rele(fp);
7051 /* Not a fully formed open; "close" it */
7052 if (screate == TRUE)
7053 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7054 rfs4_state_rele(sp);
7055 resp->status = NFS4ERR_OLD_STATEID;
7056 return;
7057 }
7058
7059 rfs4_dbe_lock(fp->rf_dbe);
7060 /* Let's see if the delegation was returned */
7061 if (rfs4_check_recall(sp, access)) {
7062 rfs4_dbe_unlock(fp->rf_dbe);
7063 if (share_a || share_d)
7064 (void) rfs4_unshare(sp);
7065 rfs4_dbe_unlock(sp->rs_dbe);
7066 rfs4_file_rele(fp);
7067 rfs4_update_lease(sp->rs_owner->ro_client);
7068
7069 /* Not a fully formed open; "close" it */
7070 if (screate == TRUE)
7071 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7072 rfs4_state_rele(sp);
7073 resp->status = NFS4ERR_DELAY;
7074 return;
7075 }
7076 }
7077 /*
7078 * the share check passed and any delegation conflict has been
7079 * taken care of, now call vop_open.
7080 * if this is the first open then call vop_open with fflags.
7081 * if not, call vn_open_upgrade with just the upgrade flags.
7082 *
7083 * if the file has been opened already, it will have the current
7084 * access mode in the state struct. if it has no share access, then
7085 * this is a new open.
7086 *
7087 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
7088 * call VOP_OPEN(), just do the open upgrade.
7089 */
7090 if (first_open && !deleg_cur) {
7091 ct.cc_sysid = sysid;
7092 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7093 ct.cc_caller_id = nfs4_srv_caller_id;
7094 ct.cc_flags = CC_DONTBLOCK;
7095 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7096 if (err) {
7097 rfs4_dbe_unlock(fp->rf_dbe);
7098 if (share_a || share_d)
7099 (void) rfs4_unshare(sp);
7100 rfs4_dbe_unlock(sp->rs_dbe);
7101 rfs4_file_rele(fp);
7102
7103 /* Not a fully formed open; "close" it */
7104 if (screate == TRUE)
7105 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7106 rfs4_state_rele(sp);
7107 /* check if a monitor detected a delegation conflict */
7108 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7109 resp->status = NFS4ERR_DELAY;
7110 else
7111 resp->status = NFS4ERR_SERVERFAULT;
7112 return;
7113 }
7114 } else { /* open upgrade */
7115 /*
7116 * calculate the fflags for the new mode that is being added
7117 * by this upgrade.
7118 */
7119 fflags = 0;
7120 if (open_a & OPEN4_SHARE_ACCESS_READ)
7121 fflags |= FREAD;
7122 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7123 fflags |= FWRITE;
7124 vn_open_upgrade(cs->vp, fflags);
7125 }
7126 sp->rs_open_access |= access;
7127 sp->rs_open_deny |= deny;
7128
7129 if (open_d & OPEN4_SHARE_DENY_READ)
7130 fp->rf_deny_read++;
7131 if (open_d & OPEN4_SHARE_DENY_WRITE)
7132 fp->rf_deny_write++;
7133 fp->rf_share_deny |= deny;
7134
7135 if (open_a & OPEN4_SHARE_ACCESS_READ)
7136 fp->rf_access_read++;
7137 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7138 fp->rf_access_write++;
7139 fp->rf_share_access |= access;
7140
7141 /*
7142 * Check for delegation here. if the deleg argument is not
7143 * DELEG_ANY, then this is a reclaim from a client and
7144 * we must honor the delegation requested. If necessary we can
7145 * set the recall flag.
7146 */
7147
7148 dsp = rfs4_grant_delegation(deleg, sp, &recall);
7149
7150 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7151
7152 next_stateid(&sp->rs_stateid);
7153
7154 resp->stateid = sp->rs_stateid.stateid;
7155
7156 rfs4_dbe_unlock(fp->rf_dbe);
7157 rfs4_dbe_unlock(sp->rs_dbe);
7158
7159 if (dsp) {
7160 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7161 rfs4_deleg_state_rele(dsp);
7162 }
7163
7164 rfs4_file_rele(fp);
7165 rfs4_state_rele(sp);
7166
7167 resp->status = NFS4_OK;
7168 }
7169
7170 /*ARGSUSED*/
7171 static void
rfs4_do_openfh(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7172 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args,
7173 rfs4_openowner_t *oo, OPEN4res *resp)
7174 {
7175 /* cs->vp and cs->fh have been updated by putfh. */
7176 rfs4_do_open(cs, req, oo, DELEG_ANY,
7177 (args->share_access & 0xff), args->share_deny, resp, 0);
7178 }
7179
7180 /*ARGSUSED*/
7181 static void
rfs4_do_opennull(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7182 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7183 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7184 {
7185 change_info4 *cinfo = &resp->cinfo;
7186 bitmap4 *attrset = &resp->attrset;
7187
7188 if (args->opentype == OPEN4_NOCREATE)
7189 resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file,
7190 req, cs, args->share_access, cinfo);
7191 else {
7192 /* inhibit delegation grants during exclusive create */
7193
7194 if (args->mode == EXCLUSIVE4)
7195 rfs4_disable_delegation();
7196
7197 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7198 oo->ro_client->rc_clientid);
7199 }
7200
7201 if (resp->status == NFS4_OK) {
7202
7203 /* cs->vp cs->fh now reference the desired file */
7204
7205 rfs4_do_open(cs, req, oo,
7206 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7207 args->share_access, args->share_deny, resp, 0);
7208
7209 /*
7210 * If rfs4_createfile set attrset, we must
7211 * clear this attrset before the response is copied.
7212 */
7213 if (resp->status != NFS4_OK && resp->attrset) {
7214 resp->attrset = 0;
7215 }
7216 }
7217 else
7218 *cs->statusp = resp->status;
7219
7220 if (args->mode == EXCLUSIVE4)
7221 rfs4_enable_delegation();
7222 }
7223
7224 /*ARGSUSED*/
7225 static void
rfs4_do_openprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7226 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7227 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7228 {
7229 change_info4 *cinfo = &resp->cinfo;
7230 vattr_t va;
7231 vtype_t v_type = cs->vp->v_type;
7232 int error = 0;
7233
7234 /* Verify that we have a regular file */
7235 if (v_type != VREG) {
7236 if (v_type == VDIR)
7237 resp->status = NFS4ERR_ISDIR;
7238 else if (v_type == VLNK)
7239 resp->status = NFS4ERR_SYMLINK;
7240 else
7241 resp->status = NFS4ERR_INVAL;
7242 return;
7243 }
7244
7245 va.va_mask = AT_MODE|AT_UID;
7246 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7247 if (error) {
7248 resp->status = puterrno4(error);
7249 return;
7250 }
7251
7252 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7253
7254 /*
7255 * Check if we have access to the file, Note the the file
7256 * could have originally been open UNCHECKED or GUARDED
7257 * with mode bits that will now fail, but there is nothing
7258 * we can really do about that except in the case that the
7259 * owner of the file is the one requesting the open.
7260 */
7261 if (crgetuid(cs->cr) != va.va_uid) {
7262 resp->status = check_open_access(args->share_access, cs, req);
7263 if (resp->status != NFS4_OK) {
7264 return;
7265 }
7266 }
7267
7268 /*
7269 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7270 */
7271 cinfo->before = 0;
7272 cinfo->after = 0;
7273 cinfo->atomic = FALSE;
7274
7275 rfs4_do_open(cs, req, oo,
7276 NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type),
7277 args->share_access, args->share_deny, resp, 0);
7278 }
7279
7280 static void
rfs4_do_opendelcur(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7281 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7282 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7283 {
7284 int error;
7285 nfsstat4 status;
7286 stateid4 stateid =
7287 args->claim.open_claim4_u.delegate_cur_info.delegate_stateid;
7288 rfs4_deleg_state_t *dsp;
7289
7290 /*
7291 * Find the state info from the stateid and confirm that the
7292 * file is delegated. If the state openowner is the same as
7293 * the supplied openowner we're done. If not, get the file
7294 * info from the found state info. Use that file info to
7295 * create the state for this lock owner. Note solaris doen't
7296 * really need the pathname to find the file. We may want to
7297 * lookup the pathname and make sure that the vp exist and
7298 * matches the vp in the file structure. However it is
7299 * possible that the pathname nolonger exists (local process
7300 * unlinks the file), so this may not be that useful.
7301 */
7302
7303 status = rfs4_get_deleg_state(&stateid, &dsp);
7304 if (status != NFS4_OK) {
7305 resp->status = status;
7306 return;
7307 }
7308
7309 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7310
7311 /*
7312 * New lock owner, create state. Since this was probably called
7313 * in response to a CB_RECALL we set deleg to DELEG_NONE
7314 */
7315
7316 ASSERT(cs->vp != NULL);
7317 VN_RELE(cs->vp);
7318 VN_HOLD(dsp->rds_finfo->rf_vp);
7319 cs->vp = dsp->rds_finfo->rf_vp;
7320
7321 error = makefh4(&cs->fh, cs->vp, cs->exi);
7322 if (error != 0) {
7323 rfs4_deleg_state_rele(dsp);
7324 *cs->statusp = resp->status = puterrno4(error);
7325 return;
7326 }
7327
7328 /* Mark progress for delegation returns */
7329 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7330 rfs4_deleg_state_rele(dsp);
7331 rfs4_do_open(cs, req, oo, DELEG_NONE,
7332 args->share_access, args->share_deny, resp, 1);
7333 }
7334
7335 /*ARGSUSED*/
7336 static void
rfs4_do_opendelprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7337 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7338 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7339 {
7340 /*
7341 * Lookup the pathname, it must already exist since this file
7342 * was delegated.
7343 *
7344 * Find the file and state info for this vp and open owner pair.
7345 * check that they are in fact delegated.
7346 * check that the state access and deny modes are the same.
7347 *
7348 * Return the delgation possibly seting the recall flag.
7349 */
7350 rfs4_file_t *fp;
7351 rfs4_state_t *sp;
7352 bool_t create = FALSE;
7353 bool_t dcreate = FALSE;
7354 rfs4_deleg_state_t *dsp;
7355 nfsace4 *ace;
7356
7357 /* Note we ignore oflags */
7358 resp->status = rfs4_lookupfile(
7359 &args->claim.open_claim4_u.file_delegate_prev,
7360 req, cs, args->share_access, &resp->cinfo);
7361
7362 if (resp->status != NFS4_OK) {
7363 return;
7364 }
7365
7366 /* get the file struct and hold a lock on it during initial open */
7367 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7368 if (fp == NULL) {
7369 resp->status = NFS4ERR_RESOURCE;
7370 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7371 return;
7372 }
7373
7374 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7375 if (sp == NULL) {
7376 resp->status = NFS4ERR_SERVERFAULT;
7377 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7378 rw_exit(&fp->rf_file_rwlock);
7379 rfs4_file_rele(fp);
7380 return;
7381 }
7382
7383 rfs4_dbe_lock(sp->rs_dbe);
7384 rfs4_dbe_lock(fp->rf_dbe);
7385 if (args->share_access != sp->rs_share_access ||
7386 args->share_deny != sp->rs_share_deny ||
7387 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7388 NFS4_DEBUG(rfs4_debug,
7389 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7390 rfs4_dbe_unlock(fp->rf_dbe);
7391 rfs4_dbe_unlock(sp->rs_dbe);
7392 rfs4_file_rele(fp);
7393 rfs4_state_rele(sp);
7394 resp->status = NFS4ERR_SERVERFAULT;
7395 return;
7396 }
7397 rfs4_dbe_unlock(fp->rf_dbe);
7398 rfs4_dbe_unlock(sp->rs_dbe);
7399
7400 dsp = rfs4_finddeleg(sp, &dcreate);
7401 if (dsp == NULL) {
7402 rfs4_state_rele(sp);
7403 rfs4_file_rele(fp);
7404 resp->status = NFS4ERR_SERVERFAULT;
7405 return;
7406 }
7407
7408 next_stateid(&sp->rs_stateid);
7409
7410 resp->stateid = sp->rs_stateid.stateid;
7411
7412 resp->delegation.delegation_type = dsp->rds_dtype;
7413
7414 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7415 open_read_delegation4 *rv =
7416 &resp->delegation.open_delegation4_u.read;
7417
7418 rv->stateid = dsp->rds_delegid.stateid;
7419 rv->recall = FALSE; /* no policy in place to set to TRUE */
7420 ace = &rv->permissions;
7421 } else {
7422 open_write_delegation4 *rv =
7423 &resp->delegation.open_delegation4_u.write;
7424
7425 rv->stateid = dsp->rds_delegid.stateid;
7426 rv->recall = FALSE; /* no policy in place to set to TRUE */
7427 ace = &rv->permissions;
7428 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7429 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7430 }
7431
7432 /* XXX For now */
7433 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7434 ace->flag = 0;
7435 ace->access_mask = 0;
7436 ace->who.utf8string_len = 0;
7437 ace->who.utf8string_val = 0;
7438
7439 rfs4_deleg_state_rele(dsp);
7440 rfs4_state_rele(sp);
7441 rfs4_file_rele(fp);
7442 }
7443
/*
 * Outcome of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD		/* any other seqid / op combination */
} rfs4_chkseq_t;
7449
7450 /*
7451 * Generic function for sequence number checks.
7452 */
7453 static rfs4_chkseq_t
rfs4_check_seqid(seqid4 seqid,nfs_resop4 * lastop,seqid4 rqst_seq,nfs_resop4 * resop,bool_t copyres)7454 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7455 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7456 {
7457 /* Same sequence ids and matching operations? */
7458 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7459 if (copyres == TRUE) {
7460 rfs4_free_reply(resop);
7461 rfs4_copy_reply(resop, lastop);
7462 }
7463 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7464 "Replayed SEQID %d\n", seqid));
7465 return (NFS4_CHKSEQ_REPLAY);
7466 }
7467
7468 /* If the incoming sequence is not the next expected then it is bad */
7469 if (rqst_seq != seqid + 1) {
7470 if (rqst_seq == seqid) {
7471 NFS4_DEBUG(rfs4_debug,
7472 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7473 "but last op was %d current op is %d\n",
7474 lastop->resop, resop->resop));
7475 return (NFS4_CHKSEQ_BAD);
7476 }
7477 NFS4_DEBUG(rfs4_debug,
7478 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7479 rqst_seq, seqid));
7480 return (NFS4_CHKSEQ_BAD);
7481 }
7482
7483 /* Everything okay -- next expected */
7484 return (NFS4_CHKSEQ_OKAY);
7485 }
7486
7487
7488 static rfs4_chkseq_t
rfs4_check_open_seqid(seqid4 seqid,rfs4_openowner_t * op,nfs_resop4 * resop,const compound_state_t * cs)7489 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop,
7490 const compound_state_t *cs)
7491 {
7492 rfs4_chkseq_t rc;
7493
7494 if (rfs4_has_session(cs))
7495 return (NFS4_CHKSEQ_OKAY);
7496
7497 rfs4_dbe_lock(op->ro_dbe);
7498 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7499 TRUE);
7500 rfs4_dbe_unlock(op->ro_dbe);
7501
7502 if (rc == NFS4_CHKSEQ_OKAY)
7503 rfs4_update_lease(op->ro_client);
7504
7505 return (rc);
7506 }
7507
7508 static rfs4_chkseq_t
rfs4_check_olo_seqid(seqid4 olo_seqid,rfs4_openowner_t * op,nfs_resop4 * resop)7509 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7510 {
7511 rfs4_chkseq_t rc;
7512
7513 rfs4_dbe_lock(op->ro_dbe);
7514 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7515 olo_seqid, resop, FALSE);
7516 rfs4_dbe_unlock(op->ro_dbe);
7517
7518 return (rc);
7519 }
7520
7521 static rfs4_chkseq_t
rfs4_check_lock_seqid(seqid4 seqid,rfs4_lo_state_t * lsp,nfs_resop4 * resop)7522 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7523 {
7524 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7525
7526 rfs4_dbe_lock(lsp->rls_dbe);
7527 if (!lsp->rls_skip_seqid_check)
7528 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7529 resop, TRUE);
7530 rfs4_dbe_unlock(lsp->rls_dbe);
7531
7532 return (rc);
7533 }
7534
7535 static void
rfs4_op_open(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7536 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7537 struct svc_req *req, struct compound_state *cs)
7538 {
7539 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7540 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7541 open_owner4 *owner = &args->owner;
7542 open_claim_type4 claim = args->claim.claim;
7543 rfs4_client_t *cp;
7544 rfs4_openowner_t *oo;
7545 bool_t create;
7546 bool_t replay = FALSE;
7547 int can_reclaim;
7548
7549 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7550 OPEN4args *, args);
7551
7552 if (cs->vp == NULL) {
7553 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7554 goto end;
7555 }
7556
7557 /* rfc5661 section 18.16.3 */
7558 if (rfs4_has_session(cs))
7559 owner->clientid = cs->client->rc_clientid;
7560
7561 /*
7562 * Need to check clientid and lease expiration first based on
7563 * error ordering and incrementing sequence id.
7564 */
7565 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7566 if (cp == NULL) {
7567 *cs->statusp = resp->status =
7568 rfs4_check_clientid(&owner->clientid, 0);
7569 goto end;
7570 }
7571
7572 if (rfs4_lease_expired(cp)) {
7573 rfs4_client_close(cp);
7574 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7575 goto end;
7576 }
7577 can_reclaim = cp->rc_can_reclaim;
7578
7579 /*
7580 * Find the open_owner for use from this point forward. Take
7581 * care in updating the sequence id based on the type of error
7582 * being returned.
7583 */
7584 retry:
7585 create = TRUE;
7586 oo = rfs4_findopenowner(owner, &create, args->seqid);
7587 if (oo == NULL) {
7588 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7589 rfs4_client_rele(cp);
7590 goto end;
7591 }
7592
7593 /*
7594 * OPEN_CONFIRM must not be implemented in v4.1
7595 */
7596 if (rfs4_has_session(cs)) {
7597 oo->ro_need_confirm = FALSE;
7598 }
7599
7600 /* Hold off access to the sequence space while the open is done */
7601 /* Workaround to avoid deadlock */
7602 if (!rfs4_has_session(cs))
7603 rfs4_sw_enter(&oo->ro_sw);
7604
7605 /*
7606 * If the open_owner existed before at the server, then check
7607 * the sequence id.
7608 */
7609 if (!create && !oo->ro_postpone_confirm) {
7610 switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) {
7611 case NFS4_CHKSEQ_BAD:
7612 ASSERT(!rfs4_has_session(cs));
7613 if ((args->seqid > oo->ro_open_seqid) &&
7614 oo->ro_need_confirm) {
7615 rfs4_free_opens(oo, TRUE, FALSE);
7616 rfs4_sw_exit(&oo->ro_sw);
7617 rfs4_openowner_rele(oo);
7618 goto retry;
7619 }
7620 resp->status = NFS4ERR_BAD_SEQID;
7621 goto out;
7622 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7623 replay = TRUE;
7624 goto out;
7625 default:
7626 break;
7627 }
7628
7629 /*
7630 * Sequence was ok and open owner exists
7631 * check to see if we have yet to see an
7632 * open_confirm.
7633 */
7634 if (oo->ro_need_confirm) {
7635 rfs4_free_opens(oo, TRUE, FALSE);
7636 ASSERT(!rfs4_has_session(cs));
7637 rfs4_sw_exit(&oo->ro_sw);
7638 rfs4_openowner_rele(oo);
7639 goto retry;
7640 }
7641 }
7642 /* Grace only applies to regular-type OPENs */
7643 if (rfs4_clnt_in_grace(cp) &&
7644 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR ||
7645 claim == CLAIM_FH)) {
7646 *cs->statusp = resp->status = NFS4ERR_GRACE;
7647 goto out;
7648 }
7649
7650 /*
7651 * If previous state at the server existed then can_reclaim
7652 * will be set. If not reply NFS4ERR_NO_GRACE to the
7653 * client.
7654 */
7655 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7656 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7657 goto out;
7658 }
7659
7660
7661 /*
7662 * Reject the open if the client has missed the grace period
7663 */
7664 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7665 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7666 goto out;
7667 }
7668
7669 /* Couple of up-front bookkeeping items */
7670 if (oo->ro_need_confirm) {
7671 /*
7672 * If this is a reclaim OPEN then we should not ask
7673 * for a confirmation of the open_owner per the
7674 * protocol specification.
7675 */
7676 if (claim == CLAIM_PREVIOUS)
7677 oo->ro_need_confirm = FALSE;
7678 else
7679 resp->rflags |= OPEN4_RESULT_CONFIRM;
7680 }
7681 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7682
7683 /*
7684 * If there is an unshared filesystem mounted on this vnode,
7685 * do not allow to open/create in this directory.
7686 */
7687 if (vn_ismntpt(cs->vp)) {
7688 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7689 goto out;
7690 }
7691
7692 /*
7693 * access must READ, WRITE, or BOTH. No access is invalid.
7694 * deny can be READ, WRITE, BOTH, or NONE.
7695 * bits not defined for access/deny are invalid.
7696 */
7697 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7698 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7699 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7700 *cs->statusp = resp->status = NFS4ERR_INVAL;
7701 goto out;
7702 }
7703
7704
7705 /*
7706 * make sure attrset is zero before response is built.
7707 */
7708 resp->attrset = 0;
7709
7710 switch (claim) {
7711 case CLAIM_NULL:
7712 rfs4_do_opennull(cs, req, args, oo, resp);
7713 break;
7714 case CLAIM_PREVIOUS:
7715 rfs4_do_openprev(cs, req, args, oo, resp);
7716 break;
7717 case CLAIM_DELEGATE_CUR:
7718 rfs4_do_opendelcur(cs, req, args, oo, resp);
7719 break;
7720 case CLAIM_DELEGATE_PREV:
7721 rfs4_do_opendelprev(cs, req, args, oo, resp);
7722 break;
7723 case CLAIM_FH:
7724 rfs4_do_openfh(cs, req, args, oo, resp);
7725 break;
7726 default:
7727 resp->status = NFS4ERR_INVAL;
7728 break;
7729 }
7730
7731 out:
7732 rfs4_client_rele(cp);
7733
7734 /* Catch sequence id handling here to make it a little easier */
7735 switch (resp->status) {
7736 case NFS4ERR_BADXDR:
7737 case NFS4ERR_BAD_SEQID:
7738 case NFS4ERR_BAD_STATEID:
7739 case NFS4ERR_NOFILEHANDLE:
7740 case NFS4ERR_RESOURCE:
7741 case NFS4ERR_STALE_CLIENTID:
7742 case NFS4ERR_STALE_STATEID:
7743 /*
7744 * The protocol states that if any of these errors are
7745 * being returned, the sequence id should not be
7746 * incremented. Any other return requires an
7747 * increment.
7748 */
7749 break;
7750 default:
7751 /* Always update the lease in this case */
7752 rfs4_update_lease(oo->ro_client);
7753
7754 /* Regular response - copy the result */
7755 if (!replay)
7756 rfs4_update_open_resp(oo, resop, &cs->fh);
7757
7758 /*
7759 * REPLAY case: Only if the previous response was OK
7760 * do we copy the filehandle. If not OK, no
7761 * filehandle to copy.
7762 */
7763 if (replay == TRUE &&
7764 resp->status == NFS4_OK &&
7765 oo->ro_reply_fh.nfs_fh4_val) {
7766 /*
7767 * If this is a replay, we must restore the
7768 * current filehandle/vp to that of what was
7769 * returned originally. Try our best to do
7770 * it.
7771 */
7772 nfs_fh4_fmt_t *fh_fmtp =
7773 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7774
7775 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7776 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7777
7778 if (cs->exi == NULL) {
7779 resp->status = NFS4ERR_STALE;
7780 goto finish;
7781 }
7782
7783 VN_RELE(cs->vp);
7784
7785 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7786 &resp->status);
7787
7788 if (cs->vp == NULL)
7789 goto finish;
7790
7791 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7792 }
7793
7794 /*
7795 * If this was a replay, no need to update the
7796 * sequence id. If the open_owner was not created on
7797 * this pass, then update. The first use of an
7798 * open_owner will not bump the sequence id.
7799 */
7800 if (replay == FALSE && !create)
7801 rfs4_update_open_sequence(oo);
7802 /*
7803 * If the client is receiving an error and the
7804 * open_owner needs to be confirmed, there is no way
7805 * to notify the client of this fact ignoring the fact
7806 * that the server has no method of returning a
7807 * stateid to confirm. Therefore, the server needs to
7808 * mark this open_owner in a way as to avoid the
7809 * sequence id checking the next time the client uses
7810 * this open_owner.
7811 */
7812 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7813 oo->ro_postpone_confirm = TRUE;
7814 /*
7815 * If OK response then clear the postpone flag and
7816 * reset the sequence id to keep in sync with the
7817 * client.
7818 */
7819 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7820 oo->ro_postpone_confirm = FALSE;
7821 oo->ro_open_seqid = args->seqid;
7822 }
7823 break;
7824 }
7825
7826 finish:
7827 *cs->statusp = resp->status;
7828
7829 if (!rfs4_has_session(cs))
7830 rfs4_sw_exit(&oo->ro_sw);
7831 rfs4_openowner_rele(oo);
7832
7833 end:
7834 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7835 OPEN4res *, resp);
7836 }
7837
7838 /*ARGSUSED*/
7839 void
rfs4_op_open_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7840 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7841 struct svc_req *req, struct compound_state *cs)
7842 {
7843 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7844 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7845 rfs4_state_t *sp;
7846 nfsstat4 status;
7847
7848 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7849 OPEN_CONFIRM4args *, args);
7850
7851 ASSERT(!rfs4_has_session(cs));
7852
7853 if (cs->vp == NULL) {
7854 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7855 goto out;
7856 }
7857
7858 if (cs->vp->v_type != VREG) {
7859 *cs->statusp = resp->status =
7860 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7861 return;
7862 }
7863
7864 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7865 if (status != NFS4_OK) {
7866 *cs->statusp = resp->status = status;
7867 goto out;
7868 }
7869
7870 /* Ensure specified filehandle matches */
7871 if (cs->vp != sp->rs_finfo->rf_vp) {
7872 rfs4_state_rele(sp);
7873 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7874 goto out;
7875 }
7876
7877 /* hold off other access to open_owner while we tinker */
7878 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7879
7880 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
7881 case NFS4_CHECK_STATEID_OKAY:
7882 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7883 resop, cs) != 0) {
7884 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7885 break;
7886 }
7887 /*
7888 * If it is the appropriate stateid and determined to
7889 * be "OKAY" then this means that the stateid does not
7890 * need to be confirmed and the client is in error for
7891 * sending an OPEN_CONFIRM.
7892 */
7893 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7894 break;
7895 case NFS4_CHECK_STATEID_OLD:
7896 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7897 break;
7898 case NFS4_CHECK_STATEID_BAD:
7899 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7900 break;
7901 case NFS4_CHECK_STATEID_EXPIRED:
7902 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7903 break;
7904 case NFS4_CHECK_STATEID_CLOSED:
7905 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7906 break;
7907 case NFS4_CHECK_STATEID_REPLAY:
7908 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7909 resop, cs)) {
7910 case NFS4_CHKSEQ_OKAY:
7911 /*
7912 * This is replayed stateid; if seqid matches
7913 * next expected, then client is using wrong seqid.
7914 */
7915 /* fall through */
7916 case NFS4_CHKSEQ_BAD:
7917 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7918 break;
7919 case NFS4_CHKSEQ_REPLAY:
7920 /*
7921 * Note this case is the duplicate case so
7922 * resp->status is already set.
7923 */
7924 *cs->statusp = resp->status;
7925 rfs4_update_lease(sp->rs_owner->ro_client);
7926 break;
7927 }
7928 break;
7929 case NFS4_CHECK_STATEID_UNCONFIRMED:
7930 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7931 resop, cs) != NFS4_CHKSEQ_OKAY) {
7932 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7933 break;
7934 }
7935 *cs->statusp = resp->status = NFS4_OK;
7936
7937 next_stateid(&sp->rs_stateid);
7938 resp->open_stateid = sp->rs_stateid.stateid;
7939 sp->rs_owner->ro_need_confirm = FALSE;
7940 rfs4_update_lease(sp->rs_owner->ro_client);
7941 rfs4_update_open_sequence(sp->rs_owner);
7942 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7943 break;
7944 default:
7945 ASSERT(FALSE);
7946 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7947 break;
7948 }
7949 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7950 rfs4_state_rele(sp);
7951
7952 out:
7953 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7954 OPEN_CONFIRM4res *, resp);
7955 }
7956
7957 /*ARGSUSED*/
7958 void
rfs4_op_open_downgrade(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7959 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7960 struct svc_req *req, struct compound_state *cs)
7961 {
7962 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7963 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7964 uint32_t access = args->share_access;
7965 uint32_t deny = args->share_deny;
7966 nfsstat4 status;
7967 rfs4_state_t *sp;
7968 rfs4_file_t *fp;
7969 int fflags = 0;
7970
7971 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7972 OPEN_DOWNGRADE4args *, args);
7973
7974 if (cs->vp == NULL) {
7975 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7976 goto out;
7977 }
7978
7979 if (cs->vp->v_type != VREG) {
7980 *cs->statusp = resp->status = NFS4ERR_INVAL;
7981 return;
7982 }
7983
7984 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7985 if (status != NFS4_OK) {
7986 *cs->statusp = resp->status = status;
7987 goto out;
7988 }
7989
7990 /* Ensure specified filehandle matches */
7991 if (cs->vp != sp->rs_finfo->rf_vp) {
7992 rfs4_state_rele(sp);
7993 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7994 goto out;
7995 }
7996
7997 /* hold off other access to open_owner while we tinker */
7998 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7999
8000 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8001 case NFS4_CHECK_STATEID_OKAY:
8002 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8003 resop, cs) != NFS4_CHKSEQ_OKAY) {
8004 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8005 goto end;
8006 }
8007 break;
8008 case NFS4_CHECK_STATEID_OLD:
8009 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8010 goto end;
8011 case NFS4_CHECK_STATEID_BAD:
8012 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8013 goto end;
8014 case NFS4_CHECK_STATEID_EXPIRED:
8015 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8016 goto end;
8017 case NFS4_CHECK_STATEID_CLOSED:
8018 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8019 goto end;
8020 case NFS4_CHECK_STATEID_UNCONFIRMED:
8021 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8022 goto end;
8023 case NFS4_CHECK_STATEID_REPLAY:
8024 ASSERT(!rfs4_has_session(cs));
8025
8026 /* Check the sequence id for the open owner */
8027 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8028 resop, cs)) {
8029 case NFS4_CHKSEQ_OKAY:
8030 /*
8031 * This is replayed stateid; if seqid matches
8032 * next expected, then client is using wrong seqid.
8033 */
8034 /* fall through */
8035 case NFS4_CHKSEQ_BAD:
8036 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8037 goto end;
8038 case NFS4_CHKSEQ_REPLAY:
8039 /*
8040 * Note this case is the duplicate case so
8041 * resp->status is already set.
8042 */
8043 *cs->statusp = resp->status;
8044 rfs4_update_lease(sp->rs_owner->ro_client);
8045 goto end;
8046 }
8047 break;
8048 default:
8049 ASSERT(FALSE);
8050 break;
8051 }
8052
8053 rfs4_dbe_lock(sp->rs_dbe);
8054 /*
8055 * Check that the new access modes and deny modes are valid.
8056 * Check that no invalid bits are set.
8057 */
8058 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8059 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8060 *cs->statusp = resp->status = NFS4ERR_INVAL;
8061 rfs4_update_open_sequence(sp->rs_owner);
8062 rfs4_dbe_unlock(sp->rs_dbe);
8063 goto end;
8064 }
8065
8066 /*
8067 * The new modes must be a subset of the current modes and
8068 * the access must specify at least one mode. To test that
8069 * the new mode is a subset of the current modes we bitwise
8070 * AND them together and check that the result equals the new
8071 * mode. For example:
8072 * New mode, access == R and current mode, sp->rs_open_access == RW
8073 * access & sp->rs_open_access == R == access, so the new access mode
8074 * is valid. Consider access == RW, sp->rs_open_access = R
8075 * access & sp->rs_open_access == R != access, so the new access mode
8076 * is invalid.
8077 */
8078 if ((access & sp->rs_open_access) != access ||
8079 (deny & sp->rs_open_deny) != deny ||
8080 (access &
8081 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8082 *cs->statusp = resp->status = NFS4ERR_INVAL;
8083 rfs4_update_open_sequence(sp->rs_owner);
8084 rfs4_dbe_unlock(sp->rs_dbe);
8085 goto end;
8086 }
8087
8088 /*
8089 * Release any share locks associated with this stateID.
8090 * Strictly speaking, this violates the spec because the
8091 * spec effectively requires that open downgrade be atomic.
8092 * At present, fs_shrlock does not have this capability.
8093 */
8094 (void) rfs4_unshare(sp);
8095
8096 status = rfs4_share(sp, access, deny);
8097 if (status != NFS4_OK) {
8098 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8099 rfs4_update_open_sequence(sp->rs_owner);
8100 rfs4_dbe_unlock(sp->rs_dbe);
8101 goto end;
8102 }
8103
8104 fp = sp->rs_finfo;
8105 rfs4_dbe_lock(fp->rf_dbe);
8106
8107 /*
8108 * If the current mode has deny read and the new mode
8109 * does not, decrement the number of deny read mode bits
8110 * and if it goes to zero turn off the deny read bit
8111 * on the file.
8112 */
8113 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8114 (deny & OPEN4_SHARE_DENY_READ) == 0) {
8115 fp->rf_deny_read--;
8116 if (fp->rf_deny_read == 0)
8117 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8118 }
8119
8120 /*
8121 * If the current mode has deny write and the new mode
8122 * does not, decrement the number of deny write mode bits
8123 * and if it goes to zero turn off the deny write bit
8124 * on the file.
8125 */
8126 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8127 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8128 fp->rf_deny_write--;
8129 if (fp->rf_deny_write == 0)
8130 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8131 }
8132
8133 /*
8134 * If the current mode has access read and the new mode
8135 * does not, decrement the number of access read mode bits
8136 * and if it goes to zero turn off the access read bit
8137 * on the file. set fflags to FREAD for the call to
8138 * vn_open_downgrade().
8139 */
8140 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8141 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8142 fp->rf_access_read--;
8143 if (fp->rf_access_read == 0)
8144 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8145 fflags |= FREAD;
8146 }
8147
8148 /*
8149 * If the current mode has access write and the new mode
8150 * does not, decrement the number of access write mode bits
8151 * and if it goes to zero turn off the access write bit
8152 * on the file. set fflags to FWRITE for the call to
8153 * vn_open_downgrade().
8154 */
8155 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8156 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8157 fp->rf_access_write--;
8158 if (fp->rf_access_write == 0)
8159 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8160 fflags |= FWRITE;
8161 }
8162
8163 /* Check that the file is still accessible */
8164 ASSERT(fp->rf_share_access);
8165
8166 rfs4_dbe_unlock(fp->rf_dbe);
8167
8168 /* now set the new open access and deny modes */
8169 sp->rs_open_access = access;
8170 sp->rs_open_deny = deny;
8171
8172 /*
8173 * we successfully downgraded the share lock, now we need to downgrade
8174 * the open. it is possible that the downgrade was only for a deny
8175 * mode and we have nothing else to do.
8176 */
8177 if ((fflags & (FREAD|FWRITE)) != 0)
8178 vn_open_downgrade(cs->vp, fflags);
8179
8180 /* Update the stateid */
8181 next_stateid(&sp->rs_stateid);
8182 resp->open_stateid = sp->rs_stateid.stateid;
8183
8184 rfs4_dbe_unlock(sp->rs_dbe);
8185
8186 *cs->statusp = resp->status = NFS4_OK;
8187 /* Update the lease */
8188 rfs4_update_lease(sp->rs_owner->ro_client);
8189 /* And the sequence */
8190 rfs4_update_open_sequence(sp->rs_owner);
8191 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8192
8193 end:
8194 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8195 rfs4_state_rele(sp);
8196 out:
8197 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8198 OPEN_DOWNGRADE4res *, resp);
8199 }
8200
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * buffer s1 (which need not be NUL-terminated).
 *
 * Returns a pointer to the first position in s1 at which all strlen(s2)
 * bytes of s2 match, or NULL when no such position lies entirely within
 * the n-byte window.  An empty s2 matches at the start of s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *haystack = (char *)s1;
	size_t last_start;
	size_t off;

	/* The pattern cannot fit in a window shorter than itself. */
	if (n < needle_len)
		return (NULL);

	/* Highest offset at which a full-length comparison stays in bounds. */
	last_start = n - needle_len;

	for (off = 0; off <= last_start; off++) {
		if (bcmp(haystack + off, s2, needle_len) == 0)
			return (haystack + off);
	}

	return (NULL);
}
8216
8217 /*
8218 * The logic behind this function is detailed in the NFSv4 RFC in the
8219 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
8220 * that section for explicit guidance to server behavior for
8221 * SETCLIENTID.
8222 */
8223 void
rfs4_op_setclientid(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8224 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8225 struct svc_req *req, struct compound_state *cs)
8226 {
8227 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8228 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8229 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8230 rfs4_clntip_t *ci;
8231 bool_t create;
8232 char *addr, *netid;
8233 int len;
8234
8235 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8236 SETCLIENTID4args *, args);
8237 retry:
8238 newcp = cp_confirmed = cp_unconfirmed = NULL;
8239
8240 /*
8241 * Save the caller's IP address
8242 */
8243 args->client.cl_addr =
8244 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8245
8246 /*
8247 * Record if it is a Solaris client that cannot handle referrals.
8248 */
8249 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8250 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8251 /* Add a "yes, it's downrev" record */
8252 create = TRUE;
8253 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8254 ASSERT(ci != NULL);
8255 rfs4_dbe_rele(ci->ri_dbe);
8256 } else {
8257 /* Remove any previous record */
8258 rfs4_invalidate_clntip(args->client.cl_addr);
8259 }
8260
8261 /*
8262 * In search of an EXISTING client matching the incoming
8263 * request to establish a new client identifier at the server
8264 */
8265 create = TRUE;
8266 cp = rfs4_findclient(&args->client, &create, NULL);
8267
8268 /* Should never happen */
8269 ASSERT(cp != NULL);
8270
8271 if (cp == NULL) {
8272 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8273 goto out;
8274 }
8275
8276 /*
8277 * Easiest case. Client identifier is newly created and is
8278 * unconfirmed. Also note that for this case, no other
8279 * entries exist for the client identifier. Nothing else to
8280 * check. Just setup the response and respond.
8281 */
8282 if (create) {
8283 *cs->statusp = res->status = NFS4_OK;
8284 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8285 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8286 cp->rc_confirm_verf;
8287 /* Setup callback information; CB_NULL confirmation later */
8288 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8289
8290 rfs4_client_rele(cp);
8291 goto out;
8292 }
8293
8294 /*
8295 * An existing, confirmed client may exist but it may not have
8296 * been active for at least one lease period. If so, then
8297 * "close" the client and create a new client identifier
8298 */
8299 if (rfs4_lease_expired(cp)) {
8300 rfs4_client_close(cp);
8301 goto retry;
8302 }
8303
8304 if (cp->rc_need_confirm == TRUE)
8305 cp_unconfirmed = cp;
8306 else
8307 cp_confirmed = cp;
8308
8309 cp = NULL;
8310
8311 /*
8312 * We have a confirmed client, now check for an
8313 * unconfimred entry
8314 */
8315 if (cp_confirmed) {
8316 /* If creds don't match then client identifier is inuse */
8317 if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) {
8318 rfs4_cbinfo_t *cbp;
8319 /*
8320 * Some one else has established this client
8321 * id. Try and say * who they are. We will use
8322 * the call back address supplied by * the
8323 * first client.
8324 */
8325 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8326
8327 addr = netid = NULL;
8328
8329 cbp = &cp_confirmed->rc_cbinfo;
8330 if (cbp->cb_callback.cb_location.r_addr &&
8331 cbp->cb_callback.cb_location.r_netid) {
8332 cb_client4 *cbcp = &cbp->cb_callback;
8333
8334 len = strlen(cbcp->cb_location.r_addr)+1;
8335 addr = kmem_alloc(len, KM_SLEEP);
8336 bcopy(cbcp->cb_location.r_addr, addr, len);
8337 len = strlen(cbcp->cb_location.r_netid)+1;
8338 netid = kmem_alloc(len, KM_SLEEP);
8339 bcopy(cbcp->cb_location.r_netid, netid, len);
8340 }
8341
8342 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8343 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8344
8345 rfs4_client_rele(cp_confirmed);
8346 }
8347
8348 /*
8349 * Confirmed, creds match, and verifier matches; must
8350 * be an update of the callback info
8351 */
8352 if (cp_confirmed->rc_nfs_client.verifier ==
8353 args->client.verifier) {
8354 /* Setup callback information */
8355 rfs4_client_setcb(cp_confirmed, &args->callback,
8356 args->callback_ident);
8357
8358 /* everything okay -- move ahead */
8359 *cs->statusp = res->status = NFS4_OK;
8360 res->SETCLIENTID4res_u.resok4.clientid =
8361 cp_confirmed->rc_clientid;
8362
8363 /* update the confirm_verifier and return it */
8364 rfs4_client_scv_next(cp_confirmed);
8365 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8366 cp_confirmed->rc_confirm_verf;
8367
8368 rfs4_client_rele(cp_confirmed);
8369 goto out;
8370 }
8371
8372 /*
8373 * Creds match but the verifier doesn't. Must search
8374 * for an unconfirmed client that would be replaced by
8375 * this request.
8376 */
8377 create = FALSE;
8378 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8379 cp_confirmed);
8380 }
8381
8382 /*
8383 * At this point, we have taken care of the brand new client
8384 * struct, INUSE case, update of an existing, and confirmed
8385 * client struct.
8386 */
8387
8388 /*
8389 * check to see if things have changed while we originally
8390 * picked up the client struct. If they have, then return and
8391 * retry the processing of this SETCLIENTID request.
8392 */
8393 if (cp_unconfirmed) {
8394 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8395 if (!cp_unconfirmed->rc_need_confirm) {
8396 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8397 rfs4_client_rele(cp_unconfirmed);
8398 if (cp_confirmed)
8399 rfs4_client_rele(cp_confirmed);
8400 goto retry;
8401 }
8402 /* do away with the old unconfirmed one */
8403 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8404 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8405 rfs4_client_rele(cp_unconfirmed);
8406 cp_unconfirmed = NULL;
8407 }
8408
8409 /*
8410 * This search will temporarily hide the confirmed client
8411 * struct while a new client struct is created as the
8412 * unconfirmed one.
8413 */
8414 create = TRUE;
8415 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8416
8417 ASSERT(newcp != NULL);
8418
8419 if (newcp == NULL) {
8420 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8421 rfs4_client_rele(cp_confirmed);
8422 goto out;
8423 }
8424
8425 /*
8426 * If one was not created, then a similar request must be in
8427 * process so release and start over with this one
8428 */
8429 if (create != TRUE) {
8430 rfs4_client_rele(newcp);
8431 if (cp_confirmed)
8432 rfs4_client_rele(cp_confirmed);
8433 goto retry;
8434 }
8435
8436 *cs->statusp = res->status = NFS4_OK;
8437 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8438 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8439 newcp->rc_confirm_verf;
8440 /* Setup callback information; CB_NULL confirmation later */
8441 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8442
8443 newcp->rc_cp_confirmed = cp_confirmed;
8444
8445 rfs4_client_rele(newcp);
8446
8447 out:
8448 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8449 SETCLIENTID4res *, res);
8450 }
8451
8452 /*ARGSUSED*/
8453 void
rfs4_op_setclientid_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8454 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8455 struct svc_req *req, struct compound_state *cs)
8456 {
8457 SETCLIENTID_CONFIRM4args *args =
8458 &argop->nfs_argop4_u.opsetclientid_confirm;
8459 SETCLIENTID_CONFIRM4res *res =
8460 &resop->nfs_resop4_u.opsetclientid_confirm;
8461 rfs4_client_t *cp, *cptoclose = NULL;
8462 nfs4_srv_t *nsrv4;
8463
8464 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8465 struct compound_state *, cs,
8466 SETCLIENTID_CONFIRM4args *, args);
8467
8468 nsrv4 = nfs4_get_srv();
8469 *cs->statusp = res->status = NFS4_OK;
8470
8471 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8472
8473 if (cp == NULL) {
8474 *cs->statusp = res->status =
8475 rfs4_check_clientid(&args->clientid, 1);
8476 goto out;
8477 }
8478
8479 if (!creds_ok(&cp->rc_cr_set, req, cs)) {
8480 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8481 rfs4_client_rele(cp);
8482 goto out;
8483 }
8484
8485 /* If the verifier doesn't match, the record doesn't match */
8486 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8487 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8488 rfs4_client_rele(cp);
8489 goto out;
8490 }
8491
8492 rfs4_dbe_lock(cp->rc_dbe);
8493 cp->rc_need_confirm = FALSE;
8494 if (cp->rc_cp_confirmed) {
8495 cptoclose = cp->rc_cp_confirmed;
8496 cptoclose->rc_ss_remove = 1;
8497 cp->rc_cp_confirmed = NULL;
8498 }
8499
8500 /*
8501 * Update the client's associated server instance, if it's changed
8502 * since the client was created.
8503 */
8504 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8505 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8506
8507 /*
8508 * Record clientid in stable storage.
8509 * Must be done after server instance has been assigned.
8510 */
8511 rfs4_ss_clid(nsrv4, cp);
8512
8513 rfs4_dbe_unlock(cp->rc_dbe);
8514
8515 if (cptoclose)
8516 /* don't need to rele, client_close does it */
8517 rfs4_client_close(cptoclose);
8518
8519 /* If needed, initiate CB_NULL call for callback path */
8520 rfs4_deleg_cb_check(cp);
8521 rfs4_update_lease(cp);
8522
8523 /*
8524 * Check to see if client can perform reclaims
8525 */
8526 rfs4_ss_chkclid(nsrv4, cp);
8527
8528 rfs4_client_rele(cp);
8529
8530 out:
8531 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8532 struct compound_state *, cs,
8533 SETCLIENTID_CONFIRM4 *, res);
8534 }
8535
8536
8537 /*ARGSUSED*/
8538 void
rfs4_op_close(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8539 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8540 struct svc_req *req, struct compound_state *cs)
8541 {
8542 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8543 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8544 rfs4_state_t *sp;
8545 nfsstat4 status;
8546
8547 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8548 CLOSE4args *, args);
8549
8550 if (cs->vp == NULL) {
8551 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8552 goto out;
8553 }
8554
8555 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8556 if (status != NFS4_OK) {
8557 *cs->statusp = resp->status = status;
8558 goto out;
8559 }
8560
8561 /* Ensure specified filehandle matches */
8562 if (cs->vp != sp->rs_finfo->rf_vp) {
8563 rfs4_state_rele(sp);
8564 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8565 goto out;
8566 }
8567
8568 /* hold off other access to open_owner while we tinker */
8569 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8570
8571 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8572 case NFS4_CHECK_STATEID_OKAY:
8573 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8574 resop, cs) != NFS4_CHKSEQ_OKAY) {
8575 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8576 goto end;
8577 }
8578 break;
8579 case NFS4_CHECK_STATEID_OLD:
8580 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8581 goto end;
8582 case NFS4_CHECK_STATEID_BAD:
8583 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8584 goto end;
8585 case NFS4_CHECK_STATEID_EXPIRED:
8586 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8587 goto end;
8588 case NFS4_CHECK_STATEID_CLOSED:
8589 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8590 goto end;
8591 case NFS4_CHECK_STATEID_UNCONFIRMED:
8592 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8593 goto end;
8594 case NFS4_CHECK_STATEID_REPLAY:
8595 ASSERT(!rfs4_has_session(cs));
8596
8597 /* Check the sequence id for the open owner */
8598 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8599 resop, cs)) {
8600 case NFS4_CHKSEQ_OKAY:
8601 /*
8602 * This is replayed stateid; if seqid matches
8603 * next expected, then client is using wrong seqid.
8604 */
8605 /* FALL THROUGH */
8606 case NFS4_CHKSEQ_BAD:
8607 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8608 goto end;
8609 case NFS4_CHKSEQ_REPLAY:
8610 /*
8611 * Note this case is the duplicate case so
8612 * resp->status is already set.
8613 */
8614 *cs->statusp = resp->status;
8615 rfs4_update_lease(sp->rs_owner->ro_client);
8616 goto end;
8617 }
8618 break;
8619 default:
8620 ASSERT(FALSE);
8621 break;
8622 }
8623
8624 rfs4_dbe_lock(sp->rs_dbe);
8625
8626 /* Update the stateid. */
8627 next_stateid(&sp->rs_stateid);
8628 resp->open_stateid = sp->rs_stateid.stateid;
8629
8630 rfs4_dbe_unlock(sp->rs_dbe);
8631
8632 rfs4_update_lease(sp->rs_owner->ro_client);
8633 rfs4_update_open_sequence(sp->rs_owner);
8634 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8635
8636 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8637
8638 *cs->statusp = resp->status = status;
8639
8640 end:
8641 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8642 rfs4_state_rele(sp);
8643 out:
8644 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8645 CLOSE4res *, resp);
8646 }
8647
8648 /*
8649 * Manage the counts on the file struct and close all file locks
8650 */
8651 /*ARGSUSED*/
8652 void
rfs4_release_share_lock_state(rfs4_state_t * sp,cred_t * cr,bool_t close_of_client)8653 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8654 bool_t close_of_client)
8655 {
8656 rfs4_file_t *fp = sp->rs_finfo;
8657 rfs4_lo_state_t *lsp;
8658 int fflags = 0;
8659
8660 /*
8661 * If this call is part of the larger closing down of client
8662 * state then it is just easier to release all locks
8663 * associated with this client instead of going through each
8664 * individual file and cleaning locks there.
8665 */
8666 if (close_of_client) {
8667 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8668 !list_is_empty(&sp->rs_lostatelist) &&
8669 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8670 /* Is the PxFS kernel module loaded? */
8671 if (lm_remove_file_locks != NULL) {
8672 int new_sysid;
8673
8674 /* Encode the cluster nodeid in new sysid */
8675 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8676 lm_set_nlmid_flk(&new_sysid);
8677
8678 /*
8679 * This PxFS routine removes file locks for a
8680 * client over all nodes of a cluster.
8681 */
8682 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8683 "lm_remove_file_locks(sysid=0x%x)\n",
8684 new_sysid));
8685 (*lm_remove_file_locks)(new_sysid);
8686 } else {
8687 struct flock64 flk;
8688
8689 /* Release all locks for this client */
8690 flk.l_type = F_UNLKSYS;
8691 flk.l_whence = 0;
8692 flk.l_start = 0;
8693 flk.l_len = 0;
8694 flk.l_sysid =
8695 sp->rs_owner->ro_client->rc_sysidt;
8696 flk.l_pid = 0;
8697 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8698 &flk, F_REMOTELOCK | FREAD | FWRITE,
8699 (u_offset_t)0, NULL, CRED(), NULL);
8700 }
8701
8702 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8703 }
8704 }
8705
8706 /*
8707 * Release all locks on this file by this lock owner or at
8708 * least mark the locks as having been released
8709 */
8710 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8711 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8712 lsp->rls_locks_cleaned = TRUE;
8713
8714 /* Was this already taken care of above? */
8715 if (!close_of_client &&
8716 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8717 (void) cleanlocks(sp->rs_finfo->rf_vp,
8718 lsp->rls_locker->rl_pid,
8719 lsp->rls_locker->rl_client->rc_sysidt);
8720 }
8721
8722 /*
8723 * Release any shrlocks associated with this open state ID.
8724 * This must be done before the rfs4_state gets marked closed.
8725 */
8726 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8727 (void) rfs4_unshare(sp);
8728
8729 if (sp->rs_open_access) {
8730 rfs4_dbe_lock(fp->rf_dbe);
8731
8732 /*
8733 * Decrement the count for each access and deny bit that this
8734 * state has contributed to the file.
8735 * If the file counts go to zero
8736 * clear the appropriate bit in the appropriate mask.
8737 */
8738 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8739 fp->rf_access_read--;
8740 fflags |= FREAD;
8741 if (fp->rf_access_read == 0)
8742 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8743 }
8744 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8745 fp->rf_access_write--;
8746 fflags |= FWRITE;
8747 if (fp->rf_access_write == 0)
8748 fp->rf_share_access &=
8749 ~OPEN4_SHARE_ACCESS_WRITE;
8750 }
8751 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8752 fp->rf_deny_read--;
8753 if (fp->rf_deny_read == 0)
8754 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8755 }
8756 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8757 fp->rf_deny_write--;
8758 if (fp->rf_deny_write == 0)
8759 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8760 }
8761
8762 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8763
8764 rfs4_dbe_unlock(fp->rf_dbe);
8765
8766 sp->rs_open_access = 0;
8767 sp->rs_open_deny = 0;
8768 }
8769 }
8770
8771 /*
8772 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8773 */
8774 static nfsstat4
lock_denied(LOCK4denied * dp,struct flock64 * flk)8775 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8776 {
8777 rfs4_lockowner_t *lo;
8778 rfs4_client_t *cp;
8779 uint32_t len;
8780
8781 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8782 if (lo != NULL) {
8783 cp = lo->rl_client;
8784 if (rfs4_lease_expired(cp)) {
8785 rfs4_lockowner_rele(lo);
8786 rfs4_dbe_hold(cp->rc_dbe);
8787 rfs4_client_close(cp);
8788 return (NFS4ERR_EXPIRED);
8789 }
8790 dp->owner.clientid = lo->rl_owner.clientid;
8791 len = lo->rl_owner.owner_len;
8792 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8793 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8794 dp->owner.owner_len = len;
8795 rfs4_lockowner_rele(lo);
8796 goto finish;
8797 }
8798
8799 /*
8800 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8801 * of the client id contain the boot time for a NFS4 lock. So we
8802 * fabricate and identity by setting clientid to the sysid, and
8803 * the lock owner to the pid.
8804 */
8805 dp->owner.clientid = flk->l_sysid;
8806 len = sizeof (pid_t);
8807 dp->owner.owner_len = len;
8808 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8809 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8810 finish:
8811 dp->offset = flk->l_start;
8812 dp->length = flk->l_len;
8813
8814 if (flk->l_type == F_RDLCK)
8815 dp->locktype = READ_LT;
8816 else if (flk->l_type == F_WRLCK)
8817 dp->locktype = WRITE_LT;
8818 else
8819 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8820
8821 return (NFS4_OK);
8822 }
8823
8824 /*
8825 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8826 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8827 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8828 * for that (obviously); they are sending the LOCK requests with some delays
8829 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8830 * locking and delay implementation at the client side.
8831 *
8832 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8833 * fast retries on its own (the for loop below) in a hope the lock will be
8834 * available soon. And if not, the client won't need to resend the LOCK
8835 * requests so fast to check the lock availability. This basically saves some
8836 * network traffic and tries to make sure the client gets the lock ASAP.
8837 */
8838 static int
setlock(vnode_t * vp,struct flock64 * flock,int flag,cred_t * cred)8839 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8840 {
8841 int error;
8842 struct flock64 flk;
8843 int i;
8844 clock_t delaytime;
8845 int cmd;
8846 int spin_cnt = 0;
8847
8848 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8849 retry:
8850 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8851
8852 for (i = 0; i < rfs4_maxlock_tries; i++) {
8853 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8854 error = VOP_FRLOCK(vp, cmd,
8855 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8856
8857 if (error != EAGAIN && error != EACCES)
8858 break;
8859
8860 if (i < rfs4_maxlock_tries - 1) {
8861 delay(delaytime);
8862 delaytime *= 2;
8863 }
8864 }
8865
8866 if (error == EAGAIN || error == EACCES) {
8867 /* Get the owner of the lock */
8868 flk = *flock;
8869 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8870 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8871 NULL) == 0) {
8872 /*
8873 * There's a race inherent in the current VOP_FRLOCK
8874 * design where:
8875 * a: "other guy" takes a lock that conflicts with a
8876 * lock we want
8877 * b: we attempt to take our lock (non-blocking) and
8878 * the attempt fails.
8879 * c: "other guy" releases the conflicting lock
8880 * d: we ask what lock conflicts with the lock we want,
8881 * getting F_UNLCK (no lock blocks us)
8882 *
8883 * If we retry the non-blocking lock attempt in this
8884 * case (restart at step 'b') there's some possibility
8885 * that many such attempts might fail. However a test
8886 * designed to actually provoke this race shows that
8887 * the vast majority of cases require no retry, and
8888 * only a few took as many as three retries. Here's
8889 * the test outcome:
8890 *
8891 * number of retries how many times we needed
8892 * that many retries
8893 * 0 79461
8894 * 1 862
8895 * 2 49
8896 * 3 5
8897 *
8898 * Given those empirical results, we arbitrarily limit
8899 * the retry count to ten.
8900 *
8901 * If we actually make to ten retries and give up,
8902 * nothing catastrophic happens, but we're unable to
8903 * return the information about the conflicting lock to
8904 * the NFS client. That's an acceptable trade off vs.
8905 * letting this retry loop run forever.
8906 */
8907 if (flk.l_type == F_UNLCK) {
8908 if (spin_cnt++ < 10) {
8909 /* No longer locked, retry */
8910 goto retry;
8911 }
8912 } else {
8913 *flock = flk;
8914 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8915 F_GETLK, &flk);
8916 }
8917 }
8918 }
8919
8920 return (error);
8921 }
8922
8923 /*ARGSUSED*/
8924 static nfsstat4
rfs4_do_lock(rfs4_lo_state_t * lsp,nfs_lock_type4 locktype,offset4 offset,length4 length,cred_t * cred,nfs_resop4 * resop)8925 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8926 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8927 {
8928 nfsstat4 status;
8929 rfs4_lockowner_t *lo = lsp->rls_locker;
8930 rfs4_state_t *sp = lsp->rls_state;
8931 struct flock64 flock;
8932 int16_t ltype;
8933 int flag;
8934 int error;
8935 sysid_t sysid;
8936 LOCK4res *lres;
8937 vnode_t *vp;
8938
8939 if (rfs4_lease_expired(lo->rl_client)) {
8940 return (NFS4ERR_EXPIRED);
8941 }
8942
8943 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8944 return (status);
8945
8946 /* Check for zero length. To lock to end of file use all ones for V4 */
8947 if (length == 0)
8948 return (NFS4ERR_INVAL);
8949 else if (length == (length4)(~0))
8950 length = 0; /* Posix to end of file */
8951
8952 retry:
8953 rfs4_dbe_lock(sp->rs_dbe);
8954 if (sp->rs_closed == TRUE) {
8955 rfs4_dbe_unlock(sp->rs_dbe);
8956 return (NFS4ERR_OLD_STATEID);
8957 }
8958
8959 if (resop->resop != OP_LOCKU) {
8960 switch (locktype) {
8961 case READ_LT:
8962 case READW_LT:
8963 if ((sp->rs_share_access
8964 & OPEN4_SHARE_ACCESS_READ) == 0) {
8965 rfs4_dbe_unlock(sp->rs_dbe);
8966
8967 return (NFS4ERR_OPENMODE);
8968 }
8969 ltype = F_RDLCK;
8970 break;
8971 case WRITE_LT:
8972 case WRITEW_LT:
8973 if ((sp->rs_share_access
8974 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8975 rfs4_dbe_unlock(sp->rs_dbe);
8976
8977 return (NFS4ERR_OPENMODE);
8978 }
8979 ltype = F_WRLCK;
8980 break;
8981 }
8982 } else
8983 ltype = F_UNLCK;
8984
8985 flock.l_type = ltype;
8986 flock.l_whence = 0; /* SEEK_SET */
8987 flock.l_start = offset;
8988 flock.l_len = length;
8989 flock.l_sysid = sysid;
8990 flock.l_pid = lsp->rls_locker->rl_pid;
8991
8992 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8993 if (flock.l_len < 0 || flock.l_start < 0) {
8994 rfs4_dbe_unlock(sp->rs_dbe);
8995 return (NFS4ERR_INVAL);
8996 }
8997
8998 /*
8999 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9000 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9001 */
9002 flag = (int)sp->rs_share_access | F_REMOTELOCK;
9003
9004 vp = sp->rs_finfo->rf_vp;
9005 VN_HOLD(vp);
9006
9007 /*
9008 * We need to unlock sp before we call the underlying filesystem to
9009 * acquire the file lock.
9010 */
9011 rfs4_dbe_unlock(sp->rs_dbe);
9012
9013 error = setlock(vp, &flock, flag, cred);
9014
9015 /*
9016 * Make sure the file is still open. In a case the file was closed in
9017 * the meantime, clean the lock we acquired using the setlock() call
9018 * above, and return the appropriate error.
9019 */
9020 rfs4_dbe_lock(sp->rs_dbe);
9021 if (sp->rs_closed == TRUE) {
9022 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9023 rfs4_dbe_unlock(sp->rs_dbe);
9024
9025 VN_RELE(vp);
9026
9027 return (NFS4ERR_OLD_STATEID);
9028 }
9029 rfs4_dbe_unlock(sp->rs_dbe);
9030
9031 VN_RELE(vp);
9032
9033 if (error == 0) {
9034 rfs4_dbe_lock(lsp->rls_dbe);
9035 next_stateid(&lsp->rls_lockid);
9036 rfs4_dbe_unlock(lsp->rls_dbe);
9037 }
9038
9039 /*
9040 * N.B. We map error values to nfsv4 errors. This is differrent
9041 * than puterrno4 routine.
9042 */
9043 switch (error) {
9044 case 0:
9045 status = NFS4_OK;
9046 break;
9047 case EAGAIN:
9048 case EACCES: /* Old value */
9049 /* Can only get here if op is OP_LOCK */
9050 ASSERT(resop->resop == OP_LOCK);
9051 lres = &resop->nfs_resop4_u.oplock;
9052 status = NFS4ERR_DENIED;
9053 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9054 == NFS4ERR_EXPIRED)
9055 goto retry;
9056 break;
9057 case ENOLCK:
9058 status = NFS4ERR_DELAY;
9059 break;
9060 case EOVERFLOW:
9061 status = NFS4ERR_INVAL;
9062 break;
9063 case EINVAL:
9064 status = NFS4ERR_NOTSUPP;
9065 break;
9066 default:
9067 status = NFS4ERR_SERVERFAULT;
9068 break;
9069 }
9070
9071 return (status);
9072 }
9073
9074 /*ARGSUSED*/
9075 void
rfs4_op_lock(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9076 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
9077 struct svc_req *req, struct compound_state *cs)
9078 {
9079 LOCK4args *args = &argop->nfs_argop4_u.oplock;
9080 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
9081 nfsstat4 status;
9082 stateid4 *stateid;
9083 rfs4_lockowner_t *lo;
9084 rfs4_client_t *cp;
9085 rfs4_state_t *sp = NULL;
9086 rfs4_lo_state_t *lsp = NULL;
9087 bool_t ls_sw_held = FALSE;
9088 bool_t create = TRUE;
9089 bool_t lcreate = TRUE;
9090 bool_t dup_lock = FALSE;
9091 int rc;
9092
9093 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
9094 LOCK4args *, args);
9095
9096 if (cs->vp == NULL) {
9097 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9098 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9099 cs, LOCK4res *, resp);
9100 return;
9101 }
9102
9103 if (args->locker.new_lock_owner) {
9104 /* Create a new lockowner for this instance */
9105 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
9106
9107 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
9108
9109 stateid = &olo->open_stateid;
9110 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
9111 if (status != NFS4_OK) {
9112 NFS4_DEBUG(rfs4_debug,
9113 (CE_NOTE, "Get state failed in lock %d", status));
9114 *cs->statusp = resp->status = status;
9115 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9116 cs, LOCK4res *, resp);
9117 return;
9118 }
9119
9120 /* Ensure specified filehandle matches */
9121 if (cs->vp != sp->rs_finfo->rf_vp) {
9122 rfs4_state_rele(sp);
9123 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9124 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9125 cs, LOCK4res *, resp);
9126 return;
9127 }
9128
9129 /* hold off other access to open_owner while we tinker */
9130 rfs4_sw_enter(&sp->rs_owner->ro_sw);
9131
9132 switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) {
9133 case NFS4_CHECK_STATEID_OLD:
9134 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9135 goto end;
9136 case NFS4_CHECK_STATEID_BAD:
9137 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9138 goto end;
9139 case NFS4_CHECK_STATEID_EXPIRED:
9140 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9141 goto end;
9142 case NFS4_CHECK_STATEID_UNCONFIRMED:
9143 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9144 goto end;
9145 case NFS4_CHECK_STATEID_CLOSED:
9146 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9147 goto end;
9148 case NFS4_CHECK_STATEID_OKAY:
9149 if (rfs4_has_session(cs))
9150 break;
9151 /* FALLTHROUGH */
9152 case NFS4_CHECK_STATEID_REPLAY:
9153 ASSERT(!rfs4_has_session(cs));
9154
9155 switch (rfs4_check_olo_seqid(olo->open_seqid,
9156 sp->rs_owner, resop)) {
9157 case NFS4_CHKSEQ_OKAY:
9158 if (rc == NFS4_CHECK_STATEID_OKAY)
9159 break;
9160 /*
9161 * This is replayed stateid; if seqid
9162 * matches next expected, then client
9163 * is using wrong seqid.
9164 */
9165 /* FALLTHROUGH */
9166 case NFS4_CHKSEQ_BAD:
9167 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9168 goto end;
9169 case NFS4_CHKSEQ_REPLAY:
9170 /* This is a duplicate LOCK request */
9171 dup_lock = TRUE;
9172
9173 /*
9174 * For a duplicate we do not want to
9175 * create a new lockowner as it should
9176 * already exist.
9177 * Turn off the lockowner create flag.
9178 */
9179 lcreate = FALSE;
9180 }
9181 break;
9182 }
9183
9184 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
9185 if (lo == NULL) {
9186 NFS4_DEBUG(rfs4_debug,
9187 (CE_NOTE, "rfs4_op_lock: no lock owner"));
9188 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
9189 goto end;
9190 }
9191
9192 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
9193 if (lsp == NULL) {
9194 rfs4_update_lease(sp->rs_owner->ro_client);
9195 /*
9196 * Only update theh open_seqid if this is not
9197 * a duplicate request
9198 */
9199 if (dup_lock == FALSE) {
9200 rfs4_update_open_sequence(sp->rs_owner);
9201 }
9202
9203 NFS4_DEBUG(rfs4_debug,
9204 (CE_NOTE, "rfs4_op_lock: no state"));
9205 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
9206 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9207 rfs4_lockowner_rele(lo);
9208 goto end;
9209 }
9210
9211 /*
9212 * This is the new_lock_owner branch and the client is
9213 * supposed to be associating a new lock_owner with
9214 * the open file at this point. If we find that a
9215 * lock_owner/state association already exists and a
9216 * successful LOCK request was returned to the client,
9217 * an error is returned to the client since this is
9218 * not appropriate. The client should be using the
9219 * existing lock_owner branch.
9220 */
9221 if (!rfs4_has_session(cs) && !dup_lock && !create) {
9222 if (lsp->rls_lock_completed == TRUE) {
9223 *cs->statusp =
9224 resp->status = NFS4ERR_BAD_SEQID;
9225 rfs4_lockowner_rele(lo);
9226 goto end;
9227 }
9228 }
9229
9230 rfs4_update_lease(sp->rs_owner->ro_client);
9231
9232 /*
9233 * Only update theh open_seqid if this is not
9234 * a duplicate request
9235 */
9236 if (dup_lock == FALSE) {
9237 rfs4_update_open_sequence(sp->rs_owner);
9238 }
9239
9240 /*
9241 * If this is a duplicate lock request, just copy the
9242 * previously saved reply and return.
9243 */
9244 if (dup_lock == TRUE) {
9245 /* verify that lock_seqid's match */
9246 if (lsp->rls_seqid != olo->lock_seqid) {
9247 NFS4_DEBUG(rfs4_debug,
9248 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9249 "lsp->seqid=%d old->seqid=%d",
9250 lsp->rls_seqid, olo->lock_seqid));
9251 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9252 } else {
9253 rfs4_copy_reply(resop, &lsp->rls_reply);
9254 /*
9255 * Make sure to copy the just
9256 * retrieved reply status into the
9257 * overall compound status
9258 */
9259 *cs->statusp = resp->status;
9260 }
9261 rfs4_lockowner_rele(lo);
9262 goto end;
9263 }
9264
9265 rfs4_dbe_lock(lsp->rls_dbe);
9266
9267 /* Make sure to update the lock sequence id */
9268 lsp->rls_seqid = olo->lock_seqid;
9269
9270 NFS4_DEBUG(rfs4_debug,
9271 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9272
9273 /*
9274 * This is used to signify the newly created lockowner
9275 * stateid and its sequence number. The checks for
9276 * sequence number and increment don't occur on the
9277 * very first lock request for a lockowner.
9278 */
9279 lsp->rls_skip_seqid_check = TRUE;
9280
9281 /* hold off other access to lsp while we tinker */
9282 rfs4_sw_enter(&lsp->rls_sw);
9283 ls_sw_held = TRUE;
9284
9285 rfs4_dbe_unlock(lsp->rls_dbe);
9286
9287 rfs4_lockowner_rele(lo);
9288 } else {
9289 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9290 /* get lsp and hold the lock on the underlying file struct */
9291 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9292 != NFS4_OK) {
9293 *cs->statusp = resp->status = status;
9294 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9295 cs, LOCK4res *, resp);
9296 return;
9297 }
9298 create = FALSE; /* We didn't create lsp */
9299
9300 /* Ensure specified filehandle matches */
9301 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9302 rfs4_lo_state_rele(lsp, TRUE);
9303 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9304 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9305 cs, LOCK4res *, resp);
9306 return;
9307 }
9308
9309 /* hold off other access to lsp while we tinker */
9310 rfs4_sw_enter(&lsp->rls_sw);
9311 ls_sw_held = TRUE;
9312
9313 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9314 /*
9315 * The stateid looks like it was okay (expected to be
9316 * the next one)
9317 */
9318 case NFS4_CHECK_STATEID_OKAY:
9319 if (rfs4_has_session(cs))
9320 break;
9321
9322 /*
9323 * The sequence id is now checked. Determine
9324 * if this is a replay or if it is in the
9325 * expected (next) sequence. In the case of a
9326 * replay, there are two replay conditions
9327 * that may occur. The first is the normal
9328 * condition where a LOCK is done with a
9329 * NFS4_OK response and the stateid is
9330 * updated. That case is handled below when
9331 * the stateid is identified as a REPLAY. The
9332 * second is the case where an error is
9333 * returned, like NFS4ERR_DENIED, and the
9334 * sequence number is updated but the stateid
9335 * is not updated. This second case is dealt
9336 * with here. So it may seem odd that the
9337 * stateid is okay but the sequence id is a
9338 * replay but it is okay.
9339 */
9340 switch (rfs4_check_lock_seqid(
9341 args->locker.locker4_u.lock_owner.lock_seqid,
9342 lsp, resop)) {
9343 case NFS4_CHKSEQ_REPLAY:
9344 if (resp->status != NFS4_OK) {
9345 /*
9346 * Here is our replay and need
9347 * to verify that the last
9348 * response was an error.
9349 */
9350 *cs->statusp = resp->status;
9351 goto end;
9352 }
9353 /*
9354 * This is done since the sequence id
9355 * looked like a replay but it didn't
9356 * pass our check so a BAD_SEQID is
9357 * returned as a result.
9358 */
9359 /*FALLTHROUGH*/
9360 case NFS4_CHKSEQ_BAD:
9361 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9362 goto end;
9363 case NFS4_CHKSEQ_OKAY:
9364 /* Everything looks okay move ahead */
9365 break;
9366 }
9367 break;
9368 case NFS4_CHECK_STATEID_OLD:
9369 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9370 goto end;
9371 case NFS4_CHECK_STATEID_BAD:
9372 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9373 goto end;
9374 case NFS4_CHECK_STATEID_EXPIRED:
9375 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9376 goto end;
9377 case NFS4_CHECK_STATEID_CLOSED:
9378 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9379 goto end;
9380 case NFS4_CHECK_STATEID_REPLAY:
9381 ASSERT(!rfs4_has_session(cs));
9382
9383 switch (rfs4_check_lock_seqid(
9384 args->locker.locker4_u.lock_owner.lock_seqid,
9385 lsp, resop)) {
9386 case NFS4_CHKSEQ_OKAY:
9387 /*
9388 * This is a replayed stateid; if
9389 * seqid matches the next expected,
9390 * then client is using wrong seqid.
9391 */
9392 case NFS4_CHKSEQ_BAD:
9393 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9394 goto end;
9395 case NFS4_CHKSEQ_REPLAY:
9396 rfs4_update_lease(lsp->rls_locker->rl_client);
9397 *cs->statusp = status = resp->status;
9398 goto end;
9399 }
9400 break;
9401 default:
9402 ASSERT(FALSE);
9403 break;
9404 }
9405
9406 rfs4_update_lock_sequence(lsp);
9407 rfs4_update_lease(lsp->rls_locker->rl_client);
9408 }
9409
9410 /*
9411 * NFS4 only allows locking on regular files, so
9412 * verify type of object.
9413 */
9414 if (cs->vp->v_type != VREG) {
9415 if (cs->vp->v_type == VDIR)
9416 status = NFS4ERR_ISDIR;
9417 else
9418 status = NFS4ERR_INVAL;
9419 goto out;
9420 }
9421
9422 cp = lsp->rls_state->rs_owner->ro_client;
9423
9424 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9425 status = NFS4ERR_GRACE;
9426 goto out;
9427 }
9428
9429 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9430 status = NFS4ERR_NO_GRACE;
9431 goto out;
9432 }
9433
9434 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9435 status = NFS4ERR_NO_GRACE;
9436 goto out;
9437 }
9438
9439 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9440 cs->deleg = TRUE;
9441
9442 status = rfs4_do_lock(lsp, args->locktype,
9443 args->offset, args->length, cs->cr, resop);
9444
9445 out:
9446 lsp->rls_skip_seqid_check = FALSE;
9447
9448 *cs->statusp = resp->status = status;
9449
9450 if (status == NFS4_OK) {
9451 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9452 lsp->rls_lock_completed = TRUE;
9453 }
9454 /*
9455 * Only update the "OPEN" response here if this was a new
9456 * lock_owner
9457 */
9458 if (sp)
9459 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9460
9461 rfs4_update_lock_resp(lsp, resop);
9462
9463 end:
9464 if (lsp) {
9465 if (ls_sw_held)
9466 rfs4_sw_exit(&lsp->rls_sw);
9467 /*
9468 * If an sp obtained, then the lsp does not represent
9469 * a lock on the file struct.
9470 */
9471 if (sp != NULL)
9472 rfs4_lo_state_rele(lsp, FALSE);
9473 else
9474 rfs4_lo_state_rele(lsp, TRUE);
9475 }
9476 if (sp) {
9477 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9478 rfs4_state_rele(sp);
9479 }
9480
9481 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9482 LOCK4res *, resp);
9483 }
9484
9485 /* free function for LOCK/LOCKT */
9486 static void
lock_denied_free(nfs_resop4 * resop)9487 lock_denied_free(nfs_resop4 *resop)
9488 {
9489 LOCK4denied *dp = NULL;
9490
9491 switch (resop->resop) {
9492 case OP_LOCK:
9493 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9494 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9495 break;
9496 case OP_LOCKT:
9497 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9498 dp = &resop->nfs_resop4_u.oplockt.denied;
9499 break;
9500 default:
9501 break;
9502 }
9503
9504 if (dp)
9505 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9506 }
9507
/*
 * rfs4_op_locku: handler for the NFSv4 LOCKU (unlock) operation.
 *
 *	argop - operation arguments (LOCKU4args)
 *	resop - operation result (LOCKU4res)
 *	req   - RPC request (unused)
 *	cs    - compound state; cs->vp is the current filehandle's vnode and
 *		*cs->statusp receives the operation status
 *
 * The lock-owner state named by the lock stateid is looked up and checked
 * against the current filehandle, then the stateid/seqid pair is validated.
 * If that passes, the lock sequence and the client's lease are updated and
 * rfs4_do_lock() performs the unlock.  The `out' label stores the status and
 * updates the saved lock response; the `end' label only drops the holds
 * taken here.
 */
/*ARGSUSED*/
void
rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
	nfsstat4 status;
	stateid4 *stateid = &args->lock_stateid;
	rfs4_lo_state_t *lsp;

	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
	    LOCKU4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* Look up the lock-owner state; released again at `end' */
	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
		*cs->statusp = resp->status = status;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* Ensure specified filehandle matches */
	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
		rfs4_lo_state_rele(lsp, TRUE);
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* hold off other access to lsp while we tinker */
	rfs4_sw_enter(&lsp->rls_sw);

	switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
	case NFS4_CHECK_STATEID_OKAY:
		/* With a session the lock seqid is not checked */
		if (rfs4_has_session(cs))
			break;

		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
		    != NFS4_CHKSEQ_OKAY) {
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		}
		break;
	case NFS4_CHECK_STATEID_OLD:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_BAD:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_EXPIRED:
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	case NFS4_CHECK_STATEID_CLOSED:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_REPLAY:
		ASSERT(!rfs4_has_session(cs));

		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
		case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
		case NFS4_CHKSEQ_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		case NFS4_CHKSEQ_REPLAY:
			/*
			 * A true replay: renew the lease and answer with
			 * the status currently held in resp.
			 */
			rfs4_update_lease(lsp->rls_locker->rl_client);
			*cs->statusp = status = resp->status;
			goto end;
		}
		break;
	default:
		ASSERT(FALSE);
		break;
	}

	rfs4_update_lock_sequence(lsp);
	rfs4_update_lease(lsp->rls_locker->rl_client);

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	/* No unlocking while the client is in its grace period */
	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* rfs4_do_lock() unlocks because resop is an OP_LOCKU */
	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	*cs->statusp = resp->status = status;

	if (status == NFS4_OK)
		resp->lock_stateid = lsp->rls_lockid.stateid;

	rfs4_update_lock_resp(lsp, resop);

end:
	rfs4_sw_exit(&lsp->rls_sw);
	rfs4_lo_state_rele(lsp, TRUE);

	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
	    LOCKU4res *, resp);
}
9633
9634 /*
9635 * LOCKT is a best effort routine, the client can not be guaranteed that
9636 * the status return is still in effect by the time the reply is received.
9637 * They are numerous race conditions in this routine, but we are not required
9638 * and can not be accurate.
9639 */
9640 /*ARGSUSED*/
9641 void
rfs4_op_lockt(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9642 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9643 struct svc_req *req, struct compound_state *cs)
9644 {
9645 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9646 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9647 rfs4_lockowner_t *lo;
9648 rfs4_client_t *cp;
9649 bool_t create = FALSE;
9650 struct flock64 flk;
9651 int error;
9652 int flag = FREAD | FWRITE;
9653 int ltype;
9654 length4 posix_length;
9655 sysid_t sysid;
9656 pid_t pid;
9657
9658 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9659 LOCKT4args *, args);
9660
9661 if (cs->vp == NULL) {
9662 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9663 goto out;
9664 }
9665
9666 /*
9667 * NFS4 only allows locking on regular files, so
9668 * verify type of object.
9669 */
9670 if (cs->vp->v_type != VREG) {
9671 if (cs->vp->v_type == VDIR)
9672 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9673 else
9674 *cs->statusp = resp->status = NFS4ERR_INVAL;
9675 goto out;
9676 }
9677
9678 /*
9679 * Check out the clientid to ensure the server knows about it
9680 * so that we correctly inform the client of a server reboot.
9681 */
9682 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9683 == NULL) {
9684 *cs->statusp = resp->status =
9685 rfs4_check_clientid(&args->owner.clientid, 0);
9686 goto out;
9687 }
9688 if (rfs4_lease_expired(cp)) {
9689 rfs4_client_close(cp);
9690 /*
9691 * Protocol doesn't allow returning NFS4ERR_STALE as
9692 * other operations do on this check so STALE_CLIENTID
9693 * is returned instead
9694 */
9695 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9696 goto out;
9697 }
9698
9699 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9700 *cs->statusp = resp->status = NFS4ERR_GRACE;
9701 rfs4_client_rele(cp);
9702 goto out;
9703 }
9704 rfs4_client_rele(cp);
9705
9706 resp->status = NFS4_OK;
9707
9708 switch (args->locktype) {
9709 case READ_LT:
9710 case READW_LT:
9711 ltype = F_RDLCK;
9712 break;
9713 case WRITE_LT:
9714 case WRITEW_LT:
9715 ltype = F_WRLCK;
9716 break;
9717 }
9718
9719 posix_length = args->length;
9720 /* Check for zero length. To lock to end of file use all ones for V4 */
9721 if (posix_length == 0) {
9722 *cs->statusp = resp->status = NFS4ERR_INVAL;
9723 goto out;
9724 } else if (posix_length == (length4)(~0)) {
9725 posix_length = 0; /* Posix to end of file */
9726 }
9727
9728 /* Find or create a lockowner */
9729 lo = rfs4_findlockowner(&args->owner, &create);
9730
9731 if (lo) {
9732 pid = lo->rl_pid;
9733 if ((resp->status =
9734 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9735 goto err;
9736 } else {
9737 pid = 0;
9738 sysid = lockt_sysid;
9739 }
9740 retry:
9741 flk.l_type = ltype;
9742 flk.l_whence = 0; /* SEEK_SET */
9743 flk.l_start = args->offset;
9744 flk.l_len = posix_length;
9745 flk.l_sysid = sysid;
9746 flk.l_pid = pid;
9747 flag |= F_REMOTELOCK;
9748
9749 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9750
9751 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9752 if (flk.l_len < 0 || flk.l_start < 0) {
9753 resp->status = NFS4ERR_INVAL;
9754 goto err;
9755 }
9756 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9757 NULL, cs->cr, NULL);
9758
9759 /*
9760 * N.B. We map error values to nfsv4 errors. This is differrent
9761 * than puterrno4 routine.
9762 */
9763 switch (error) {
9764 case 0:
9765 if (flk.l_type == F_UNLCK)
9766 resp->status = NFS4_OK;
9767 else {
9768 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9769 goto retry;
9770 resp->status = NFS4ERR_DENIED;
9771 }
9772 break;
9773 case EOVERFLOW:
9774 resp->status = NFS4ERR_INVAL;
9775 break;
9776 case EINVAL:
9777 resp->status = NFS4ERR_NOTSUPP;
9778 break;
9779 default:
9780 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9781 error);
9782 resp->status = NFS4ERR_SERVERFAULT;
9783 break;
9784 }
9785
9786 err:
9787 if (lo)
9788 rfs4_lockowner_rele(lo);
9789 *cs->statusp = resp->status;
9790 out:
9791 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9792 LOCKT4res *, resp);
9793 }
9794
9795 int
rfs4_share(rfs4_state_t * sp,uint32_t access,uint32_t deny)9796 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9797 {
9798 int err;
9799 int cmd;
9800 vnode_t *vp;
9801 struct shrlock shr;
9802 struct shr_locowner shr_loco;
9803 int fflags = 0;
9804
9805 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9806 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9807
9808 if (sp->rs_closed)
9809 return (NFS4ERR_OLD_STATEID);
9810
9811 vp = sp->rs_finfo->rf_vp;
9812 ASSERT(vp);
9813
9814 shr.s_access = shr.s_deny = 0;
9815
9816 if (access & OPEN4_SHARE_ACCESS_READ) {
9817 fflags |= FREAD;
9818 shr.s_access |= F_RDACC;
9819 }
9820 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9821 fflags |= FWRITE;
9822 shr.s_access |= F_WRACC;
9823 }
9824 ASSERT(shr.s_access);
9825
9826 if (deny & OPEN4_SHARE_DENY_READ)
9827 shr.s_deny |= F_RDDNY;
9828 if (deny & OPEN4_SHARE_DENY_WRITE)
9829 shr.s_deny |= F_WRDNY;
9830
9831 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9832 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9833 shr_loco.sl_pid = shr.s_pid;
9834 shr_loco.sl_id = shr.s_sysid;
9835 shr.s_owner = (caddr_t)&shr_loco;
9836 shr.s_own_len = sizeof (shr_loco);
9837
9838 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9839
9840 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9841 if (err != 0) {
9842 if (err == EAGAIN)
9843 err = NFS4ERR_SHARE_DENIED;
9844 else
9845 err = puterrno4(err);
9846 return (err);
9847 }
9848
9849 sp->rs_share_access |= access;
9850 sp->rs_share_deny |= deny;
9851
9852 return (0);
9853 }
9854
9855 int
rfs4_unshare(rfs4_state_t * sp)9856 rfs4_unshare(rfs4_state_t *sp)
9857 {
9858 int err;
9859 struct shrlock shr;
9860 struct shr_locowner shr_loco;
9861
9862 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9863
9864 if (sp->rs_closed || sp->rs_share_access == 0)
9865 return (0);
9866
9867 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9868 ASSERT(sp->rs_finfo->rf_vp);
9869
9870 shr.s_access = shr.s_deny = 0;
9871 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9872 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9873 shr_loco.sl_pid = shr.s_pid;
9874 shr_loco.sl_id = shr.s_sysid;
9875 shr.s_owner = (caddr_t)&shr_loco;
9876 shr.s_own_len = sizeof (shr_loco);
9877
9878 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9879 NULL);
9880 if (err != 0) {
9881 err = puterrno4(err);
9882 return (err);
9883 }
9884
9885 sp->rs_share_access = 0;
9886 sp->rs_share_deny = 0;
9887
9888 return (0);
9889
9890 }
9891
9892 static int
rdma_setup_read_data4(READ4args * args,READ4res * rok)9893 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9894 {
9895 struct clist *wcl;
9896 count4 count = rok->data_len;
9897 int wlist_len;
9898
9899 wcl = args->wlist;
9900 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9901 return (FALSE);
9902 }
9903 wcl = args->wlist;
9904 rok->wlist_len = wlist_len;
9905 rok->wlist = wcl;
9906 return (TRUE);
9907 }
9908
/*
 * Tunable to disable server referrals.  When non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9911
9912 /*
9913 * Find an NFS record in reparse point data.
9914 * Returns 0 for success and <0 or an errno value on failure.
9915 */
9916 int
vn_find_nfs_record(vnode_t * vp,nvlist_t ** nvlp,char ** svcp,char ** datap)9917 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9918 {
9919 int err;
9920 char *stype, *val;
9921 nvlist_t *nvl;
9922 nvpair_t *curr;
9923
9924 if ((nvl = reparse_init()) == NULL)
9925 return (-1);
9926
9927 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9928 reparse_free(nvl);
9929 return (err);
9930 }
9931
9932 curr = NULL;
9933 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9934 if ((stype = nvpair_name(curr)) == NULL) {
9935 reparse_free(nvl);
9936 return (-2);
9937 }
9938 if (strncasecmp(stype, "NFS", 3) == 0)
9939 break;
9940 }
9941
9942 if ((curr == NULL) ||
9943 (nvpair_value_string(curr, &val))) {
9944 reparse_free(nvl);
9945 return (-3);
9946 }
9947 *nvlp = nvl;
9948 *svcp = stype;
9949 *datap = val;
9950 return (0);
9951 }
9952
9953 int
vn_is_nfs_reparse(vnode_t * vp,cred_t * cr)9954 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9955 {
9956 nvlist_t *nvl;
9957 char *s, *d;
9958
9959 if (rfs4_no_referrals != 0)
9960 return (B_FALSE);
9961
9962 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9963 return (B_FALSE);
9964
9965 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9966 return (B_FALSE);
9967
9968 reparse_free(nvl);
9969
9970 return (B_TRUE);
9971 }
9972
9973 /*
9974 * There is a user-level copy of this routine in ref_subr.c.
9975 * Changes should be kept in sync.
9976 */
9977 static int
nfs4_create_components(char * path,component4 * comp4)9978 nfs4_create_components(char *path, component4 *comp4)
9979 {
9980 int slen, plen, ncomp;
9981 char *ori_path, *nxtc, buf[MAXNAMELEN];
9982
9983 if (path == NULL)
9984 return (0);
9985
9986 plen = strlen(path) + 1; /* include the terminator */
9987 ori_path = path;
9988 ncomp = 0;
9989
9990 /* count number of components in the path */
9991 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9992 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9993 if ((slen = nxtc - path) == 0) {
9994 path = nxtc + 1;
9995 continue;
9996 }
9997
9998 if (comp4 != NULL) {
9999 bcopy(path, buf, slen);
10000 buf[slen] = '\0';
10001 (void) str_to_utf8(buf, &comp4[ncomp]);
10002 }
10003
10004 ncomp++; /* 1 valid component */
10005 path = nxtc + 1;
10006 }
10007 if (*nxtc == '\0' || *nxtc == '\n')
10008 break;
10009 }
10010
10011 return (ncomp);
10012 }
10013
10014 /*
10015 * There is a user-level copy of this routine in ref_subr.c.
10016 * Changes should be kept in sync.
10017 */
10018 static int
make_pathname4(char * path,pathname4 * pathname)10019 make_pathname4(char *path, pathname4 *pathname)
10020 {
10021 int ncomp;
10022 component4 *comp4;
10023
10024 if (pathname == NULL)
10025 return (0);
10026
10027 if (path == NULL) {
10028 pathname->pathname4_val = NULL;
10029 pathname->pathname4_len = 0;
10030 return (0);
10031 }
10032
10033 /* count number of components to alloc buffer */
10034 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10035 pathname->pathname4_val = NULL;
10036 pathname->pathname4_len = 0;
10037 return (0);
10038 }
10039 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10040
10041 /* copy components into allocated buffer */
10042 ncomp = nfs4_create_components(path, comp4);
10043
10044 pathname->pathname4_val = comp4;
10045 pathname->pathname4_len = ncomp;
10046
10047 return (ncomp);
10048 }
10049
/*
 * Local alias: an fs_locations4 is encoded/decoded with the XDR routine
 * xdr_fattr4_fs_locations().  Used by fetch_referral() below.
 */
#define xdr_fs_locations4 xdr_fattr4_fs_locations
10051
10052 fs_locations4 *
fetch_referral(vnode_t * vp,cred_t * cr)10053 fetch_referral(vnode_t *vp, cred_t *cr)
10054 {
10055 nvlist_t *nvl;
10056 char *stype, *sdata;
10057 fs_locations4 *result;
10058 char buf[1024];
10059 size_t bufsize;
10060 XDR xdr;
10061 int err;
10062
10063 /*
10064 * Check attrs to ensure it's a reparse point
10065 */
10066 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10067 return (NULL);
10068
10069 /*
10070 * Look for an NFS record and get the type and data
10071 */
10072 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10073 return (NULL);
10074
10075 /*
10076 * With the type and data, upcall to get the referral
10077 */
10078 bufsize = sizeof (buf);
10079 bzero(buf, sizeof (buf));
10080 err = reparse_kderef((const char *)stype, (const char *)sdata,
10081 buf, &bufsize);
10082 reparse_free(nvl);
10083
10084 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10085 char *, stype, char *, sdata, char *, buf, int, err);
10086 if (err) {
10087 cmn_err(CE_NOTE,
10088 "reparsed daemon not running: unable to get referral (%d)",
10089 err);
10090 return (NULL);
10091 }
10092
10093 /*
10094 * We get an XDR'ed record back from the kderef call
10095 */
10096 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10097 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10098 err = xdr_fs_locations4(&xdr, result);
10099 XDR_DESTROY(&xdr);
10100 if (err != TRUE) {
10101 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10102 int, err);
10103 return (NULL);
10104 }
10105
10106 /*
10107 * Look at path to recover fs_root, ignoring the leading '/'
10108 */
10109 (void) make_pathname4(vp->v_path, &result->fs_root);
10110
10111 return (result);
10112 }
10113
10114 char *
build_symlink(vnode_t * vp,cred_t * cr,size_t * strsz)10115 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10116 {
10117 fs_locations4 *fsl;
10118 fs_location4 *fs;
10119 char *server, *path, *symbuf;
10120 static char *prefix = "/net/";
10121 int i, size, npaths;
10122 uint_t len;
10123
10124 /* Get the referral */
10125 if ((fsl = fetch_referral(vp, cr)) == NULL)
10126 return (NULL);
10127
10128 /* Deal with only the first location and first server */
10129 fs = &fsl->locations_val[0];
10130 server = utf8_to_str(&fs->server_val[0], &len, NULL);
10131 if (server == NULL) {
10132 rfs4_free_fs_locations4(fsl);
10133 kmem_free(fsl, sizeof (fs_locations4));
10134 return (NULL);
10135 }
10136
10137 /* Figure out size for "/net/" + host + /path/path/path + NULL */
10138 size = strlen(prefix) + len;
10139 for (i = 0; i < fs->rootpath.pathname4_len; i++)
10140 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10141
10142 /* Allocate the symlink buffer and fill it */
10143 symbuf = kmem_zalloc(size, KM_SLEEP);
10144 (void) strcat(symbuf, prefix);
10145 (void) strcat(symbuf, server);
10146 kmem_free(server, len);
10147
10148 npaths = 0;
10149 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10150 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10151 if (path == NULL)
10152 continue;
10153 (void) strcat(symbuf, "/");
10154 (void) strcat(symbuf, path);
10155 npaths++;
10156 kmem_free(path, len);
10157 }
10158
10159 rfs4_free_fs_locations4(fsl);
10160 kmem_free(fsl, sizeof (fs_locations4));
10161
10162 if (strsz != NULL)
10163 *strsz = size;
10164 return (symbuf);
10165 }
10166
10167 /*
10168 * Check to see if we have a downrev Solaris client, so that we
10169 * can send it a symlink instead of a referral.
10170 */
10171 int
client_is_downrev(struct svc_req * req)10172 client_is_downrev(struct svc_req *req)
10173 {
10174 struct sockaddr *ca;
10175 rfs4_clntip_t *ci;
10176 bool_t create = FALSE;
10177 int is_downrev;
10178
10179 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10180 ASSERT(ca);
10181 ci = rfs4_find_clntip(ca, &create);
10182 if (ci == NULL)
10183 return (0);
10184 is_downrev = ci->ri_no_referrals;
10185 rfs4_dbe_rele(ci->ri_dbe);
10186 return (is_downrev);
10187 }
10188
10189 /*
10190 * Do the main work of handling HA-NFSv4 Resource Group failover on
10191 * Sun Cluster.
10192 * We need to detect whether any RG admin paths have been added or removed,
10193 * and adjust resources accordingly.
10194 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10195 * order to scale, the list and array of paths need to be held in more
10196 * suitable data structures.
10197 */
10198 static void
hanfsv4_failover(nfs4_srv_t * nsrv4)10199 hanfsv4_failover(nfs4_srv_t *nsrv4)
10200 {
10201 int i, start_grace, numadded_paths = 0;
10202 char **added_paths = NULL;
10203 rfs4_dss_path_t *dss_path;
10204
10205 /*
10206 * Note: currently, dss_pathlist cannot be NULL, since
10207 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10208 * make the latter dynamically specified too, the following will
10209 * need to be adjusted.
10210 */
10211
10212 /*
10213 * First, look for removed paths: RGs that have been failed-over
10214 * away from this node.
10215 * Walk the "currently-serving" dss_pathlist and, for each
10216 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10217 * from nfsd. If not, that RG path has been removed.
10218 *
10219 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10220 * any duplicates.
10221 */
10222 dss_path = nsrv4->dss_pathlist;
10223 do {
10224 int found = 0;
10225 char *path = dss_path->path;
10226
10227 /* used only for non-HA so may not be removed */
10228 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10229 dss_path = dss_path->next;
10230 continue;
10231 }
10232
10233 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10234 int cmpret;
10235 char *newpath = rfs4_dss_newpaths[i];
10236
10237 /*
10238 * Since nfsd has sorted rfs4_dss_newpaths for us,
10239 * once the return from strcmp is negative we know
10240 * we've passed the point where "path" should be,
10241 * and can stop searching: "path" has been removed.
10242 */
10243 cmpret = strcmp(path, newpath);
10244 if (cmpret < 0)
10245 break;
10246 if (cmpret == 0) {
10247 found = 1;
10248 break;
10249 }
10250 }
10251
10252 if (found == 0) {
10253 unsigned index = dss_path->index;
10254 rfs4_servinst_t *sip = dss_path->sip;
10255 rfs4_dss_path_t *path_next = dss_path->next;
10256
10257 /*
10258 * This path has been removed.
10259 * We must clear out the servinst reference to
10260 * it, since it's now owned by another
10261 * node: we should not attempt to touch it.
10262 */
10263 ASSERT(dss_path == sip->dss_paths[index]);
10264 sip->dss_paths[index] = NULL;
10265
10266 /* remove from "currently-serving" list, and destroy */
10267 remque(dss_path);
10268 /* allow for NUL */
10269 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10270 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10271
10272 dss_path = path_next;
10273 } else {
10274 /* path was found; not removed */
10275 dss_path = dss_path->next;
10276 }
10277 } while (dss_path != nsrv4->dss_pathlist);
10278
10279 /*
10280 * Now, look for added paths: RGs that have been failed-over
10281 * to this node.
10282 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10283 * for each path, check if it is on the "currently-serving"
10284 * dss_pathlist. If not, that RG path has been added.
10285 *
10286 * Note: we don't do duplicate detection here; nfsd does that for us.
10287 *
10288 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10289 * an upper bound for the size needed for added_paths[numadded_paths].
10290 */
10291
10292 /* probably more space than we need, but guaranteed to be enough */
10293 if (rfs4_dss_numnewpaths > 0) {
10294 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10295 added_paths = kmem_zalloc(sz, KM_SLEEP);
10296 }
10297
10298 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10299 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10300 int found = 0;
10301 char *newpath = rfs4_dss_newpaths[i];
10302
10303 dss_path = nsrv4->dss_pathlist;
10304 do {
10305 char *path = dss_path->path;
10306
10307 /* used only for non-HA */
10308 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10309 dss_path = dss_path->next;
10310 continue;
10311 }
10312
10313 if (strncmp(path, newpath, strlen(path)) == 0) {
10314 found = 1;
10315 break;
10316 }
10317
10318 dss_path = dss_path->next;
10319 } while (dss_path != nsrv4->dss_pathlist);
10320
10321 if (found == 0) {
10322 added_paths[numadded_paths] = newpath;
10323 numadded_paths++;
10324 }
10325 }
10326
10327 /* did we find any added paths? */
10328 if (numadded_paths > 0) {
10329
10330 /* create a new server instance, and start its grace period */
10331 start_grace = 1;
10332 /* CSTYLED */
10333 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10334
10335 /* read in the stable storage state from these paths */
10336 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10337
10338 /*
10339 * Multiple failovers during a grace period will cause
10340 * clients of the same resource group to be partitioned
10341 * into different server instances, with different
10342 * grace periods. Since clients of the same resource
10343 * group must be subject to the same grace period,
10344 * we need to reset all currently active grace periods.
10345 */
10346 rfs4_grace_reset_all(nsrv4);
10347 }
10348
10349 if (rfs4_dss_numnewpaths > 0)
10350 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10351 }
10352