1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 31 #include <rpc/types.h> 32 #include <rpc/auth.h> 33 #include <rpc/auth_unix.h> 34 #include <rpc/auth_des.h> 35 #include <rpc/svc.h> 36 #include <rpc/xdr.h> 37 #include <nfs/nfs4.h> 38 #include <nfs/nfs_dispatch.h> 39 #include <nfs/nfs4_drc.h> 40 41 /* 42 * This is the duplicate request cache for NFSv4 43 */ 44 rfs4_drc_t *nfs4_drc = NULL; 45 46 /* 47 * How long the entry can remain in the cache 48 * once it has been sent to the client and not 49 * used in a reply (in seconds) 50 */ 51 unsigned nfs4_drc_lifetime = 1; 52 53 /* 54 * The default size of the duplicate request cache 55 */ 56 uint32_t nfs4_drc_max = 8 * 1024; 57 58 /* 59 * The number of buckets we'd like to hash the 60 * replies into.. do not change this on the fly. 61 */ 62 uint32_t nfs4_drc_hash = 541; 63 64 /* 65 * Initialize a duplicate request cache. 66 */ 67 rfs4_drc_t * 68 rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size, unsigned ttl) 69 { 70 rfs4_drc_t *drc; 71 uint32_t bki; 72 73 ASSERT(drc_size); 74 ASSERT(drc_hash_size); 75 76 drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP); 77 78 drc->max_size = drc_size; 79 drc->in_use = 0; 80 drc->drc_ttl = ttl; 81 82 mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL); 83 84 drc->dr_hash = drc_hash_size; 85 86 drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP); 87 88 for (bki = 0; bki < drc_hash_size; bki++) { 89 list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t), 90 offsetof(rfs4_dupreq_t, dr_bkt_next)); 91 } 92 93 list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t), 94 offsetof(rfs4_dupreq_t, dr_next)); 95 96 return (drc); 97 } 98 99 /* 100 * Destroy a duplicate request cache. 101 */ 102 void 103 rfs4_fini_drc(rfs4_drc_t *drc) 104 { 105 rfs4_dupreq_t *drp, *drp_next; 106 107 ASSERT(drc); 108 109 /* iterate over the dr_cache and free the enties */ 110 for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) { 111 112 if (drp->dr_state == NFS4_DUP_REPLAY) 113 rfs4_compound_free(&(drp->dr_res)); 114 115 if (drp->dr_addr.buf != NULL) 116 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); 117 118 drp_next = list_next(&(drc->dr_cache), drp); 119 120 kmem_free(drp, sizeof (rfs4_dupreq_t)); 121 } 122 123 mutex_destroy(&drc->lock); 124 kmem_free(drc->dr_buckets, 125 sizeof (list_t)*drc->dr_hash); 126 kmem_free(drc, sizeof (rfs4_drc_t)); 127 } 128 129 /* 130 * rfs4_dr_chstate: 131 * 132 * Change the state of a rfs4_dupreq. If it's not in transition 133 * to the FREE state, update the time used and return. If we 134 * are moving to the FREE state then we need to clean up the 135 * compound results and move the entry to the end of the list. 136 */ 137 void 138 rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state) 139 { 140 rfs4_drc_t *drc; 141 142 ASSERT(drp); 143 ASSERT(drp->drc); 144 ASSERT(drp->dr_bkt); 145 ASSERT(MUTEX_HELD(&drp->drc->lock)); 146 147 drp->dr_state = new_state; 148 149 if (new_state != NFS4_DUP_FREE) { 150 gethrestime(&drp->dr_time_used); 151 return; 152 } 153 154 drc = drp->drc; 155 156 /* 157 * Remove entry from the bucket and 158 * dr_cache list, free compound results. 159 */ 160 list_remove(drp->dr_bkt, drp); 161 list_remove(&(drc->dr_cache), drp); 162 rfs4_compound_free(&(drp->dr_res)); 163 } 164 165 /* 166 * rfs4_alloc_dr: 167 * 168 * Pick an entry off the tail -- Use if it is 169 * marked NFS4_DUP_FREE, or is an entry in the 170 * NFS4_DUP_REPLAY state that has timed-out... 171 * Otherwise malloc a new one if we have not reached 172 * our maximum cache limit. 173 * 174 * The list should be in time order, so no need 175 * to traverse backwards looking for a timed out 176 * entry, NFS4_DUP_FREE's are place on the tail. 177 */ 178 rfs4_dupreq_t * 179 rfs4_alloc_dr(rfs4_drc_t *drc) 180 { 181 rfs4_dupreq_t *drp_tail, *drp = NULL; 182 183 ASSERT(drc); 184 ASSERT(MUTEX_HELD(&drc->lock)); 185 186 if ((drp_tail = list_tail(&drc->dr_cache)) != NULL) { 187 188 switch (drp_tail->dr_state) { 189 190 case NFS4_DUP_FREE: 191 list_remove(&(drc->dr_cache), drp_tail); 192 DTRACE_PROBE1(nfss__i__drc_freeclaim, 193 rfs4_dupreq_t *, drp_tail); 194 return (drp_tail); 195 /* NOTREACHED */ 196 197 case NFS4_DUP_REPLAY: 198 if (gethrestime_sec() > 199 drp_tail->dr_time_used.tv_sec+drc->drc_ttl) { 200 /* this entry has timedout so grab it. */ 201 rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE); 202 DTRACE_PROBE1(nfss__i__drc_ttlclaim, 203 rfs4_dupreq_t *, drp_tail); 204 return (drp_tail); 205 } 206 break; 207 } 208 } 209 210 /* 211 * Didn't find something to recycle have 212 * we hit the cache limit ? 213 */ 214 if (drc->in_use >= drc->max_size) { 215 DTRACE_PROBE1(nfss__i__drc_full, 216 rfs4_drc_t *, drc); 217 return (NULL); 218 } 219 220 221 /* nope, so let's malloc a new one */ 222 drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP); 223 drp->drc = drc; 224 drc->in_use++; 225 gethrestime(&drp->dr_time_created); 226 DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp); 227 228 return (drp); 229 } 230 231 /* 232 * rfs4_find_dr: 233 * 234 * Search for an entry in the duplicate request cache by 235 * calculating the hash index based on the XID, and examining 236 * the entries in the hash bucket. If we find a match stamp the 237 * time_used and return. If the entry does not match it could be 238 * ready to be freed. Once we have searched the bucket and we 239 * have not exhausted the maximum limit for the cache we will 240 * allocate a new entry. 241 */ 242 int 243 rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup) 244 { 245 246 uint32_t the_xid; 247 list_t *dr_bkt; 248 rfs4_dupreq_t *drp; 249 int bktdex; 250 251 /* 252 * Get the XID, calculate the bucket and search to 253 * see if we need to replay from the cache. 254 */ 255 the_xid = req->rq_xprt->xp_xid; 256 bktdex = the_xid % drc->dr_hash; 257 258 dr_bkt = (list_t *) 259 &(drc->dr_buckets[(the_xid % drc->dr_hash)]); 260 261 DTRACE_PROBE3(nfss__i__drc_bktdex, 262 int, bktdex, 263 uint32_t, the_xid, 264 list_t *, dr_bkt); 265 266 *dup = NULL; 267 268 mutex_enter(&drc->lock); 269 /* 270 * Search the bucket for a matching xid and address. 271 */ 272 for (drp = list_head(dr_bkt); drp != NULL; 273 drp = list_next(dr_bkt, drp)) { 274 275 if (drp->dr_xid == the_xid && 276 drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 277 bcmp((caddr_t)drp->dr_addr.buf, 278 (caddr_t)req->rq_xprt->xp_rtaddr.buf, 279 drp->dr_addr.len) == 0) { 280 281 /* 282 * Found a match so REPLAY the Reply 283 */ 284 if (drp->dr_state == NFS4_DUP_REPLAY) { 285 gethrestime(&drp->dr_time_used); 286 mutex_exit(&drc->lock); 287 *dup = drp; 288 DTRACE_PROBE1(nfss__i__drc_replay, 289 rfs4_dupreq_t *, drp); 290 return (NFS4_DUP_REPLAY); 291 } 292 293 /* 294 * This entry must be in transition, so return 295 * the 'pending' status. 296 */ 297 mutex_exit(&drc->lock); 298 return (NFS4_DUP_PENDING); 299 } 300 301 /* 302 * Not a match, but maybe this entry is ready 303 * to be reused. 304 */ 305 if (drp->dr_state == NFS4_DUP_REPLAY && 306 (gethrestime_sec() > 307 drp->dr_time_used.tv_sec+drc->drc_ttl)) { 308 rfs4_dr_chstate(drp, NFS4_DUP_FREE); 309 list_insert_tail(&(drp->drc->dr_cache), drp); 310 } 311 } 312 313 drp = rfs4_alloc_dr(drc); 314 mutex_exit(&drc->lock); 315 316 if (drp == NULL) { 317 return (NFS4_DUP_ERROR); 318 } 319 320 /* 321 * Place at the head of the list, init the state 322 * to NEW and clear the time used field. 323 */ 324 325 drp->dr_state = NFS4_DUP_NEW; 326 drp->dr_time_used.tv_sec = drp->dr_time_used.tv_nsec = 0; 327 328 /* 329 * If needed, resize the address buffer 330 */ 331 if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 332 if (drp->dr_addr.buf != NULL) 333 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); 334 drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 335 drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP); 336 if (drp->dr_addr.buf == NULL) { 337 /* 338 * If the malloc fails, mark the entry 339 * as free and put on the tail. 340 */ 341 drp->dr_addr.maxlen = 0; 342 drp->dr_state = NFS4_DUP_FREE; 343 mutex_enter(&drc->lock); 344 list_insert_tail(&(drc->dr_cache), drp); 345 mutex_exit(&drc->lock); 346 return (NFS4_DUP_ERROR); 347 } 348 } 349 350 351 /* 352 * Copy the address. 353 */ 354 drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 355 356 bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf, 357 (caddr_t)drp->dr_addr.buf, 358 drp->dr_addr.len); 359 360 drp->dr_xid = the_xid; 361 drp->dr_bkt = dr_bkt; 362 363 /* 364 * Insert at the head of the bucket and 365 * the drc lists.. 366 */ 367 mutex_enter(&drc->lock); 368 list_insert_head(&drc->dr_cache, drp); 369 list_insert_head(dr_bkt, drp); 370 mutex_exit(&drc->lock); 371 372 *dup = drp; 373 374 return (NFS4_DUP_NEW); 375 } 376 377 /* 378 * 379 * This function handles the duplicate request cache, 380 * NULL_PROC and COMPOUND procedure calls for NFSv4; 381 * 382 * Passed into this function are:- 383 * 384 * disp A pointer to our dispatch table entry 385 * req The request to process 386 * xprt The server transport handle 387 * ap A pointer to the arguments 388 * 389 * 390 * When appropriate this function is responsible for inserting 391 * the reply into the duplicate cache or replaying an existing 392 * cached reply. 393 * 394 * dr_stat reflects the state of the duplicate request that 395 * has been inserted into or retrieved from the cache 396 * 397 * drp is the duplicate request entry 398 * 399 */ 400 int 401 rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, 402 SVCXPRT *xprt, char *ap) 403 { 404 405 COMPOUND4res res_buf, *rbp; 406 COMPOUND4args *cap; 407 408 cred_t *cr = NULL; 409 int error = 0; 410 int dis_flags = 0; 411 int dr_stat = NFS4_NOT_DUP; 412 rfs4_dupreq_t *drp = NULL; 413 414 ASSERT(disp); 415 416 /* 417 * Short circuit the RPC_NULL proc. 418 */ 419 if (disp->dis_proc == rpc_null) { 420 if (!svc_sendreply(xprt, xdr_void, NULL)) { 421 return (1); 422 } 423 return (0); 424 } 425 426 /* Only NFSv4 Compounds from this point onward */ 427 428 rbp = &res_buf; 429 cap = (COMPOUND4args *)ap; 430 431 /* 432 * Figure out the disposition of the whole COMPOUND 433 * and record it's IDEMPOTENTCY. 434 */ 435 rfs4_compound_flagproc(cap, &dis_flags); 436 437 /* 438 * If NON-IDEMPOTENT then we need to figure out if this 439 * request can be replied from the duplicate cache. 440 * 441 * If this is a new request then we need to insert the 442 * reply into the duplicate cache. 443 */ 444 if (!(dis_flags & RPC_IDEMPOTENT)) { 445 /* look for a replay from the cache or allocate */ 446 dr_stat = rfs4_find_dr(req, nfs4_drc, &drp); 447 448 switch (dr_stat) { 449 450 case NFS4_DUP_ERROR: 451 svcerr_systemerr(xprt); 452 return (1); 453 /* NOTREACHED */ 454 455 case NFS4_DUP_PENDING: 456 /* 457 * reply has previously been inserted into the 458 * duplicate cache, however the reply has 459 * not yet been sent via svc_sendreply() 460 */ 461 return (1); 462 /* NOTREACHED */ 463 464 case NFS4_DUP_NEW: 465 curthread->t_flag |= T_DONTPEND; 466 /* NON-IDEMPOTENT proc call */ 467 rfs4_compound(cap, rbp, NULL, req, cr); 468 469 curthread->t_flag &= ~T_DONTPEND; 470 if (curthread->t_flag & T_WOULDBLOCK) { 471 curthread->t_flag &= ~T_WOULDBLOCK; 472 /* 473 * mark this entry as FREE and plop 474 * on the end of the cache list 475 */ 476 mutex_enter(&drp->drc->lock); 477 rfs4_dr_chstate(drp, NFS4_DUP_FREE); 478 list_insert_tail(&(drp->drc->dr_cache), drp); 479 mutex_exit(&drp->drc->lock); 480 return (1); 481 } 482 drp->dr_res = res_buf; 483 break; 484 485 case NFS4_DUP_REPLAY: 486 /* replay from the cache */ 487 rbp = &(drp->dr_res); 488 break; 489 } 490 } else { 491 curthread->t_flag |= T_DONTPEND; 492 /* IDEMPOTENT proc call */ 493 rfs4_compound(cap, rbp, NULL, req, cr); 494 495 curthread->t_flag &= ~T_DONTPEND; 496 if (curthread->t_flag & T_WOULDBLOCK) { 497 curthread->t_flag &= ~T_WOULDBLOCK; 498 return (1); 499 } 500 } 501 502 /* 503 * Send out the replayed reply or the 'real' one. 504 */ 505 if (!svc_sendreply(xprt, xdr_COMPOUND4res, (char *)rbp)) { 506 DTRACE_PROBE2(nfss__e__dispatch_sendfail, 507 struct svc_req *, xprt, 508 char *, rbp); 509 error++; 510 } 511 512 /* 513 * If this reply was just inserted into the duplicate cache 514 * mark it as available for replay 515 */ 516 if (dr_stat == NFS4_DUP_NEW) { 517 mutex_enter(&drp->drc->lock); 518 rfs4_dr_chstate(drp, NFS4_DUP_REPLAY); 519 mutex_exit(&drp->drc->lock); 520 } else if (dr_stat == NFS4_NOT_DUP) { 521 rfs4_compound_free(rbp); 522 } 523 524 return (error); 525 } 526