/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ /* * This module provides range lock functionality for CIFS/SMB clients. * Lock range service functions process SMB lock and and unlock * requests for a file by applying lock rules and marks file range * as locked if the lock is successful otherwise return proper * error code. */ #include #include #include #include extern caller_context_t smb_ct; static void smb_lock_posix_unlock(smb_node_t *, smb_lock_t *, cred_t *); static boolean_t smb_is_range_unlocked(uint64_t, uint64_t, uint32_t, smb_llist_t *, uint64_t *); static int smb_lock_range_overlap(smb_lock_t *, uint64_t, uint64_t); static uint32_t smb_lock_range_lckrules(smb_request_t *, smb_ofile_t *, smb_node_t *, smb_lock_t *, smb_lock_t **); static clock_t smb_lock_wait(smb_request_t *, smb_lock_t *, smb_lock_t *); static uint32_t smb_lock_range_ulckrules(smb_request_t *, smb_node_t *, uint64_t, uint64_t, smb_lock_t **nodelock); static smb_lock_t *smb_lock_create(smb_request_t *, uint64_t, uint64_t, uint32_t, uint32_t); static void smb_lock_destroy(smb_lock_t *); static void smb_lock_free(smb_lock_t *); /* * Return the number of range locks on the specified ofile. */ uint32_t smb_lock_get_lock_count(smb_node_t *node, smb_ofile_t *of) { smb_lock_t *lock; smb_llist_t *llist; uint32_t count = 0; SMB_NODE_VALID(node); SMB_OFILE_VALID(of); llist = &node->n_lock_list; smb_llist_enter(llist, RW_READER); for (lock = smb_llist_head(llist); lock != NULL; lock = smb_llist_next(llist, lock)) { if (lock->l_file == of) ++count; } smb_llist_exit(llist); return (count); } /* * smb_unlock_range * * locates lock range performed for corresponding to unlock request. * * NT_STATUS_SUCCESS - Lock range performed successfully. * !NT_STATUS_SUCCESS - Error in unlock range operation. */ uint32_t smb_unlock_range( smb_request_t *sr, smb_node_t *node, uint64_t start, uint64_t length) { smb_lock_t *lock = NULL; uint32_t status; /* Apply unlocking rules */ smb_llist_enter(&node->n_lock_list, RW_WRITER); status = smb_lock_range_ulckrules(sr, node, start, length, &lock); if (status != NT_STATUS_SUCCESS) { /* * If lock range is not matching in the list * return error. */ ASSERT(lock == NULL); smb_llist_exit(&node->n_lock_list); return (status); } smb_llist_remove(&node->n_lock_list, lock); smb_lock_posix_unlock(node, lock, sr->user_cr); smb_llist_exit(&node->n_lock_list); smb_lock_destroy(lock); return (status); } /* * smb_lock_range * * Checks for integrity of file lock operation for the given range of file data. * This is performed by applying lock rules with all the elements of the node * lock list. * * Break shared (levelII) oplocks. If there is an exclusive oplock, it is * owned by this ofile and therefore should not be broken. * * The function returns with new lock added if lock request is non-conflicting * with existing range lock for the file. Otherwise smb request is filed * without returning. * * NT_STATUS_SUCCESS - Lock range performed successfully. * !NT_STATUS_SUCCESS - Error in lock range operation. */ uint32_t smb_lock_range( smb_request_t *sr, uint64_t start, uint64_t length, uint32_t timeout, uint32_t locktype) { smb_ofile_t *file = sr->fid_ofile; smb_node_t *node = file->f_node; smb_lock_t *lock; smb_lock_t *clock = NULL; uint32_t result = NT_STATUS_SUCCESS; boolean_t lock_has_timeout = (timeout != 0); lock = smb_lock_create(sr, start, length, locktype, timeout); smb_llist_enter(&node->n_lock_list, RW_WRITER); for (;;) { clock_t rc; /* Apply locking rules */ result = smb_lock_range_lckrules(sr, file, node, lock, &clock); if ((result == NT_STATUS_CANCELLED) || (result == NT_STATUS_SUCCESS) || (result == NT_STATUS_RANGE_NOT_LOCKED)) { ASSERT(clock == NULL); break; } else if (timeout == 0) { break; } ASSERT(result == NT_STATUS_LOCK_NOT_GRANTED); ASSERT(clock); /* * Call smb_lock_wait holding write lock for * node lock list. smb_lock_wait will release * this lock if it blocks. */ ASSERT(node == clock->l_file->f_node); rc = smb_lock_wait(sr, lock, clock); if (rc == 0) { result = NT_STATUS_CANCELLED; break; } if (rc == -1) timeout = 0; clock = NULL; } lock->l_blocked_by = NULL; if (result != NT_STATUS_SUCCESS) { /* * Under certain conditions NT_STATUS_FILE_LOCK_CONFLICT * should be returned instead of NT_STATUS_LOCK_NOT_GRANTED. */ if (result == NT_STATUS_LOCK_NOT_GRANTED) { /* * Locks with timeouts always return * NT_STATUS_FILE_LOCK_CONFLICT */ if (lock_has_timeout) result = NT_STATUS_FILE_LOCK_CONFLICT; /* * Locks starting higher than 0xef000000 that do not * have the MSB set always return * NT_STATUS_FILE_LOCK_CONFLICT */ if ((lock->l_start >= 0xef000000) && !(lock->l_start & (1ULL << 63))) { result = NT_STATUS_FILE_LOCK_CONFLICT; } /* * If the last lock attempt to fail on this file handle * started at the same offset as this one then return * NT_STATUS_FILE_LOCK_CONFLICT */ mutex_enter(&file->f_mutex); if ((file->f_flags & SMB_OFLAGS_LLF_POS_VALID) && (lock->l_start == file->f_llf_pos)) { result = NT_STATUS_FILE_LOCK_CONFLICT; } mutex_exit(&file->f_mutex); } /* Update last lock failed offset */ mutex_enter(&file->f_mutex); file->f_llf_pos = lock->l_start; file->f_flags |= SMB_OFLAGS_LLF_POS_VALID; mutex_exit(&file->f_mutex); smb_lock_free(lock); } else { /* * don't insert into the CIFS lock list unless the * posix lock worked */ if (smb_fsop_frlock(node, lock, B_FALSE, sr->user_cr)) result = NT_STATUS_FILE_LOCK_CONFLICT; else smb_llist_insert_tail(&node->n_lock_list, lock); } smb_llist_exit(&node->n_lock_list); if (result == NT_STATUS_SUCCESS) smb_oplock_break_levelII(node); return (result); } /* * smb_lock_range_access * * scans node lock list * to check if there is any overlapping lock. Overlapping * lock is allowed only under same session and client pid. * * Return values * NT_STATUS_SUCCESS lock access granted. * NT_STATUS_FILE_LOCK_CONFLICT access denied due to lock conflict. */ int smb_lock_range_access( smb_request_t *sr, smb_node_t *node, uint64_t start, uint64_t length, /* zero means to EoF */ boolean_t will_write) { smb_lock_t *lock; smb_llist_t *llist; int status = NT_STATUS_SUCCESS; llist = &node->n_lock_list; smb_llist_enter(llist, RW_READER); /* Search for any applicable lock */ for (lock = smb_llist_head(llist); lock != NULL; lock = smb_llist_next(llist, lock)) { if (!smb_lock_range_overlap(lock, start, length)) /* Lock does not overlap */ continue; if (lock->l_type == SMB_LOCK_TYPE_READONLY && !will_write) continue; if (lock->l_type == SMB_LOCK_TYPE_READWRITE && lock->l_session_kid == sr->session->s_kid && lock->l_pid == sr->smb_pid) continue; status = NT_STATUS_FILE_LOCK_CONFLICT; break; } smb_llist_exit(llist); return (status); } void smb_node_destroy_lock_by_ofile(smb_node_t *node, smb_ofile_t *file) { smb_lock_t *lock; smb_lock_t *nxtl; list_t destroy_list; SMB_NODE_VALID(node); ASSERT(node->n_refcnt); /* * Move locks matching the specified file from the node->n_lock_list * to a temporary list (holding the lock the entire time) then * destroy all the matching locks. We can't call smb_lock_destroy * while we are holding the lock for node->n_lock_list because we will * deadlock and we can't drop the lock because the list contents might * change (for example nxtl might get removed on another thread). */ list_create(&destroy_list, sizeof (smb_lock_t), offsetof(smb_lock_t, l_lnd)); smb_llist_enter(&node->n_lock_list, RW_WRITER); lock = smb_llist_head(&node->n_lock_list); while (lock) { nxtl = smb_llist_next(&node->n_lock_list, lock); if (lock->l_file == file) { smb_llist_remove(&node->n_lock_list, lock); smb_lock_posix_unlock(node, lock, file->f_user->u_cred); list_insert_tail(&destroy_list, lock); } lock = nxtl; } smb_llist_exit(&node->n_lock_list); lock = list_head(&destroy_list); while (lock) { nxtl = list_next(&destroy_list, lock); list_remove(&destroy_list, lock); smb_lock_destroy(lock); lock = nxtl; } list_destroy(&destroy_list); } void smb_lock_range_error(smb_request_t *sr, uint32_t status32) { uint16_t errcode; if (status32 == NT_STATUS_CANCELLED) errcode = ERROR_OPERATION_ABORTED; else errcode = ERRlock; smbsr_error(sr, status32, ERRDOS, errcode); } /* * An SMB variant of nbl_conflict(). * * SMB prevents remove or rename when conflicting locks exist * (unlike NFS, which is why we can't just use nbl_conflict). * * Returns: * NT_STATUS_SHARING_VIOLATION - nbl_share_conflict * NT_STATUS_FILE_LOCK_CONFLICT - nbl_lock_conflict * NT_STATUS_SUCCESS - operation can proceed * * NB: This function used to also check the list of ofiles, * via: smb_lock_range_access() but we _can't_ do that here * due to lock order constraints between node->n_lock_list * and node->vp->vnbllock (taken via nvl_start_crit). * They must be taken in that order, and in here, we * already hold vp->vnbllock. */ DWORD smb_nbl_conflict(smb_node_t *node, uint64_t off, uint64_t len, nbl_op_t op) { int svmand; SMB_NODE_VALID(node); ASSERT(smb_node_in_crit(node)); ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE || op == NBL_REMOVE || op == NBL_RENAME); if (smb_node_is_dir(node)) return (NT_STATUS_SUCCESS); if (nbl_share_conflict(node->vp, op, &smb_ct)) return (NT_STATUS_SHARING_VIOLATION); /* * When checking for lock conflicts, rename and remove * are not allowed, so treat those as read/write. */ if (op == NBL_RENAME || op == NBL_REMOVE) op = NBL_READWRITE; if (nbl_svmand(node->vp, zone_kcred(), &svmand)) svmand = 1; if (nbl_lock_conflict(node->vp, op, off, len, svmand, &smb_ct)) return (NT_STATUS_FILE_LOCK_CONFLICT); return (NT_STATUS_SUCCESS); } /* * smb_lock_posix_unlock * * checks if the current unlock request is in another lock and repeatedly calls * smb_is_range_unlocked on a sliding basis to unlock all bits of the lock * that are not in other locks * */ static void smb_lock_posix_unlock(smb_node_t *node, smb_lock_t *lock, cred_t *cr) { uint64_t new_mark; uint64_t unlock_start; uint64_t unlock_end; smb_lock_t new_unlock; smb_llist_t *llist; boolean_t can_unlock; new_mark = 0; unlock_start = lock->l_start; unlock_end = unlock_start + lock->l_length; llist = &node->n_lock_list; for (;;) { can_unlock = smb_is_range_unlocked(unlock_start, unlock_end, lock->l_file->f_uniqid, llist, &new_mark); if (can_unlock) { if (new_mark) { new_unlock = *lock; new_unlock.l_start = unlock_start; new_unlock.l_length = new_mark - unlock_start; (void) smb_fsop_frlock(node, &new_unlock, B_TRUE, cr); unlock_start = new_mark; } else { new_unlock = *lock; new_unlock.l_start = unlock_start; new_unlock.l_length = unlock_end - unlock_start; (void) smb_fsop_frlock(node, &new_unlock, B_TRUE, cr); break; } } else if (new_mark) { unlock_start = new_mark; } else { break; } } } /* * smb_lock_range_overlap * * Checks if lock range(start, length) overlaps range in lock structure. * * Zero-length byte range locks actually affect no single byte of the stream, * meaning they can still be accessed even with such locks in place. However, * they do conflict with other ranges in the following manner: * conflict will only exist if the positive-length range contains the * zero-length range's offset but doesn't start at it * * return values: * 0 - Lock range doesn't overlap * 1 - Lock range overlaps. */ #define RANGE_NO_OVERLAP 0 #define RANGE_OVERLAP 1 static int smb_lock_range_overlap(struct smb_lock *lock, uint64_t start, uint64_t length) { if (length == 0) { if ((lock->l_start < start) && ((lock->l_start + lock->l_length) > start)) return (RANGE_OVERLAP); return (RANGE_NO_OVERLAP); } /* The following test is intended to catch roll over locks. */ if ((start == lock->l_start) && (length == lock->l_length)) return (RANGE_OVERLAP); if (start < lock->l_start) { if (start + length > lock->l_start) return (RANGE_OVERLAP); } else if (start < lock->l_start + lock->l_length) return (RANGE_OVERLAP); return (RANGE_NO_OVERLAP); } /* * smb_lock_range_lckrules * * Lock range rules: * 1. Overlapping read locks are allowed if the * current locks in the region are only read locks * irrespective of pid of smb client issuing lock request. * * 2. Read lock in the overlapped region of write lock * are allowed if the pervious lock is performed by the * same pid and connection. * * return status: * NT_STATUS_SUCCESS - Input lock range adapts to lock rules. * NT_STATUS_LOCK_NOT_GRANTED - Input lock conflicts lock rules. * NT_STATUS_CANCELLED - Error in processing lock rules */ static uint32_t smb_lock_range_lckrules( smb_request_t *sr, smb_ofile_t *file, smb_node_t *node, smb_lock_t *dlock, smb_lock_t **clockp) { smb_lock_t *lock; uint32_t status = NT_STATUS_SUCCESS; /* Check if file is closed */ if (!smb_ofile_is_open(file)) { return (NT_STATUS_RANGE_NOT_LOCKED); } /* Caller must hold lock for node->n_lock_list */ for (lock = smb_llist_head(&node->n_lock_list); lock != NULL; lock = smb_llist_next(&node->n_lock_list, lock)) { if (!smb_lock_range_overlap(lock, dlock->l_start, dlock->l_length)) continue; /* * Check to see if lock in the overlapping record * is only read lock. Current finding is read * locks can overlapped irrespective of pids. */ if ((lock->l_type == SMB_LOCK_TYPE_READONLY) && (dlock->l_type == SMB_LOCK_TYPE_READONLY)) { continue; } /* * When the read lock overlaps write lock, check if * allowed. */ if ((dlock->l_type == SMB_LOCK_TYPE_READONLY) && !(lock->l_type == SMB_LOCK_TYPE_READONLY)) { if (lock->l_file == sr->fid_ofile && lock->l_session_kid == sr->session->s_kid && lock->l_pid == sr->smb_pid && lock->l_uid == sr->smb_uid) { continue; } } /* Conflict in overlapping lock element */ *clockp = lock; status = NT_STATUS_LOCK_NOT_GRANTED; break; } return (status); } /* * smb_lock_wait * * Wait operation for smb overlapping lock to be released. Caller must hold * write lock for node->n_lock_list so that the set of active locks can't * change unexpectedly. The lock for node->n_lock_list will be released * within this function during the sleep after the lock dependency has * been recorded. * * return value * * 0 The request was canceled. * -1 The timeout was reached. * >0 Condition met. */ static clock_t smb_lock_wait(smb_request_t *sr, smb_lock_t *b_lock, smb_lock_t *c_lock) { clock_t rc; ASSERT(sr->sr_awaiting == NULL); mutex_enter(&sr->sr_mutex); switch (sr->sr_state) { case SMB_REQ_STATE_ACTIVE: /* * Wait up till the timeout time keeping track of actual * time waited for possible retry failure. */ sr->sr_state = SMB_REQ_STATE_WAITING_LOCK; sr->sr_awaiting = c_lock; mutex_exit(&sr->sr_mutex); mutex_enter(&c_lock->l_mutex); /* * The conflict list (l_conflict_list) for a lock contains * all the locks that are blocked by and in conflict with * that lock. Add the new lock to the conflict list for the * active lock. * * l_conflict_list is currently a fancy way of representing * the references/dependencies on a lock. It could be * replaced with a reference count but this approach * has the advantage that MDB can display the lock * dependencies at any point in time. In the future * we should be able to leverage the list to implement * an asynchronous locking model. * * l_blocked_by is the reverse of the conflict list. It * points to the lock that the new lock conflicts with. * As currently implemented this value is purely for * debug purposes -- there are windows of time when * l_blocked_by may be non-NULL even though there is no * conflict list */ b_lock->l_blocked_by = c_lock; smb_slist_insert_tail(&c_lock->l_conflict_list, b_lock); smb_llist_exit(&c_lock->l_file->f_node->n_lock_list); if (SMB_LOCK_INDEFINITE_WAIT(b_lock)) { cv_wait(&c_lock->l_cv, &c_lock->l_mutex); } else { rc = cv_timedwait(&c_lock->l_cv, &c_lock->l_mutex, b_lock->l_end_time); } mutex_exit(&c_lock->l_mutex); smb_llist_enter(&c_lock->l_file->f_node->n_lock_list, RW_WRITER); smb_slist_remove(&c_lock->l_conflict_list, b_lock); mutex_enter(&sr->sr_mutex); sr->sr_awaiting = NULL; if (sr->sr_state == SMB_REQ_STATE_CANCELED) { rc = 0; } else { sr->sr_state = SMB_REQ_STATE_ACTIVE; } break; default: ASSERT(sr->sr_state == SMB_REQ_STATE_CANCELED); rc = 0; break; } mutex_exit(&sr->sr_mutex); return (rc); } /* * smb_lock_range_ulckrules * * 1. Unlock should be performed at exactly matching ends. * This has been changed because overlapping ends is * allowed and there is no other precise way of locating * lock entity in node lock list. * * 2. Unlock is failed if there is no corresponding lock exists. * * Return values * * NT_STATUS_SUCCESS Unlock request matches lock record * pointed by 'nodelock' lock structure. * * NT_STATUS_RANGE_NOT_LOCKED Unlock request doen't match any * of lock record in node lock request or * error in unlock range processing. */ static uint32_t smb_lock_range_ulckrules( smb_request_t *sr, smb_node_t *node, uint64_t start, uint64_t length, smb_lock_t **nodelock) { smb_lock_t *lock; uint32_t status = NT_STATUS_RANGE_NOT_LOCKED; /* Caller must hold lock for node->n_lock_list */ for (lock = smb_llist_head(&node->n_lock_list); lock != NULL; lock = smb_llist_next(&node->n_lock_list, lock)) { if ((start == lock->l_start) && (length == lock->l_length) && lock->l_file == sr->fid_ofile && lock->l_session_kid == sr->session->s_kid && lock->l_pid == sr->smb_pid && lock->l_uid == sr->smb_uid) { *nodelock = lock; status = NT_STATUS_SUCCESS; break; } } return (status); } static smb_lock_t * smb_lock_create( smb_request_t *sr, uint64_t start, uint64_t length, uint32_t locktype, uint32_t timeout) { smb_lock_t *lock; ASSERT(locktype == SMB_LOCK_TYPE_READWRITE || locktype == SMB_LOCK_TYPE_READONLY); lock = kmem_zalloc(sizeof (smb_lock_t), KM_SLEEP); lock->l_magic = SMB_LOCK_MAGIC; lock->l_sr = sr; /* Invalid after lock is active */ lock->l_session_kid = sr->session->s_kid; lock->l_session = sr->session; lock->l_file = sr->fid_ofile; lock->l_uid = sr->smb_uid; lock->l_pid = sr->smb_pid; lock->l_type = locktype; lock->l_start = start; lock->l_length = length; /* * Calculate the absolute end time so that we can use it * in cv_timedwait. */ lock->l_end_time = ddi_get_lbolt() + MSEC_TO_TICK(timeout); if (timeout == UINT_MAX) lock->l_flags |= SMB_LOCK_FLAG_INDEFINITE; mutex_init(&lock->l_mutex, NULL, MUTEX_DEFAULT, NULL); cv_init(&lock->l_cv, NULL, CV_DEFAULT, NULL); smb_slist_constructor(&lock->l_conflict_list, sizeof (smb_lock_t), offsetof(smb_lock_t, l_conflict_lnd)); return (lock); } static void smb_lock_free(smb_lock_t *lock) { smb_slist_destructor(&lock->l_conflict_list); cv_destroy(&lock->l_cv); mutex_destroy(&lock->l_mutex); kmem_free(lock, sizeof (smb_lock_t)); } /* * smb_lock_destroy * * Caller must hold node->n_lock_list */ static void smb_lock_destroy(smb_lock_t *lock) { /* * Caller must hold node->n_lock_list lock. */ mutex_enter(&lock->l_mutex); cv_broadcast(&lock->l_cv); mutex_exit(&lock->l_mutex); /* * The cv_broadcast above should wake up any locks that previous * had conflicts with this lock. Wait for the locking threads * to remove their references to this lock. */ smb_slist_wait_for_empty(&lock->l_conflict_list); smb_lock_free(lock); } /* * smb_is_range_unlocked * * Checks if the current unlock byte range request overlaps another lock * This function is used to determine where POSIX unlocks should be * applied. * * The return code and the value of new_mark must be interpreted as * follows: * * B_TRUE and (new_mark == 0): * This is the last or only lock left to be unlocked * * B_TRUE and (new_mark > 0): * The range from start to new_mark can be unlocked * * B_FALSE and (new_mark == 0): * The unlock can't be performed and we are done * * B_FALSE and (new_mark > 0), * The range from start to new_mark can't be unlocked * Start should be reset to new_mark for the next pass */ static boolean_t smb_is_range_unlocked(uint64_t start, uint64_t end, uint32_t uniqid, smb_llist_t *llist_head, uint64_t *new_mark) { struct smb_lock *lk = NULL; uint64_t low_water_mark = MAXOFFSET_T; uint64_t lk_start; uint64_t lk_end; *new_mark = 0; lk = smb_llist_head(llist_head); while (lk) { if (lk->l_length == 0) { lk = smb_llist_next(llist_head, lk); continue; } if (lk->l_file->f_uniqid != uniqid) { lk = smb_llist_next(llist_head, lk); continue; } lk_end = lk->l_start + lk->l_length - 1; lk_start = lk->l_start; /* * there is no overlap for the first 2 cases * check next node */ if (lk_end < start) { lk = smb_llist_next(llist_head, lk); continue; } if (lk_start > end) { lk = smb_llist_next(llist_head, lk); continue; } /* this range is completely locked */ if ((lk_start <= start) && (lk_end >= end)) { return (B_FALSE); } /* the first part of this range is locked */ if ((start >= lk_start) && (start <= lk_end)) { if (end > lk_end) *new_mark = lk_end + 1; return (B_FALSE); } /* this piece is unlocked */ if ((lk_start >= start) && (lk_start <= end)) { if (low_water_mark > lk_start) low_water_mark = lk_start; } lk = smb_llist_next(llist_head, lk); } if (low_water_mark != MAXOFFSET_T) { *new_mark = low_water_mark; return (B_TRUE); } /* the range is completely unlocked */ return (B_TRUE); }