1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
24 */
25
26/*
27 * This module provides range lock functionality for CIFS/SMB clients.
28 * Lock range service functions process SMB lock and and unlock
29 * requests for a file by applying lock rules and marks file range
30 * as locked if the lock is successful otherwise return proper
31 * error code.
32 */
33
34#include <smbsrv/smb_kproto.h>
35#include <smbsrv/smb_fsops.h>
36#include <sys/nbmlock.h>
37#include <sys/param.h>
38
39extern caller_context_t smb_ct;
40
41#ifdef	DEBUG
42int smb_lock_debug = 0;
43static void smb_lock_dump1(smb_lock_t *);
44static void smb_lock_dumplist(smb_llist_t *);
45static void smb_lock_dumpnode(smb_node_t *);
46#endif
47
48static void smb_lock_posix_unlock(smb_node_t *, smb_lock_t *, cred_t *);
49static boolean_t smb_is_range_unlocked(uint64_t, uint64_t, uint32_t,
50    smb_llist_t *, uint64_t *);
51static int smb_lock_range_overlap(smb_lock_t *, uint64_t, uint64_t);
52static uint32_t smb_lock_range_lckrules(smb_ofile_t *, smb_lock_t *,
53    smb_lock_t **);
54static uint32_t smb_lock_wait(smb_request_t *, smb_lock_t *, smb_lock_t *);
55static uint32_t smb_lock_range_ulckrules(smb_ofile_t *,
56    uint64_t, uint64_t, uint32_t, smb_lock_t **);
57static smb_lock_t *smb_lock_create(smb_request_t *, uint64_t, uint64_t,
58    uint32_t, uint32_t, uint32_t);
59static void smb_lock_destroy(smb_lock_t *);
60static void smb_lock_free(smb_lock_t *);
61
62/*
63 * Return the number of range locks on the specified ofile.
64 */
65uint32_t
66smb_lock_get_lock_count(smb_node_t *node, smb_ofile_t *of)
67{
68	smb_lock_t	*lock;
69	smb_llist_t	*llist;
70	uint32_t	count = 0;
71
72	SMB_NODE_VALID(node);
73	SMB_OFILE_VALID(of);
74
75	llist = &node->n_lock_list;
76
77	smb_llist_enter(llist, RW_READER);
78	for (lock = smb_llist_head(llist);
79	    lock != NULL;
80	    lock = smb_llist_next(llist, lock)) {
81		if (lock->l_file == of)
82			++count;
83	}
84	smb_llist_exit(llist);
85
86	return (count);
87}
88
89/*
90 * smb_unlock_range
91 *
92 * locates lock range performed for corresponding to unlock request.
93 *
94 * NT_STATUS_SUCCESS - Lock range performed successfully.
95 * !NT_STATUS_SUCCESS - Error in unlock range operation.
96 */
97uint32_t
98smb_unlock_range(
99    smb_request_t	*sr,
100    uint64_t		start,
101    uint64_t		length,
102    uint32_t		pid)
103{
104	smb_ofile_t	*file = sr->fid_ofile;
105	smb_node_t	*node = file->f_node;
106	smb_lock_t	*lock = NULL;
107	uint32_t	status;
108
109	if (length > 1 &&
110	    (start + length) < start)
111		return (NT_STATUS_INVALID_LOCK_RANGE);
112
113#ifdef	DEBUG
114	if (smb_lock_debug) {
115		cmn_err(CE_CONT, "smb_unlock_range "
116		    "off=0x%llx, len=0x%llx, f=%p, pid=%d\n",
117		    (long long)start, (long long)length,
118		    (void *)sr->fid_ofile, pid);
119	}
120#endif
121
122	/* Apply unlocking rules */
123	smb_llist_enter(&node->n_lock_list, RW_WRITER);
124	status = smb_lock_range_ulckrules(file, start, length, pid, &lock);
125	if (status != NT_STATUS_SUCCESS) {
126		/*
127		 * If lock range is not matching in the list
128		 * return error.
129		 */
130		ASSERT(lock == NULL);
131	}
132	if (lock != NULL) {
133		smb_llist_remove(&node->n_lock_list, lock);
134		smb_lock_posix_unlock(node, lock, sr->user_cr);
135	}
136
137#ifdef	DEBUG
138	if (smb_lock_debug && lock == NULL) {
139		cmn_err(CE_CONT, "unlock failed, 0x%x\n", status);
140		smb_lock_dumpnode(node);
141	}
142#endif
143
144	smb_llist_exit(&node->n_lock_list);
145
146	if (lock != NULL)
147		smb_lock_destroy(lock);
148
149	return (status);
150}
151
152/*
153 * smb_lock_range
154 *
155 * Checks for integrity of file lock operation for the given range of file data.
156 * This is performed by applying lock rules with all the elements of the node
157 * lock list.
158 *
159 * Break shared (levelII) oplocks. If there is an exclusive oplock, it is
160 * owned by this ofile and therefore should not be broken.
161 *
162 * The function returns with new lock added if lock request is non-conflicting
163 * with existing range lock for the file. Otherwise smb request is filed
164 * without returning.
165 *
166 * NT_STATUS_SUCCESS - Lock range performed successfully.
167 * !NT_STATUS_SUCCESS - Error in lock range operation.
168 */
169uint32_t
170smb_lock_range(
171    smb_request_t	*sr,
172    uint64_t		start,
173    uint64_t		length,
174    uint32_t		pid,
175    uint32_t		locktype,
176    uint32_t		timeout)
177{
178	smb_ofile_t	*file = sr->fid_ofile;
179	smb_node_t	*node = file->f_node;
180	smb_lock_t	*lock;
181	smb_lock_t	*conflict = NULL;
182	uint32_t	result;
183	int		rc;
184	boolean_t	lock_has_timeout =
185	    (timeout != 0 && timeout != UINT_MAX);
186
187	if (length > 1 &&
188	    (start + length) < start)
189		return (NT_STATUS_INVALID_LOCK_RANGE);
190
191#ifdef	DEBUG
192	if (smb_lock_debug) {
193		cmn_err(CE_CONT, "smb_lock_range "
194		    "off=0x%llx, len=0x%llx, "
195		    "f=%p, pid=%d, typ=%d, tmo=%d\n",
196		    (long long)start, (long long)length,
197		    (void *)sr->fid_ofile, pid, locktype, timeout);
198	}
199#endif
200
201	lock = smb_lock_create(sr, start, length, pid, locktype, timeout);
202
203	smb_llist_enter(&node->n_lock_list, RW_WRITER);
204	for (;;) {
205
206		/* Apply locking rules */
207		result = smb_lock_range_lckrules(file, lock, &conflict);
208		switch (result) {
209		case NT_STATUS_LOCK_NOT_GRANTED: /* conflict! */
210			/* may need to wait */
211			break;
212		case NT_STATUS_SUCCESS:
213		case NT_STATUS_FILE_CLOSED:
214			goto break_loop;
215		default:
216			cmn_err(CE_CONT, "smb_lock_range1, status 0x%x\n",
217			    result);
218			goto break_loop;
219		}
220		if (timeout == 0)
221			goto break_loop;
222
223		/*
224		 * Call smb_lock_wait holding write lock for
225		 * node lock list.  smb_lock_wait will release
226		 * the node list lock if it blocks, so after
227		 * the call, (*conflict) may no longer exist.
228		 */
229		result = smb_lock_wait(sr, lock, conflict);
230		conflict = NULL;
231		switch (result) {
232		case NT_STATUS_SUCCESS:
233			/* conflict gone, try again */
234			break;
235		case NT_STATUS_TIMEOUT:
236			/* try just once more */
237			timeout = 0;
238			break;
239		case NT_STATUS_CANCELLED:
240		case NT_STATUS_FILE_CLOSED:
241			goto break_loop;
242		default:
243			cmn_err(CE_CONT, "smb_lock_range2, status 0x%x\n",
244			    result);
245			goto break_loop;
246		}
247	}
248
249break_loop:
250	lock->l_blocked_by = NULL;
251
252	if (result != NT_STATUS_SUCCESS) {
253		if (result == NT_STATUS_FILE_CLOSED)
254			result = NT_STATUS_RANGE_NOT_LOCKED;
255
256		/*
257		 * Under certain conditions NT_STATUS_FILE_LOCK_CONFLICT
258		 * should be returned instead of NT_STATUS_LOCK_NOT_GRANTED.
259		 * All of this appears to be specific to SMB1
260		 */
261		if (sr->session->dialect <= NT_LM_0_12 &&
262		    result == NT_STATUS_LOCK_NOT_GRANTED) {
263			/*
264			 * Locks with timeouts always return
265			 * NT_STATUS_FILE_LOCK_CONFLICT
266			 */
267			if (lock_has_timeout)
268				result = NT_STATUS_FILE_LOCK_CONFLICT;
269
270			/*
271			 * Locks starting higher than 0xef000000 that do not
272			 * have the MSB set always return
273			 * NT_STATUS_FILE_LOCK_CONFLICT
274			 */
275			if ((lock->l_start >= 0xef000000) &&
276			    !(lock->l_start & (1ULL << 63))) {
277				result = NT_STATUS_FILE_LOCK_CONFLICT;
278			}
279
280			/*
281			 * If the last lock attempt to fail on this file handle
282			 * started at the same offset as this one then return
283			 * NT_STATUS_FILE_LOCK_CONFLICT
284			 */
285			mutex_enter(&file->f_mutex);
286			if ((file->f_flags & SMB_OFLAGS_LLF_POS_VALID) &&
287			    (lock->l_start == file->f_llf_pos)) {
288				result = NT_STATUS_FILE_LOCK_CONFLICT;
289			}
290			mutex_exit(&file->f_mutex);
291		}
292
293		/* Update last lock failed offset */
294		mutex_enter(&file->f_mutex);
295		file->f_llf_pos = lock->l_start;
296		file->f_flags |= SMB_OFLAGS_LLF_POS_VALID;
297		mutex_exit(&file->f_mutex);
298
299		smb_lock_free(lock);
300	} else {
301		/*
302		 * don't insert into the CIFS lock list unless the
303		 * posix lock worked
304		 */
305		rc = smb_fsop_frlock(node, lock, B_FALSE, sr->user_cr);
306		if (rc != 0) {
307#ifdef	DEBUG
308			if (smb_lock_debug)
309				cmn_err(CE_CONT, "fop_frlock, err=%d\n", rc);
310#endif
311			result = NT_STATUS_FILE_LOCK_CONFLICT;
312		} else {
313			/*
314			 * We want unlock to find exclusive locks before
315			 * shared locks, so insert those at the head.
316			 */
317			if (lock->l_type == SMB_LOCK_TYPE_READWRITE)
318				smb_llist_insert_head(&node->n_lock_list, lock);
319			else
320				smb_llist_insert_tail(&node->n_lock_list, lock);
321		}
322	}
323
324#ifdef	DEBUG
325	if (smb_lock_debug && result != 0) {
326		cmn_err(CE_CONT, "lock failed, 0x%x\n", result);
327		smb_lock_dumpnode(node);
328	}
329#endif
330
331	smb_llist_exit(&node->n_lock_list);
332
333	if (result == NT_STATUS_SUCCESS) {
334		/* This revokes read cache delegations. */
335		(void) smb_oplock_break_WRITE(node, file);
336	}
337
338	return (result);
339}
340
341/*
342 * smb_lock_range_access
343 *
344 * scans node lock list
345 * to check if there is any overlapping lock. Overlapping
346 * lock is allowed only under same session and client pid.
347 *
348 * Return values
349 *	NT_STATUS_SUCCESS		lock access granted.
350 *	NT_STATUS_FILE_LOCK_CONFLICT	access denied due to lock conflict.
351 */
352int
353smb_lock_range_access(
354    smb_request_t	*sr,
355    smb_node_t		*node,
356    uint64_t		start,
357    uint64_t		length,
358    boolean_t		will_write)
359{
360	smb_lock_t	*lock;
361	smb_llist_t	*llist;
362	uint32_t	lk_pid = 0;
363	int		status = NT_STATUS_SUCCESS;
364
365	if (length == 0)
366		return (status);
367
368	/*
369	 * What PID to use for lock conflict checks?
370	 * SMB2 locking ignores PIDs (have lk_pid=0)
371	 * SMB1 uses low 16 bits of sr->smb_pid
372	 */
373	if (sr->session->dialect < SMB_VERS_2_BASE)
374		lk_pid = sr->smb_pid & 0xFFFF;
375
376	llist = &node->n_lock_list;
377	smb_llist_enter(llist, RW_READER);
378	/* Search for any applicable lock */
379	for (lock = smb_llist_head(llist);
380	    lock != NULL;
381	    lock = smb_llist_next(llist, lock)) {
382
383		if (!smb_lock_range_overlap(lock, start, length))
384			/* Lock does not overlap */
385			continue;
386
387		if (lock->l_type == SMB_LOCK_TYPE_READONLY && !will_write)
388			continue;
389
390		if (lock->l_type == SMB_LOCK_TYPE_READWRITE &&
391		    lock->l_file == sr->fid_ofile &&
392		    lock->l_pid == lk_pid)
393			continue;
394
395#ifdef	DEBUG
396		if (smb_lock_debug) {
397			cmn_err(CE_CONT, "smb_lock_range_access conflict: "
398			    "off=0x%llx, len=0x%llx, "
399			    "f=%p, pid=%d, typ=%d\n",
400			    (long long)lock->l_start,
401			    (long long)lock->l_length,
402			    (void *)lock->l_file,
403			    lock->l_pid, lock->l_type);
404		}
405#endif
406		status = NT_STATUS_FILE_LOCK_CONFLICT;
407		break;
408	}
409	smb_llist_exit(llist);
410	return (status);
411}
412
413/*
414 * The ofile is being closed.  Wake any waiting locks and
415 * clear any granted locks.
416 */
417void
418smb_node_destroy_lock_by_ofile(smb_node_t *node, smb_ofile_t *file)
419{
420	cred_t		*kcr = zone_kcred();
421	smb_lock_t	*lock;
422	smb_lock_t	*nxtl;
423	list_t		destroy_list;
424
425	SMB_NODE_VALID(node);
426	ASSERT(node->n_refcnt);
427
428	/*
429	 * Cancel any waiting locks for this ofile
430	 */
431	smb_llist_enter(&node->n_wlock_list, RW_READER);
432	for (lock = smb_llist_head(&node->n_wlock_list);
433	    lock != NULL;
434	    lock = smb_llist_next(&node->n_wlock_list, lock)) {
435
436		if (lock->l_file == file) {
437			mutex_enter(&lock->l_mutex);
438			lock->l_blocked_by = NULL;
439			lock->l_flags |= SMB_LOCK_FLAG_CLOSED;
440			cv_broadcast(&lock->l_cv);
441			mutex_exit(&lock->l_mutex);
442		}
443	}
444	smb_llist_exit(&node->n_wlock_list);
445
446	/*
447	 * Move locks matching the specified file from the node->n_lock_list
448	 * to a temporary list (holding the lock the entire time) then
449	 * destroy all the matching locks.  We can't call smb_lock_destroy
450	 * while we are holding the lock for node->n_lock_list because we will
451	 * deadlock and we can't drop the lock because the list contents might
452	 * change (for example nxtl might get removed on another thread).
453	 */
454	list_create(&destroy_list, sizeof (smb_lock_t),
455	    offsetof(smb_lock_t, l_lnd));
456
457	smb_llist_enter(&node->n_lock_list, RW_WRITER);
458	lock = smb_llist_head(&node->n_lock_list);
459	while (lock) {
460		nxtl = smb_llist_next(&node->n_lock_list, lock);
461		if (lock->l_file == file) {
462			smb_llist_remove(&node->n_lock_list, lock);
463			smb_lock_posix_unlock(node, lock, kcr);
464			list_insert_tail(&destroy_list, lock);
465		}
466		lock = nxtl;
467	}
468	smb_llist_exit(&node->n_lock_list);
469
470	lock = list_head(&destroy_list);
471	while (lock) {
472		nxtl = list_next(&destroy_list, lock);
473		list_remove(&destroy_list, lock);
474		smb_lock_destroy(lock);
475		lock = nxtl;
476	}
477
478	list_destroy(&destroy_list);
479}
480
481/*
482 * Cause a waiting lock to stop waiting and return an error.
483 * returns same status codes as unlock:
484 * NT_STATUS_SUCCESS, NT_STATUS_RANGE_NOT_LOCKED
485 */
486uint32_t
487smb_lock_range_cancel(smb_request_t *sr,
488    uint64_t start, uint64_t length, uint32_t pid)
489{
490	smb_node_t *node;
491	smb_lock_t *lock;
492	uint32_t status = NT_STATUS_RANGE_NOT_LOCKED;
493	int cnt = 0;
494
495	node = sr->fid_ofile->f_node;
496
497	smb_llist_enter(&node->n_wlock_list, RW_READER);
498
499#ifdef	DEBUG
500	if (smb_lock_debug) {
501		cmn_err(CE_CONT, "smb_lock_range_cancel:\n"
502		    "\tstart=0x%llx, len=0x%llx, of=%p, pid=%d\n",
503		    (long long)start, (long long)length,
504		    (void *)sr->fid_ofile, pid);
505	}
506#endif
507
508	for (lock = smb_llist_head(&node->n_wlock_list);
509	    lock != NULL;
510	    lock = smb_llist_next(&node->n_wlock_list, lock)) {
511
512		if ((start == lock->l_start) &&
513		    (length == lock->l_length) &&
514		    lock->l_file == sr->fid_ofile &&
515		    lock->l_pid == pid) {
516
517			mutex_enter(&lock->l_mutex);
518			lock->l_blocked_by = NULL;
519			lock->l_flags |= SMB_LOCK_FLAG_CANCELLED;
520			cv_broadcast(&lock->l_cv);
521			mutex_exit(&lock->l_mutex);
522			status = NT_STATUS_SUCCESS;
523			cnt++;
524		}
525	}
526
527#ifdef	DEBUG
528	if (smb_lock_debug && cnt != 1) {
529		cmn_err(CE_CONT, "cancel found %d\n", cnt);
530		smb_lock_dumpnode(node);
531	}
532#endif
533
534	smb_llist_exit(&node->n_wlock_list);
535
536	return (status);
537}
538
539void
540smb_lock_range_error(smb_request_t *sr, uint32_t status32)
541{
542	uint16_t errcode;
543
544	if (status32 == NT_STATUS_CANCELLED) {
545		status32 = NT_STATUS_FILE_LOCK_CONFLICT;
546		errcode = ERROR_LOCK_VIOLATION;
547	} else {
548		errcode = ERRlock;
549	}
550
551	smbsr_error(sr, status32, ERRDOS, errcode);
552}
553
554/*
555 * An SMB variant of nbl_conflict().
556 *
557 * SMB prevents remove or rename when conflicting locks exist
558 * (unlike NFS, which is why we can't just use nbl_conflict).
559 *
560 * Returns:
561 *	NT_STATUS_SHARING_VIOLATION - nbl_share_conflict
562 *	NT_STATUS_FILE_LOCK_CONFLICT - nbl_lock_conflict
563 *	NT_STATUS_SUCCESS - operation can proceed
564 *
565 * NB: This function used to also check the list of ofiles,
566 * via: smb_lock_range_access() but we _can't_ do that here
567 * due to lock order constraints between node->n_lock_list
568 * and node->vp->vnbllock (taken via nvl_start_crit).
569 * They must be taken in that order, and in here, we
570 * already hold vp->vnbllock.
571 */
572DWORD
573smb_nbl_conflict(smb_node_t *node, uint64_t off, uint64_t len, nbl_op_t op)
574{
575	int svmand;
576
577	SMB_NODE_VALID(node);
578	ASSERT(smb_node_in_crit(node));
579	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE ||
580	    op == NBL_REMOVE || op == NBL_RENAME);
581
582	if (smb_node_is_dir(node))
583		return (NT_STATUS_SUCCESS);
584
585	if (nbl_share_conflict(node->vp, op, &smb_ct))
586		return (NT_STATUS_SHARING_VIOLATION);
587
588	/*
589	 * When checking for lock conflicts, rename and remove
590	 * are not allowed, so treat those as read/write.
591	 */
592	if (op == NBL_RENAME || op == NBL_REMOVE)
593		op = NBL_READWRITE;
594
595	if (nbl_svmand(node->vp, zone_kcred(), &svmand))
596		svmand = 1;
597
598	if (nbl_lock_conflict(node->vp, op, off, len, svmand, &smb_ct))
599		return (NT_STATUS_FILE_LOCK_CONFLICT);
600
601	return (NT_STATUS_SUCCESS);
602}
603
604/*
605 * smb_lock_posix_unlock
606 *
607 * checks if the current unlock request is in another lock and repeatedly calls
608 * smb_is_range_unlocked on a sliding basis to unlock all bits of the lock
609 * that are not in other locks
610 *
611 */
612static void
613smb_lock_posix_unlock(smb_node_t *node, smb_lock_t *lock, cred_t *cr)
614{
615	uint64_t	new_mark;
616	uint64_t	unlock_start;
617	uint64_t	unlock_end;
618	smb_lock_t	new_unlock;
619	smb_llist_t	*llist;
620	boolean_t	can_unlock;
621
622	new_mark = 0;
623	unlock_start = lock->l_start;
624	unlock_end = unlock_start + lock->l_length;
625	llist = &node->n_lock_list;
626
627	for (;;) {
628		can_unlock = smb_is_range_unlocked(unlock_start, unlock_end,
629		    lock->l_file->f_uniqid, llist, &new_mark);
630		if (can_unlock) {
631			if (new_mark) {
632				new_unlock = *lock;
633				new_unlock.l_start = unlock_start;
634				new_unlock.l_length = new_mark - unlock_start;
635				(void) smb_fsop_frlock(node, &new_unlock,
636				    B_TRUE, cr);
637				unlock_start = new_mark;
638			} else {
639				new_unlock = *lock;
640				new_unlock.l_start = unlock_start;
641				new_unlock.l_length = unlock_end - unlock_start;
642				(void) smb_fsop_frlock(node, &new_unlock,
643				    B_TRUE, cr);
644				break;
645			}
646		} else if (new_mark) {
647			unlock_start = new_mark;
648		} else {
649			break;
650		}
651	}
652}
653
654/*
655 * smb_lock_range_overlap
656 *
657 * Checks if lock range(start, length) overlaps range in lock structure.
658 *
659 * Zero-length byte range locks actually affect no single byte of the stream,
660 * meaning they can still be accessed even with such locks in place. However,
661 * they do conflict with other ranges in the following manner:
662 *  conflict will only exist if the positive-length range contains the
663 *  zero-length range's offset but doesn't start at it
664 *
665 * return values:
666 *	0 - Lock range doesn't overlap
667 *	1 - Lock range overlaps.
668 */
669
670#define	RANGE_NO_OVERLAP	0
671#define	RANGE_OVERLAP		1
672
673static int
674smb_lock_range_overlap(struct smb_lock *lock, uint64_t start, uint64_t length)
675{
676	if (length == 0) {
677		if ((lock->l_start < start) &&
678		    ((lock->l_start + lock->l_length) > start))
679			return (RANGE_OVERLAP);
680
681		return (RANGE_NO_OVERLAP);
682	}
683
684	/* The following test is intended to catch roll over locks. */
685	if ((start == lock->l_start) && (length == lock->l_length))
686		return (RANGE_OVERLAP);
687
688	if (start < lock->l_start) {
689		if (start + length > lock->l_start)
690			return (RANGE_OVERLAP);
691	} else if (start < lock->l_start + lock->l_length)
692		return (RANGE_OVERLAP);
693
694	return (RANGE_NO_OVERLAP);
695}
696
697/*
698 * smb_lock_range_lckrules
699 *
700 * Lock range rules:
701 *	1. Overlapping read locks are allowed if the
702 *	   current locks in the region are only read locks
703 *	   irrespective of pid of smb client issuing lock request.
704 *
705 *	2. Read lock in the overlapped region of write lock
706 *	   are allowed if the previous lock is performed by the
707 *	   same pid and connection.
708 *
709 * return status:
710 *	NT_STATUS_SUCCESS - Input lock range conforms to lock rules.
711 *	NT_STATUS_LOCK_NOT_GRANTED - Input lock conflicts lock rules.
712 *	NT_STATUS_FILE_CLOSED
713 */
714static uint32_t
715smb_lock_range_lckrules(
716    smb_ofile_t		*file,
717    smb_lock_t		*dlock,		/* desired lock */
718    smb_lock_t		**conflictp)
719{
720	smb_node_t	*node = file->f_node;
721	smb_lock_t	*lock;
722	uint32_t	status = NT_STATUS_SUCCESS;
723
724	/* Check if file is closed */
725	if (!smb_ofile_is_open(file)) {
726		return (NT_STATUS_FILE_CLOSED);
727	}
728
729	/* Caller must hold lock for node->n_lock_list */
730	for (lock = smb_llist_head(&node->n_lock_list);
731	    lock != NULL;
732	    lock = smb_llist_next(&node->n_lock_list, lock)) {
733
734		if (!smb_lock_range_overlap(lock, dlock->l_start,
735		    dlock->l_length))
736			continue;
737
738		/*
739		 * Check to see if lock in the overlapping record
740		 * is only read lock. Current finding is read
741		 * locks can overlapped irrespective of pids.
742		 */
743		if ((lock->l_type == SMB_LOCK_TYPE_READONLY) &&
744		    (dlock->l_type == SMB_LOCK_TYPE_READONLY)) {
745			continue;
746		}
747
748		/*
749		 * When the read lock overlaps write lock, check if
750		 * allowed.
751		 */
752		if ((dlock->l_type == SMB_LOCK_TYPE_READONLY) &&
753		    !(lock->l_type == SMB_LOCK_TYPE_READONLY)) {
754			if (lock->l_file == dlock->l_file &&
755			    lock->l_pid == dlock->l_pid) {
756				continue;
757			}
758		}
759
760		/* Conflict in overlapping lock element */
761		*conflictp = lock;
762		status = NT_STATUS_LOCK_NOT_GRANTED;
763		break;
764	}
765
766	return (status);
767}
768
769/*
770 * Cancel method for smb_lock_wait()
771 *
772 * This request is waiting on a lock.  Wakeup everything
773 * waiting on the lock so that the relevant thread regains
774 * control and notices that is has been cancelled.  The
775 * other lock request threads waiting on this lock will go
776 * back to sleep when they discover they are still blocked.
777 */
778static void
779smb_lock_cancel_sr(smb_request_t *sr)
780{
781	smb_lock_t *lock = sr->cancel_arg2;
782
783	ASSERT(lock->l_magic == SMB_LOCK_MAGIC);
784	mutex_enter(&lock->l_mutex);
785	lock->l_blocked_by = NULL;
786	lock->l_flags |= SMB_LOCK_FLAG_CANCELLED;
787	cv_broadcast(&lock->l_cv);
788	mutex_exit(&lock->l_mutex);
789}
790
791/*
792 * smb_lock_wait
793 *
794 * Wait operation for smb overlapping lock to be released.  Caller must hold
795 * write lock for node->n_lock_list so that the set of active locks can't
796 * change unexpectedly.  The lock for node->n_lock_list  will be released
797 * within this function during the sleep after the lock dependency has
798 * been recorded.
799 *
800 * Returns NT_STATUS_SUCCESS when the lock can be granted,
801 * otherwise NT_STATUS_CANCELLED, etc.
802 */
803static uint32_t
804smb_lock_wait(smb_request_t *sr, smb_lock_t *lock, smb_lock_t *conflict)
805{
806	smb_node_t	*node;
807	clock_t		rc;
808	uint32_t	status = NT_STATUS_SUCCESS;
809
810	node = lock->l_file->f_node;
811	ASSERT(node == conflict->l_file->f_node);
812
813	/*
814	 * Let the blocked lock (lock) l_blocked_by point to the
815	 * conflicting lock (conflict), and increment a count of
816	 * conflicts with the latter.  When the conflicting lock
817	 * is destroyed, we'll search the list of waiting locks
818	 * (on the node) and wake any with l_blocked_by ==
819	 * the formerly conflicting lock.
820	 */
821	mutex_enter(&lock->l_mutex);
822	lock->l_blocked_by = conflict;
823	mutex_exit(&lock->l_mutex);
824
825	mutex_enter(&conflict->l_mutex);
826	conflict->l_conflicts++;
827	mutex_exit(&conflict->l_mutex);
828
829	/*
830	 * Put the blocked lock on the waiting list.
831	 */
832	smb_llist_enter(&node->n_wlock_list, RW_WRITER);
833	smb_llist_insert_tail(&node->n_wlock_list, lock);
834	smb_llist_exit(&node->n_wlock_list);
835
836#ifdef	DEBUG
837	if (smb_lock_debug) {
838		cmn_err(CE_CONT, "smb_lock_wait: lock=%p conflict=%p\n",
839		    (void *)lock, (void *)conflict);
840		smb_lock_dumpnode(node);
841	}
842#endif
843
844	/*
845	 * We come in with n_lock_list already held, and keep
846	 * that hold until we're done with conflict (are now).
847	 * Drop that now, and retake later.  Note that the lock
848	 * (*conflict) may go away once we exit this list.
849	 */
850	smb_llist_exit(&node->n_lock_list);
851	conflict = NULL;
852
853	/*
854	 * Before we actually start waiting, setup the hooks
855	 * smb_request_cancel uses to unblock this wait.
856	 */
857	mutex_enter(&sr->sr_mutex);
858	if (sr->sr_state == SMB_REQ_STATE_ACTIVE) {
859		sr->sr_state = SMB_REQ_STATE_WAITING_LOCK;
860		sr->cancel_method = smb_lock_cancel_sr;
861		sr->cancel_arg2 = lock;
862	} else {
863		status = NT_STATUS_CANCELLED;
864	}
865	mutex_exit(&sr->sr_mutex);
866
867	/*
868	 * Now we're ready to actually wait for the conflicting
869	 * lock to be removed, or for the wait to be ended by
870	 * an external cancel, or a timeout.
871	 */
872	mutex_enter(&lock->l_mutex);
873	while (status == NT_STATUS_SUCCESS &&
874	    lock->l_blocked_by != NULL) {
875		if (lock->l_flags & SMB_LOCK_FLAG_INDEFINITE) {
876			cv_wait(&lock->l_cv, &lock->l_mutex);
877		} else {
878			rc = cv_timedwait(&lock->l_cv,
879			    &lock->l_mutex, lock->l_end_time);
880			if (rc < 0)
881				status = NT_STATUS_TIMEOUT;
882		}
883	}
884	if (status == NT_STATUS_SUCCESS) {
885		if (lock->l_flags & SMB_LOCK_FLAG_CANCELLED)
886			status = NT_STATUS_CANCELLED;
887		if (lock->l_flags & SMB_LOCK_FLAG_CLOSED)
888			status = NT_STATUS_FILE_CLOSED;
889	}
890	mutex_exit(&lock->l_mutex);
891
892	/*
893	 * Done waiting.  Cleanup cancel hooks and
894	 * finish SR state transitions.
895	 */
896	mutex_enter(&sr->sr_mutex);
897	sr->cancel_method = NULL;
898	sr->cancel_arg2 = NULL;
899
900	switch (sr->sr_state) {
901	case SMB_REQ_STATE_WAITING_LOCK:
902		/* Normal wakeup.  Keep status from above. */
903		sr->sr_state = SMB_REQ_STATE_ACTIVE;
904		break;
905
906	case SMB_REQ_STATE_CANCEL_PENDING:
907		/* Cancelled via smb_lock_cancel_sr */
908		sr->sr_state = SMB_REQ_STATE_CANCELLED;
909		/* FALLTHROUGH */
910	case SMB_REQ_STATE_CANCELLED:
911		if (status == NT_STATUS_SUCCESS)
912			status = NT_STATUS_CANCELLED;
913		break;
914
915	default:
916		break;
917	}
918	mutex_exit(&sr->sr_mutex);
919
920	/* Return to the caller with n_lock_list held. */
921	smb_llist_enter(&node->n_lock_list, RW_WRITER);
922
923	smb_llist_enter(&node->n_wlock_list, RW_WRITER);
924	smb_llist_remove(&node->n_wlock_list, lock);
925	smb_llist_exit(&node->n_wlock_list);
926
927	return (status);
928}
929
930/*
931 * smb_lock_range_ulckrules
932 *
933 *	1. Unlock should be performed at exactly matching ends.
934 *	   This has been changed because overlapping ends is
935 *	   allowed and there is no other precise way of locating
936 *	   lock entity in node lock list.
937 *
938 *	2. Unlock is failed if there is no corresponding lock exists.
939 *
940 * Return values
941 *
942 *	NT_STATUS_SUCCESS		Unlock request matches lock record
943 *					pointed by 'foundlock' lock structure.
944 *
945 *	NT_STATUS_RANGE_NOT_LOCKED	Unlock request doen't match any
946 *					of lock record in node lock request or
947 *					error in unlock range processing.
948 */
949static uint32_t
950smb_lock_range_ulckrules(
951    smb_ofile_t		*file,
952    uint64_t		start,
953    uint64_t		length,
954    uint32_t		pid,
955    smb_lock_t		**foundlock)
956{
957	smb_node_t	*node = file->f_node;
958	smb_lock_t	*lock;
959	uint32_t	status = NT_STATUS_RANGE_NOT_LOCKED;
960
961	/* Caller must hold lock for node->n_lock_list */
962	for (lock = smb_llist_head(&node->n_lock_list);
963	    lock != NULL;
964	    lock = smb_llist_next(&node->n_lock_list, lock)) {
965
966		if ((start == lock->l_start) &&
967		    (length == lock->l_length) &&
968		    lock->l_file == file &&
969		    lock->l_pid == pid) {
970			*foundlock = lock;
971			status = NT_STATUS_SUCCESS;
972			break;
973		}
974	}
975
976	return (status);
977}
978
979static smb_lock_t *
980smb_lock_create(
981    smb_request_t *sr,
982    uint64_t start,
983    uint64_t length,
984    uint32_t pid,
985    uint32_t locktype,
986    uint32_t timeout)
987{
988	smb_lock_t *lock;
989
990	ASSERT(locktype == SMB_LOCK_TYPE_READWRITE ||
991	    locktype == SMB_LOCK_TYPE_READONLY);
992
993	lock = kmem_cache_alloc(smb_cache_lock, KM_SLEEP);
994	bzero(lock, sizeof (*lock));
995	lock->l_magic = SMB_LOCK_MAGIC;
996	lock->l_file = sr->fid_ofile;
997	/* l_file == fid_ofile implies same connection (see ofile lookup) */
998	lock->l_pid = pid;
999	lock->l_type = locktype;
1000	lock->l_start = start;
1001	lock->l_length = length;
1002	/*
1003	 * Calculate the absolute end time so that we can use it
1004	 * in cv_timedwait.
1005	 */
1006	lock->l_end_time = ddi_get_lbolt() + MSEC_TO_TICK(timeout);
1007	if (timeout == UINT_MAX)
1008		lock->l_flags |= SMB_LOCK_FLAG_INDEFINITE;
1009
1010	mutex_init(&lock->l_mutex, NULL, MUTEX_DEFAULT, NULL);
1011	cv_init(&lock->l_cv, NULL, CV_DEFAULT, NULL);
1012
1013	return (lock);
1014}
1015
/*
 * Tear down a lock object and return it to smb_cache_lock.
 * This does not unlink the lock from any list and does not wake
 * waiters; see smb_lock_destroy() for the latter.
 */
static void
smb_lock_free(smb_lock_t *lock)
{

	/* Clear the magic so stale pointers fail l_magic assertions. */
	lock->l_magic = 0;
	cv_destroy(&lock->l_cv);
	mutex_destroy(&lock->l_mutex);

	kmem_cache_free(smb_cache_lock, lock);
}
1026
1027/*
1028 * smb_lock_destroy
1029 *
1030 * Caller must hold node->n_lock_list
1031 */
1032static void
1033smb_lock_destroy(smb_lock_t *lock)
1034{
1035	smb_lock_t *tl;
1036	smb_node_t *node;
1037	uint32_t ccnt;
1038
1039	/*
1040	 * Wake any waiting locks that were blocked by this.
1041	 * We want them to wake and continue in FIFO order,
1042	 * so enter/exit the llist every time...
1043	 */
1044	mutex_enter(&lock->l_mutex);
1045	ccnt = lock->l_conflicts;
1046	lock->l_conflicts = 0;
1047	mutex_exit(&lock->l_mutex);
1048
1049	node = lock->l_file->f_node;
1050	while (ccnt) {
1051
1052		smb_llist_enter(&node->n_wlock_list, RW_READER);
1053
1054		for (tl = smb_llist_head(&node->n_wlock_list);
1055		    tl != NULL;
1056		    tl = smb_llist_next(&node->n_wlock_list, tl)) {
1057			mutex_enter(&tl->l_mutex);
1058			if (tl->l_blocked_by == lock) {
1059				tl->l_blocked_by = NULL;
1060				cv_broadcast(&tl->l_cv);
1061				mutex_exit(&tl->l_mutex);
1062				goto woke_one;
1063			}
1064			mutex_exit(&tl->l_mutex);
1065		}
1066		/* No more in the list blocked by this lock. */
1067		ccnt = 0;
1068	woke_one:
1069		smb_llist_exit(&node->n_wlock_list);
1070		if (ccnt) {
1071			/*
1072			 * Let the thread we woke have a chance to run
1073			 * before we wake competitors for their lock.
1074			 */
1075			delay(MSEC_TO_TICK(1));
1076		}
1077	}
1078
1079	smb_lock_free(lock);
1080}
1081
1082/*
1083 * smb_is_range_unlocked
1084 *
1085 * Checks if the current unlock byte range request overlaps another lock
1086 * This function is used to determine where POSIX unlocks should be
1087 * applied.
1088 *
1089 * The return code and the value of new_mark must be interpreted as
1090 * follows:
1091 *
1092 * B_TRUE and (new_mark == 0):
1093 *   This is the last or only lock left to be unlocked
1094 *
1095 * B_TRUE and (new_mark > 0):
1096 *   The range from start to new_mark can be unlocked
1097 *
1098 * B_FALSE and (new_mark == 0):
1099 *   The unlock can't be performed and we are done
1100 *
1101 * B_FALSE and (new_mark > 0),
1102 *   The range from start to new_mark can't be unlocked
1103 *   Start should be reset to new_mark for the next pass
1104 */
1105
1106static boolean_t
1107smb_is_range_unlocked(uint64_t start, uint64_t end, uint32_t uniqid,
1108    smb_llist_t *llist_head, uint64_t *new_mark)
1109{
1110	struct smb_lock *lk = NULL;
1111	uint64_t low_water_mark = MAXOFFSET_T;
1112	uint64_t lk_start;
1113	uint64_t lk_end;
1114
1115	*new_mark = 0;
1116	lk = smb_llist_head(llist_head);
1117	while (lk) {
1118		if (lk->l_length == 0) {
1119			lk = smb_llist_next(llist_head, lk);
1120			continue;
1121		}
1122
1123		if (lk->l_file->f_uniqid != uniqid) {
1124			lk = smb_llist_next(llist_head, lk);
1125			continue;
1126		}
1127
1128		lk_end = lk->l_start + lk->l_length - 1;
1129		lk_start = lk->l_start;
1130
1131		/*
1132		 * there is no overlap for the first 2 cases
1133		 * check next node
1134		 */
1135		if (lk_end < start) {
1136			lk = smb_llist_next(llist_head, lk);
1137			continue;
1138		}
1139		if (lk_start > end) {
1140			lk = smb_llist_next(llist_head, lk);
1141			continue;
1142		}
1143
1144		/* this range is completely locked */
1145		if ((lk_start <= start) && (lk_end >= end)) {
1146			return (B_FALSE);
1147		}
1148
1149		/* the first part of this range is locked */
1150		if ((start >= lk_start) && (start <= lk_end)) {
1151			if (end > lk_end)
1152				*new_mark = lk_end + 1;
1153			return (B_FALSE);
1154		}
1155
1156		/* this piece is unlocked */
1157		if ((lk_start >= start) && (lk_start <= end)) {
1158			if (low_water_mark > lk_start)
1159				low_water_mark  = lk_start;
1160		}
1161
1162		lk = smb_llist_next(llist_head, lk);
1163	}
1164
1165	if (low_water_mark != MAXOFFSET_T) {
1166		*new_mark = low_water_mark;
1167		return (B_TRUE);
1168	}
1169	/* the range is completely unlocked */
1170	return (B_TRUE);
1171}
1172
1173#ifdef	DEBUG
/*
 * Debug helper: print one lock as
 * "address: start, length, ofile pointer, pid".
 */
static void
smb_lock_dump1(smb_lock_t *lock)
{
	cmn_err(CE_CONT, "\t0x%p: 0x%llx, 0x%llx, %p, %d\n",
	    (void *)lock,
	    (long long)lock->l_start,
	    (long long)lock->l_length,
	    (void *)lock->l_file,
	    lock->l_pid);

}
1185
1186static void
1187smb_lock_dumplist(smb_llist_t *llist)
1188{
1189	smb_lock_t *lock;
1190
1191	for (lock = smb_llist_head(llist);
1192	    lock != NULL;
1193	    lock = smb_llist_next(llist, lock)) {
1194		smb_lock_dump1(lock);
1195	}
1196}
1197
1198static void
1199smb_lock_dumpnode(smb_node_t *node)
1200{
1201	cmn_err(CE_CONT, "Granted Locks on %p (%d)\n",
1202	    (void *)node, node->n_lock_list.ll_count);
1203	smb_lock_dumplist(&node->n_lock_list);
1204
1205	cmn_err(CE_CONT, "Waiting Locks on %p (%d)\n",
1206	    (void *)node, node->n_wlock_list.ll_count);
1207	smb_lock_dumplist(&node->n_wlock_list);
1208}
1209
1210#endif
1211