xref: /illumos-gate/usr/src/uts/common/sys/flock_impl.h (revision 7c478bd95313f5f23a4c958a745db2134aa0324)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #ifndef _SYS_FLOCK_IMPL_H
28*7c478bd9Sstevel@tonic-gate #define	_SYS_FLOCK_IMPL_H
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
31*7c478bd9Sstevel@tonic-gate 
32*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>		/* flock definition */
34*7c478bd9Sstevel@tonic-gate #include <sys/file.h>		/* FREAD etc */
35*7c478bd9Sstevel@tonic-gate #include <sys/flock.h>		/* RCMD etc */
36*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/user.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/thread.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
43*7c478bd9Sstevel@tonic-gate #include <sys/errno.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/share.h>		/* just to get GETSYSID def */
47*7c478bd9Sstevel@tonic-gate 
48*7c478bd9Sstevel@tonic-gate #ifdef	__cplusplus
49*7c478bd9Sstevel@tonic-gate extern "C" {
50*7c478bd9Sstevel@tonic-gate #endif
51*7c478bd9Sstevel@tonic-gate 
/*
 * An edge between two lock descriptors.  Each edge is linked on two
 * lists at once, both with next and previous pointers: an adjacency
 * list and an incoming-edges list.
 */
struct edge {
	struct edge		*edge_adj_next;	/* adjacency list: next */
	struct edge		*edge_adj_prev;	/* adjacency list: previous */
	struct edge		*edge_in_next;	/* incoming list: next */
	struct edge		*edge_in_prev;	/* incoming list: previous */
	struct lock_descriptor	*from_vertex;	/* lock the edge leaves */
	struct lock_descriptor	*to_vertex;	/* lock the edge points to */
};

typedef struct edge edge_t;
62*7c478bd9Sstevel@tonic-gate 
63*7c478bd9Sstevel@tonic-gate struct lock_descriptor {
64*7c478bd9Sstevel@tonic-gate 	struct	lock_descriptor	*l_next;	/* next active/sleep lock */
65*7c478bd9Sstevel@tonic-gate 	struct	lock_descriptor	*l_prev;	/* previous active/sleep lock */
66*7c478bd9Sstevel@tonic-gate 	struct	edge		l_edge;		/* edge for adj and in lists */
67*7c478bd9Sstevel@tonic-gate 	struct	lock_descriptor	*l_stack;	/* for stack operations */
68*7c478bd9Sstevel@tonic-gate 	struct	lock_descriptor	*l_stack1;	/* for stack operations */
69*7c478bd9Sstevel@tonic-gate 	struct 	lock_descriptor *l_dstack;	/* stack for debug functions */
70*7c478bd9Sstevel@tonic-gate 	struct	edge		*l_sedge;	/* start edge for graph alg. */
71*7c478bd9Sstevel@tonic-gate 			int	l_index; 	/* used for barrier count */
72*7c478bd9Sstevel@tonic-gate 		struct	graph	*l_graph;	/* graph this belongs to */
73*7c478bd9Sstevel@tonic-gate 		vnode_t		*l_vnode;	/* vnode being locked */
74*7c478bd9Sstevel@tonic-gate 			int	l_type;		/* type of lock */
75*7c478bd9Sstevel@tonic-gate 			int	l_state;	/* state described below */
76*7c478bd9Sstevel@tonic-gate 		u_offset_t	l_start;	/* start offset */
77*7c478bd9Sstevel@tonic-gate 		u_offset_t	l_end;		/* end offset */
78*7c478bd9Sstevel@tonic-gate 		flock64_t	l_flock;	/* original flock request */
79*7c478bd9Sstevel@tonic-gate 			int	l_color;	/* color used for graph alg */
80*7c478bd9Sstevel@tonic-gate 		kcondvar_t	l_cv;		/* wait condition for lock */
81*7c478bd9Sstevel@tonic-gate 		int		pvertex;	/* index to proc vertex */
82*7c478bd9Sstevel@tonic-gate 			int	l_status;	/* status described below */
83*7c478bd9Sstevel@tonic-gate 		flk_nlm_status_t l_nlm_state;	/* state of NLM server */
84*7c478bd9Sstevel@tonic-gate 		flk_callback_t	*l_callbacks;	/* callbacks, or NULL */
85*7c478bd9Sstevel@tonic-gate 		zoneid_t	l_zoneid;	/* zone of request */
86*7c478bd9Sstevel@tonic-gate };
87*7c478bd9Sstevel@tonic-gate 
88*7c478bd9Sstevel@tonic-gate typedef struct 	lock_descriptor	lock_descriptor_t;
89*7c478bd9Sstevel@tonic-gate 
90*7c478bd9Sstevel@tonic-gate /*
91*7c478bd9Sstevel@tonic-gate  * Each graph holds locking information for some number of vnodes.  The
92*7c478bd9Sstevel@tonic-gate  * active and sleeping lists are circular, with a dummy head element.
93*7c478bd9Sstevel@tonic-gate  */
94*7c478bd9Sstevel@tonic-gate 
95*7c478bd9Sstevel@tonic-gate struct	graph {
96*7c478bd9Sstevel@tonic-gate 	kmutex_t	gp_mutex;	/* mutex for this graph */
97*7c478bd9Sstevel@tonic-gate 	struct	lock_descriptor	active_locks;
98*7c478bd9Sstevel@tonic-gate 	struct	lock_descriptor	sleeping_locks;
99*7c478bd9Sstevel@tonic-gate 	int index;	/* index of this graph into the hash table */
100*7c478bd9Sstevel@tonic-gate 	int mark;	/* used for coloring the graph */
101*7c478bd9Sstevel@tonic-gate };
102*7c478bd9Sstevel@tonic-gate 
103*7c478bd9Sstevel@tonic-gate typedef	struct	graph	graph_t;
104*7c478bd9Sstevel@tonic-gate 
105*7c478bd9Sstevel@tonic-gate /*
106*7c478bd9Sstevel@tonic-gate  * The possible states a lock can be in.  These states are stored in the
107*7c478bd9Sstevel@tonic-gate  * 'l_status' member of the 'lock_descriptor_t' structure.  All locks start
108*7c478bd9Sstevel@tonic-gate  * life in the INITIAL state, and end up in the DEAD state.  Possible state
109*7c478bd9Sstevel@tonic-gate  * transitions are :
110*7c478bd9Sstevel@tonic-gate  *
111*7c478bd9Sstevel@tonic-gate  * INITIAL--> START    --> ACTIVE    --> DEAD
112*7c478bd9Sstevel@tonic-gate  *
113*7c478bd9Sstevel@tonic-gate  *                     --> DEAD
114*7c478bd9Sstevel@tonic-gate  *
115*7c478bd9Sstevel@tonic-gate  *        --> ACTIVE   --> DEAD          (new locks from flk_relation)
116*7c478bd9Sstevel@tonic-gate  *
117*7c478bd9Sstevel@tonic-gate  *        --> SLEEPING --> GRANTED   --> START     --> ACTIVE --> DEAD
118*7c478bd9Sstevel@tonic-gate  *
119*7c478bd9Sstevel@tonic-gate  *                                   --> INTR      --> DEAD
120*7c478bd9Sstevel@tonic-gate  *
121*7c478bd9Sstevel@tonic-gate  *                                   --> CANCELLED --> DEAD
122*7c478bd9Sstevel@tonic-gate  *
123*7c478bd9Sstevel@tonic-gate  *                                                 --> INTR   --> DEAD
124*7c478bd9Sstevel@tonic-gate  *
125*7c478bd9Sstevel@tonic-gate  *                     --> INTR      --> DEAD
126*7c478bd9Sstevel@tonic-gate  *
127*7c478bd9Sstevel@tonic-gate  *                     --> CANCELLED --> DEAD
128*7c478bd9Sstevel@tonic-gate  *
129*7c478bd9Sstevel@tonic-gate  *                                   --> INTR      --> DEAD
130*7c478bd9Sstevel@tonic-gate  *
131*7c478bd9Sstevel@tonic-gate  * Lock transitions are done in the following functions:
132*7c478bd9Sstevel@tonic-gate  * --> INITIAL		flk_get_lock(), reclock()
133*7c478bd9Sstevel@tonic-gate  * --> START		flk_execute_request()
134*7c478bd9Sstevel@tonic-gate  * --> ACTIVE		flk_insert_active_lock()
135*7c478bd9Sstevel@tonic-gate  * --> SLEEPING		flk_insert_sleeping_lock()
136*7c478bd9Sstevel@tonic-gate  * --> GRANTED		GRANT_WAKEUP
137*7c478bd9Sstevel@tonic-gate  * --> INTERRUPTED	INTERRUPT_WAKEUP
138*7c478bd9Sstevel@tonic-gate  * --> CANCELLED	CANCEL_WAKEUP
139*7c478bd9Sstevel@tonic-gate  * --> DEAD		reclock(), flk_delete_active_lock(), and
140*7c478bd9Sstevel@tonic-gate  *                          flk_cancel_sleeping_lock()
141*7c478bd9Sstevel@tonic-gate  */
142*7c478bd9Sstevel@tonic-gate 
/* Values of l_status; compared for equality by the IS_<state>() macros. */
#define	FLK_INITIAL_STATE	1	/* every request starts here */
#define	FLK_START_STATE		2	/* request has begun executing */
#define	FLK_ACTIVE_STATE	3	/* lock is in the active queue */
#define	FLK_SLEEPING_STATE	4	/* request is blocked */
#define	FLK_GRANTED_STATE	5	/* request has been granted */
#define	FLK_INTERRUPTED_STATE	6	/* request was interrupted */
#define	FLK_CANCELLED_STATE	7	/* request was cancelled */
#define	FLK_DEAD_STATE		8	/* request is done - will be deleted */
151*7c478bd9Sstevel@tonic-gate 
152*7c478bd9Sstevel@tonic-gate /* flags defining state of locks */
153*7c478bd9Sstevel@tonic-gate 
154*7c478bd9Sstevel@tonic-gate /*
155*7c478bd9Sstevel@tonic-gate  * The LLM design has been modified so that lock states are now stored
156*7c478bd9Sstevel@tonic-gate  * in the l_status field of lock_descriptor_t.  The l_state field is
157*7c478bd9Sstevel@tonic-gate  * currently preserved for binary compatibility, but may be modified or
158*7c478bd9Sstevel@tonic-gate  * removed in a minor release of Solaris.  Note that both of these
159*7c478bd9Sstevel@tonic-gate  * fields (and the rest of the lock_descriptor_t structure) are private
160*7c478bd9Sstevel@tonic-gate  * to the implementation of the lock manager and should not be used
161*7c478bd9Sstevel@tonic-gate  * externally.
162*7c478bd9Sstevel@tonic-gate  */
163*7c478bd9Sstevel@tonic-gate 
/* Bit flags kept in l_state; each occupies its own bit of the low 16. */
#define	ACTIVE_LOCK		0x0001	/* lock is in the active queue */
#define	SLEEPING_LOCK		0x0002	/* lock is in the sleep queue */
#define	IO_LOCK			0x0004	/* lock is an IO lock */
#define	REFERENCED_LOCK		0x0008	/* lock is referenced somewhere */
#define	QUERY_LOCK		0x0010	/* request is a query about a lock */
#define	WILLING_TO_SLEEP_LOCK	0x0020	/* lock may be put in sleep queue */
#define	RECOMPUTE_LOCK		0x0040	/* used when recomputing dependencies */
#define	RECOMPUTE_DONE		0x0080	/* used when recomputing dependencies */
#define	BARRIER_LOCK		0x0100	/* used when recomputing dependencies */
#define	GRANTED_LOCK		0x0200	/* granted, still in sleep queue */
#define	CANCELLED_LOCK		0x0400	/* cancelled, will be thrown out */
#define	DELETED_LOCK		0x0800	/* deleted - free at earliest */
#define	INTERRUPTED_LOCK	0x1000	/* pretend signal */
#define	LOCKMGR_LOCK		0x2000	/* remote lock (server-side) */
/* Clustering: flag for PXFS locks */
#define	PXFS_LOCK		0x4000	/* created by the PXFS file system */
#define	NBMAND_LOCK		0x8000	/* non-blocking mandatory locking */
181*7c478bd9Sstevel@tonic-gate 
/*
 * Sizing of the lock_graph hash table.  HASH_SHIFT is, despite its name,
 * the bit mask (HASH_SIZE - 1) applied by HASH_INDEX(), so HASH_SIZE has
 * to stay a power of two for every slot to be reachable.
 */
#define	HASH_SIZE	32
#define	HASH_SHIFT	(HASH_SIZE - 1)

/*
 * Hash a pointer to a table index in [0, HASH_SIZE): drop the low 7 bits
 * of the address and mask the remainder.  The argument is parenthesized
 * so that an expression such as "base + n" is converted as a whole rather
 * than having the cast bind to its first operand only.
 */
#define	HASH_INDEX(vp)	(((uintptr_t)(vp) >> 7) & HASH_SHIFT)
185*7c478bd9Sstevel@tonic-gate 
186*7c478bd9Sstevel@tonic-gate /* extern definitions */
187*7c478bd9Sstevel@tonic-gate 
188*7c478bd9Sstevel@tonic-gate extern struct graph	*lock_graph[HASH_SIZE];
189*7c478bd9Sstevel@tonic-gate extern struct kmem_cache *flk_edge_cache;
190*7c478bd9Sstevel@tonic-gate 
191*7c478bd9Sstevel@tonic-gate /* Clustering: functions called by PXFS */
192*7c478bd9Sstevel@tonic-gate int flk_execute_request(lock_descriptor_t *);
193*7c478bd9Sstevel@tonic-gate void flk_cancel_sleeping_lock(lock_descriptor_t *, int);
194*7c478bd9Sstevel@tonic-gate void flk_set_state(lock_descriptor_t *, int);
195*7c478bd9Sstevel@tonic-gate graph_t *flk_get_lock_graph(vnode_t *, int);
196*7c478bd9Sstevel@tonic-gate 
197*7c478bd9Sstevel@tonic-gate /* flags used for readability in flock.c */
198*7c478bd9Sstevel@tonic-gate 
#define	FLK_USE_GRAPH	0	/* do not initialize the lock_graph */
#define	FLK_INIT_GRAPH	1	/* do initialize the lock graph */
#define	NO_COLOR	0	/* vertex carries no color */
#define	NO_CHECK_CYCLE	0	/* flk_add_edge: don't mark vertices */
#define	CHECK_CYCLE	1	/* flk_add_edge: mark vertices */
204*7c478bd9Sstevel@tonic-gate 
/* True when both locks carry the same l_pid and the same l_sysid. */
#define	SAME_OWNER(lock1, lock2)				\
	(((lock1)->l_flock.l_pid == (lock2)->l_flock.l_pid) &&	\
	((lock1)->l_flock.l_sysid == (lock2)->l_flock.l_sysid))
208*7c478bd9Sstevel@tonic-gate 
/* A vertex counts as colored while l_color equals its graph's mark. */
#define	COLORED(vertex)	((vertex)->l_color == (vertex)->l_graph->mark)
/* Color a vertex by copying its graph's current mark into l_color. */
#define	COLOR(vertex)	((vertex)->l_color = (vertex)->l_graph->mark)
211*7c478bd9Sstevel@tonic-gate 
/*
 * Intrusive stack operations.
 *
 * 'stack' is an lvalue holding the top element (NULL when the stack is
 * empty) and 'stack_link' names the member of each element that points
 * to the element beneath it.
 *
 * Every macro expands to one fully parenthesized expression, so each can
 * be used as an operand.  STACK_PUSH() and STACK_POP() evaluate to the
 * new top of the stack.  STACK_POP() must not be applied to an empty
 * stack: it dereferences 'stack'.
 */

#define	STACK_INIT(stack)	((stack) = NULL)
#define	STACK_PUSH(stack, ptr, stack_link)	\
	((ptr)->stack_link = (stack), (stack) = (ptr))
#define	STACK_POP(stack, stack_link)	((stack) = (stack)->stack_link)
#define	STACK_TOP(stack)	(stack)
#define	STACK_EMPTY(stack)	((stack) == NULL)
222*7c478bd9Sstevel@tonic-gate 
223*7c478bd9Sstevel@tonic-gate 
/* Addresses of the head elements embedded in a graph. */
#define	ACTIVE_HEAD(gp)		(&(gp)->active_locks)
#define	SLEEPING_HEAD(gp)	(&(gp)->sleeping_locks)

/*
 * Set 'lock' to vp's v_filocks pointer, cast to a lock descriptor
 * pointer.  'gp' is accepted but not used.  'vp' is parenthesized so that
 * any pointer-valued expression may be passed, not just a plain name.
 */
#define	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp) \
{ \
	(lock) = (lock_descriptor_t *)(vp)->v_filocks;	\
}

/*
 * Set 'lock' to the first entry on gp's sleeping list whose l_vnode is
 * 'vp', or to NULL when the walk arrives back at the list head without
 * finding one.
 */
#define	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp) \
{ \
	(lock) = SLEEPING_HEAD(gp)->l_next; \
	while ((lock) != SLEEPING_HEAD(gp) && (lock)->l_vnode != (vp)) \
		(lock) = (lock)->l_next; \
	if ((lock) == SLEEPING_HEAD(gp)) \
		(lock) = NULL; \
}
240*7c478bd9Sstevel@tonic-gate 
/*
 * True when the inclusive ranges [l_start, l_end] of the two locks
 * intersect, i.e. when either lock's start offset lies inside the other
 * lock's range.
 */
#define	OVERLAP(lock1, lock2)					\
	((((lock1)->l_start <= (lock2)->l_start) &&		\
	    ((lock2)->l_start <= (lock1)->l_end)) ||		\
	(((lock2)->l_start <= (lock1)->l_start) &&		\
	    ((lock1)->l_start <= (lock2)->l_end)))
246*7c478bd9Sstevel@tonic-gate 
/* Predicates on l_status: each is an equality test against one state. */
#define	IS_INITIAL(lock)	(FLK_INITIAL_STATE == (lock)->l_status)
#define	IS_ACTIVE(lock)		(FLK_ACTIVE_STATE == (lock)->l_status)
#define	IS_SLEEPING(lock)	(FLK_SLEEPING_STATE == (lock)->l_status)
#define	IS_GRANTED(lock)	(FLK_GRANTED_STATE == (lock)->l_status)
#define	IS_INTERRUPTED(lock)	(FLK_INTERRUPTED_STATE == (lock)->l_status)
#define	IS_CANCELLED(lock)	(FLK_CANCELLED_STATE == (lock)->l_status)
#define	IS_DEAD(lock)		(FLK_DEAD_STATE == (lock)->l_status)
254*7c478bd9Sstevel@tonic-gate 
/*
 * Flag tests on l_state.  Each yields the masked bit itself (zero or
 * the flag's value), not a normalized 0/1.
 */
#define	IS_QUERY_LOCK(lock)		((lock)->l_state & QUERY_LOCK)
#define	IS_RECOMPUTE(lock)		((lock)->l_state & RECOMPUTE_LOCK)
#define	IS_BARRIER(lock)		((lock)->l_state & BARRIER_LOCK)
#define	IS_DELETED(lock)		((lock)->l_state & DELETED_LOCK)
#define	IS_REFERENCED(lock)		((lock)->l_state & REFERENCED_LOCK)
#define	IS_IO_LOCK(lock)		((lock)->l_state & IO_LOCK)
#define	IS_WILLING_TO_SLEEP(lock)	((lock)->l_state & WILLING_TO_SLEEP_LOCK)
#define	IS_LOCKMGR(lock)		((lock)->l_state & LOCKMGR_LOCK)
/* Unlike the tests above, this one compares l_nlm_state for equality. */
#define	IS_NLM_UP(lock)			((lock)->l_nlm_state == FLK_NLM_UP)
/* Clustering: Macro for PXFS locks */
#define	IS_PXFS(lock)			((lock)->l_state & PXFS_LOCK)
267*7c478bd9Sstevel@tonic-gate 
268*7c478bd9Sstevel@tonic-gate /*
269*7c478bd9Sstevel@tonic-gate  * "local" requests don't involve the NFS lock manager in any way.
270*7c478bd9Sstevel@tonic-gate  * "remote" requests can be on the server (requests from a remote client),
271*7c478bd9Sstevel@tonic-gate  * in which case they should be associated with a local vnode (UFS, tmpfs,
272*7c478bd9Sstevel@tonic-gate  * etc.).  These requests are flagged with LOCKMGR_LOCK and are made using
273*7c478bd9Sstevel@tonic-gate  * kernel service threads.  Remote requests can also be on an NFS client,
274*7c478bd9Sstevel@tonic-gate  * because the NFS lock manager uses local locking for some of its
275*7c478bd9Sstevel@tonic-gate  * bookkeeping.  These requests are made by regular user processes.
276*7c478bd9Sstevel@tonic-gate  */
#define	IS_LOCAL(lock)		(GETSYSID((lock)->l_flock.l_sysid) == 0)
#define	IS_REMOTE(lock)		(!IS_LOCAL(lock))
279*7c478bd9Sstevel@tonic-gate 
/* Clustering: Return value for blocking PXFS locks */
/*
 * For PXFS locks, reclock() will return this error code for requests that
 * need to block.  The negative value is parenthesized so that it remains a
 * single operand wherever the macro is expanded.
 */
#define	PXFS_LOCK_BLOCKED	(-1)
286*7c478bd9Sstevel@tonic-gate 
287*7c478bd9Sstevel@tonic-gate /* Clustering: PXFS callback function */
288*7c478bd9Sstevel@tonic-gate /*
289*7c478bd9Sstevel@tonic-gate  * This function is a callback from the LLM into the PXFS server module.  It
290*7c478bd9Sstevel@tonic-gate  * is initialized as a weak stub, and is functional when the pxfs server module
291*7c478bd9Sstevel@tonic-gate  * is loaded.
292*7c478bd9Sstevel@tonic-gate  */
293*7c478bd9Sstevel@tonic-gate extern void cl_flk_state_transition_notify(lock_descriptor_t *lock,
294*7c478bd9Sstevel@tonic-gate     int old_state, int new_state);
295*7c478bd9Sstevel@tonic-gate 
/*
 * Two locks block each other when they have different owners, at least
 * one of them is a write lock (F_WRLCK), and their ranges overlap.
 */
#define	BLOCKS(lock1, lock2)					\
	(!SAME_OWNER((lock1), (lock2)) &&			\
	(((lock1)->l_type == F_WRLCK) ||			\
	    ((lock2)->l_type == F_WRLCK)) &&			\
	OVERLAP((lock1), (lock2)))

/* True when lock1's range [l_start, l_end] contains all of lock2's. */
#define	COVERS(lock1, lock2)					\
	(((lock1)->l_start <= (lock2)->l_start) &&		\
	((lock1)->l_end >= (lock2)->l_end))
304*7c478bd9Sstevel@tonic-gate 
/*
 * Unlink edge 'ep' from its incoming-edges list by joining its two
 * neighbors.  ep's own link pointers are left as they were.
 */
#define	IN_LIST_REMOVE(ep)						\
{									\
	(ep)->edge_in_next->edge_in_prev = (ep)->edge_in_prev;		\
	(ep)->edge_in_prev->edge_in_next = (ep)->edge_in_next;		\
}

/*
 * Unlink edge 'ep' from its adjacency list by joining its two
 * neighbors.  ep's own link pointers are left as they were.
 */
#define	ADJ_LIST_REMOVE(ep)						\
{									\
	(ep)->edge_adj_next->edge_adj_prev = (ep)->edge_adj_prev;	\
	(ep)->edge_adj_prev->edge_adj_next = (ep)->edge_adj_next;	\
}
316*7c478bd9Sstevel@tonic-gate 
/*
 * True when the lock's adjacency list is empty (l_edge.edge_adj_next
 * points back at l_edge) and the lock is not in the granted state.
 */
#define	NOT_BLOCKED(lock)					\
	(((lock)->l_edge.edge_adj_next == &(lock)->l_edge) &&	\
	!IS_GRANTED(lock))
319*7c478bd9Sstevel@tonic-gate 
/*
 * Call flk_set_state() with FLK_GRANTED_STATE, set GRANTED_LOCK in
 * l_state, then signal l_cv.
 *
 * Clustering: PXFS locks do not sleep in the LLM, so there is no need
 * to signal them.
 */
#define	GRANT_WAKEUP(lock)	\
	{	\
		flk_set_state(lock, FLK_GRANTED_STATE); \
		(lock)->l_state |= GRANTED_LOCK; \
		if (!IS_PXFS(lock)) { \
			cv_signal(&(lock)->l_cv); \
		} \
	}
332*7c478bd9Sstevel@tonic-gate 
/*
 * Call flk_set_state() with FLK_CANCELLED_STATE, set CANCELLED_LOCK in
 * l_state, then signal l_cv.
 *
 * Clustering: PXFS locks do not sleep in the LLM, so there is no need
 * to signal them.
 */
#define	CANCEL_WAKEUP(lock)	\
	{ \
		flk_set_state(lock, FLK_CANCELLED_STATE); \
		(lock)->l_state |= CANCELLED_LOCK; \
		if (!IS_PXFS(lock)) { \
			cv_signal(&(lock)->l_cv); \
		} \
	}
345*7c478bd9Sstevel@tonic-gate 
/*
 * Call flk_set_state() with FLK_INTERRUPTED_STATE, set INTERRUPTED_LOCK
 * in l_state, then signal l_cv.
 *
 * Clustering: PXFS locks do not sleep in the LLM, so there is no need
 * to signal them.
 */
#define	INTERRUPT_WAKEUP(lock)	\
	{ \
		flk_set_state(lock, FLK_INTERRUPTED_STATE); \
		(lock)->l_state |= INTERRUPTED_LOCK; \
		if (!IS_PXFS(lock)) { \
			cv_signal(&(lock)->l_cv); \
		} \
	}
358*7c478bd9Sstevel@tonic-gate 
/*
 * Unlink 'lock' from the list it is on via l_next/l_prev, clear
 * SLEEPING_LOCK in l_state and set both of its list links to NULL.
 * Asserts that the lock is sleeping, granted, interrupted or cancelled.
 */
#define	REMOVE_SLEEP_QUEUE(lock)	\
	{ \
		ASSERT(IS_SLEEPING(lock) || IS_GRANTED(lock) || \
		    IS_INTERRUPTED(lock) || IS_CANCELLED(lock)); \
		(lock)->l_state &= ~SLEEPING_LOCK; \
		(lock)->l_next->l_prev = (lock)->l_prev; \
		(lock)->l_prev->l_next = (lock)->l_next; \
		(lock)->l_prev = (lock_descriptor_t *)NULL; \
		(lock)->l_next = (lock_descriptor_t *)NULL; \
	}
368*7c478bd9Sstevel@tonic-gate 
/* True when the lock's incoming-edges list is empty (points at l_edge). */
#define	NO_DEPENDENTS(lock)	(&(lock)->l_edge == (lock)->l_edge.edge_in_next)
371*7c478bd9Sstevel@tonic-gate 
/*
 * Set GRANTED_LOCK in l_state and then call flk_set_state() with
 * FLK_GRANTED_STATE.  Unlike GRANT_WAKEUP(), no condition variable is
 * signalled.
 */
#define	GRANT(lock)	\
	{ \
		(lock)->l_state |= GRANTED_LOCK; \
		flk_set_state(lock, FLK_GRANTED_STATE); \
	}
377*7c478bd9Sstevel@tonic-gate 
/* Accessors for the edge lists headed by a lock's embedded l_edge. */
#define	FIRST_IN(lock)		((lock)->l_edge.edge_in_next)
#define	FIRST_ADJ(lock)		((lock)->l_edge.edge_adj_next)
#define	HEAD(lock)		(&(lock)->l_edge)
#define	NEXT_ADJ(ep)		((ep)->edge_adj_next)
#define	NEXT_IN(ep)		((ep)->edge_in_next)

/* Make both edge lists of 'lock' empty: every link points at l_edge. */
#define	IN_ADJ_INIT(lock)	\
{	\
	(lock)->l_edge.edge_adj_prev = &(lock)->l_edge; \
	(lock)->l_edge.edge_adj_next = &(lock)->l_edge; \
	(lock)->l_edge.edge_in_prev = &(lock)->l_edge; \
	(lock)->l_edge.edge_in_next = &(lock)->l_edge; \
}
388*7c478bd9Sstevel@tonic-gate 
/*
 * Copy these fields from lock2 into lock1: graph, vnode, type, state
 * flags, start and end offsets, the flock request, zone id and pvertex.
 * No other member of lock1 is written.
 */
#define	COPY(lock1, lock2)	\
{	\
	(lock1)->l_graph = (lock2)->l_graph; \
	(lock1)->l_vnode = (lock2)->l_vnode; \
	(lock1)->l_type = (lock2)->l_type; \
	(lock1)->l_state = (lock2)->l_state; \
	(lock1)->l_start = (lock2)->l_start; \
	(lock1)->l_end = (lock2)->l_end; \
	(lock1)->l_flock = (lock2)->l_flock; \
	(lock1)->l_zoneid = (lock2)->l_zoneid; \
	(lock1)->pvertex = (lock2)->pvertex; \
}
401*7c478bd9Sstevel@tonic-gate 
402*7c478bd9Sstevel@tonic-gate /*
403*7c478bd9Sstevel@tonic-gate  * Clustering
404*7c478bd9Sstevel@tonic-gate  */
/* Store into, and read back, the l_nlm_state member of a lock request */
#define	SET_NLM_STATE(lock, new_state)	((lock)->l_nlm_state = (new_state))
#define	GET_NLM_STATE(lock)		((lock)->l_nlm_state)
408*7c478bd9Sstevel@tonic-gate /*
409*7c478bd9Sstevel@tonic-gate  * NLM registry abstraction:
410*7c478bd9Sstevel@tonic-gate  *   Abstraction overview:
411*7c478bd9Sstevel@tonic-gate  *   This registry keeps track of the NLM servers via their nlmids
412*7c478bd9Sstevel@tonic-gate  *   that have requested locks at the LLM this registry is associated
413*7c478bd9Sstevel@tonic-gate  *   with.
414*7c478bd9Sstevel@tonic-gate  */
415*7c478bd9Sstevel@tonic-gate /* Routines to manipulate the NLM registry object state */
/* The registry is indexed directly by nlmid; each slot holds a state. */
#define	FLK_REGISTRY_IS_NLM_UNKNOWN(registry, id)	\
	((registry)[id] == FLK_NLM_UNKNOWN)
#define	FLK_REGISTRY_IS_NLM_UP(registry, id)	\
	((registry)[id] == FLK_NLM_UP)
/* Adding an nlmid records it with the FLK_NLM_UP state. */
#define	FLK_REGISTRY_ADD_NLMID(registry, id)	\
	((registry)[id] = FLK_NLM_UP)
#define	FLK_REGISTRY_CHANGE_NLM_STATE(registry, id, new_state)	\
	((registry)[id] = (new_state))
424*7c478bd9Sstevel@tonic-gate 
425*7c478bd9Sstevel@tonic-gate /* Indicates the effect of executing a request on the existing locks */
426*7c478bd9Sstevel@tonic-gate 
/* Consecutive codes, not bit masks: FLK_UPGRADE equals 0x1 | 0x2. */
#define	FLK_UNLOCK	0x1	/* request unlocks the existing lock */
#define	FLK_DOWNGRADE	0x2	/* request downgrades the existing lock */
#define	FLK_UPGRADE	0x3	/* request upgrades the existing lock */
#define	FLK_STAY_SAME	0x4	/* request has the existing lock's type */
431*7c478bd9Sstevel@tonic-gate 
432*7c478bd9Sstevel@tonic-gate 
433*7c478bd9Sstevel@tonic-gate /*	proc graph definitions	*/
434*7c478bd9Sstevel@tonic-gate 
435*7c478bd9Sstevel@tonic-gate /*
436*7c478bd9Sstevel@tonic-gate  * Proc graph is the global process graph that maintains information
437*7c478bd9Sstevel@tonic-gate  * about the dependencies between processes. An edge is added between two
438*7c478bd9Sstevel@tonic-gate  * processes represented by proc_vertex's A and B, iff there exists l1
439*7c478bd9Sstevel@tonic-gate  * owned by process A in any of the lock_graph's dependent on l2
440*7c478bd9Sstevel@tonic-gate  * (thus having an edge to l2) owned by process B.
441*7c478bd9Sstevel@tonic-gate  */
struct proc_vertex {
	pid_t			pid;		/* pid of the process */
	long			sysid;		/* sysid of the process */
	struct proc_edge	*edge;		/* adjacent edges of process */
	int			incount;	/* number of inedges to process */
	struct proc_edge	*p_sedge;	/* used by the stack algorithm */
	struct proc_vertex	*p_stack;	/* used by the stack algorithm */
	int			atime;		/* used for cycle detection */
	int			dtime;		/* used for cycle detection */
	int			index;		/* index into the array of */
						/* proc_graph vertices */
};

typedef struct proc_vertex proc_vertex_t;
455*7c478bd9Sstevel@tonic-gate 
/* An edge of the proc graph, kept on a singly linked adjacency list. */
struct proc_edge {
	struct proc_edge	*next;		/* next edge on adjacency list */
	int			refcount;	/* reference count of the edge */
	struct proc_vertex	*to_proc;	/* process the edge points to */
};

typedef struct proc_edge proc_edge_t;
463*7c478bd9Sstevel@tonic-gate 
464*7c478bd9Sstevel@tonic-gate 
#define	PROC_CHUNK	100

struct proc_graph {
	struct proc_vertex	**proc;		/* list of proc_vertexes */
	int			gcount;		/* size of the list */
	int			free;		/* free slots in the list */
	int			mark;		/* used for graph coloring */
};

typedef struct proc_graph proc_graph_t;

/* The proc graph instance read by the PROC_ARRIVE()-family macros. */
extern struct proc_graph	pgraph;
477*7c478bd9Sstevel@tonic-gate 
/* True when the lock's l_pid and l_sysid match the vertex's pid and sysid. */
#define	PROC_SAME_OWNER(lock, pvertex)				\
	(((lock)->l_flock.l_pid == (pvertex)->pid) &&		\
	((lock)->l_flock.l_sysid == (pvertex)->sysid))

/*
 * Arrival and departure stamps: a vertex has arrived (or departed)
 * while its atime (or dtime) equals the current pgraph.mark.
 */
#define	PROC_ARRIVE(pvertex)	((pvertex)->atime = pgraph.mark)
#define	PROC_DEPART(pvertex)	((pvertex)->dtime = pgraph.mark)
#define	PROC_ARRIVED(pvertex)	((pvertex)->atime == pgraph.mark)
#define	PROC_DEPARTED(pvertex)	((pvertex)->dtime == pgraph.mark)
486*7c478bd9Sstevel@tonic-gate 
487*7c478bd9Sstevel@tonic-gate #ifdef	__cplusplus
488*7c478bd9Sstevel@tonic-gate }
489*7c478bd9Sstevel@tonic-gate #endif
490*7c478bd9Sstevel@tonic-gate 
491*7c478bd9Sstevel@tonic-gate #endif	/* _SYS_FLOCK_IMPL_H */
492