/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Joyent, Inc.
 */
277c478bdstevel@tonic-gate
287c478bdstevel@tonic-gate#ifndef _SYS_FLOCK_IMPL_H
297c478bdstevel@tonic-gate#define	_SYS_FLOCK_IMPL_H
307c478bdstevel@tonic-gate
317c478bdstevel@tonic-gate#include <sys/types.h>
327c478bdstevel@tonic-gate#include <sys/fcntl.h>		/* flock definition */
337c478bdstevel@tonic-gate#include <sys/file.h>		/* FREAD etc */
347c478bdstevel@tonic-gate#include <sys/flock.h>		/* RCMD etc */
357c478bdstevel@tonic-gate#include <sys/kmem.h>
367c478bdstevel@tonic-gate#include <sys/user.h>
377c478bdstevel@tonic-gate#include <sys/thread.h>
387c478bdstevel@tonic-gate#include <sys/proc.h>
397c478bdstevel@tonic-gate#include <sys/cred.h>
407c478bdstevel@tonic-gate#include <sys/debug.h>
417c478bdstevel@tonic-gate#include <sys/cmn_err.h>
427c478bdstevel@tonic-gate#include <sys/errno.h>
437c478bdstevel@tonic-gate#include <sys/systm.h>
447c478bdstevel@tonic-gate#include <sys/vnode.h>
457c478bdstevel@tonic-gate#include <sys/share.h>		/* just to get GETSYSID def */
467c478bdstevel@tonic-gate
477c478bdstevel@tonic-gate#ifdef	__cplusplus
487c478bdstevel@tonic-gateextern "C" {
497c478bdstevel@tonic-gate#endif
507c478bdstevel@tonic-gate
/*
 * One edge of a lock graph.  Each edge sits on two doubly linked lists at
 * the same time: an adjacency list and an incoming-edges list.  The l_edge
 * member embedded in every lock_descriptor serves as the head of both lists
 * for that lock; IN_ADJ_INIT() makes that head point at itself, so an empty
 * list is one whose head's next pointer is the head.
 */
struct edge {
	struct edge		*edge_adj_next;	/* adjacency list next */
	struct edge		*edge_adj_prev;	/* adjacency list prev */
	struct edge		*edge_in_next;	/* incoming edges list next */
	struct edge		*edge_in_prev;	/* incoming edges list prev */
	struct lock_descriptor	*from_vertex;	/* edge emanating from lock */
	struct lock_descriptor	*to_vertex;	/* edge pointing to lock */
};

typedef struct edge	edge_t;

627c478bdstevel@tonic-gatestruct lock_descriptor {
637c478bdstevel@tonic-gate	struct	lock_descriptor	*l_next;	/* next active/sleep lock */
647c478bdstevel@tonic-gate	struct	lock_descriptor	*l_prev;	/* previous active/sleep lock */
657c478bdstevel@tonic-gate	struct	edge		l_edge;		/* edge for adj and in lists */
667c478bdstevel@tonic-gate	struct	lock_descriptor	*l_stack;	/* for stack operations */
677c478bdstevel@tonic-gate	struct	lock_descriptor	*l_stack1;	/* for stack operations */
687c478bdstevel@tonic-gate	struct 	lock_descriptor *l_dstack;	/* stack for debug functions */
697c478bdstevel@tonic-gate	struct	edge		*l_sedge;	/* start edge for graph alg. */
707c478bdstevel@tonic-gate			int	l_index; 	/* used for barrier count */
717c478bdstevel@tonic-gate		struct	graph	*l_graph;	/* graph this belongs to */
727c478bdstevel@tonic-gate		vnode_t		*l_vnode;	/* vnode being locked */
737c478bdstevel@tonic-gate			int	l_type;		/* type of lock */
747c478bdstevel@tonic-gate			int	l_state;	/* state described below */
757c478bdstevel@tonic-gate		u_offset_t	l_start;	/* start offset */
767c478bdstevel@tonic-gate		u_offset_t	l_end;		/* end offset */
777c478bdstevel@tonic-gate		flock64_t	l_flock;	/* original flock request */
787c478bdstevel@tonic-gate			int	l_color;	/* color used for graph alg */
797c478bdstevel@tonic-gate		kcondvar_t	l_cv;		/* wait condition for lock */
807c478bdstevel@tonic-gate		int		pvertex;	/* index to proc vertex */
817c478bdstevel@tonic-gate			int	l_status;	/* status described below */
827c478bdstevel@tonic-gate		flk_nlm_status_t l_nlm_state;	/* state of NLM server */
837c478bdstevel@tonic-gate		flk_callback_t	*l_callbacks;	/* callbacks, or NULL */
847c478bdstevel@tonic-gate		zoneid_t	l_zoneid;	/* zone of request */
857a5aac9Jerry Jelinek		file_t		*l_ofd;		/* OFD-style reference */
867c478bdstevel@tonic-gate};
877c478bdstevel@tonic-gate
887c478bdstevel@tonic-gatetypedef struct 	lock_descriptor	lock_descriptor_t;
897c478bdstevel@tonic-gate
907c478bdstevel@tonic-gate/*
917c478bdstevel@tonic-gate * Each graph holds locking information for some number of vnodes.  The
927c478bdstevel@tonic-gate * active and sleeping lists are circular, with a dummy head element.
937c478bdstevel@tonic-gate */
947c478bdstevel@tonic-gate
957c478bdstevel@tonic-gatestruct	graph {
967c478bdstevel@tonic-gate	kmutex_t	gp_mutex;	/* mutex for this graph */
977c478bdstevel@tonic-gate	struct	lock_descriptor	active_locks;
987c478bdstevel@tonic-gate	struct	lock_descriptor	sleeping_locks;
997c478bdstevel@tonic-gate	int index;	/* index of this graph into the hash table */
1007c478bdstevel@tonic-gate	int mark;	/* used for coloring the graph */
1017c478bdstevel@tonic-gate};
1027c478bdstevel@tonic-gate
1037c478bdstevel@tonic-gatetypedef	struct	graph	graph_t;
1047c478bdstevel@tonic-gate
/*
 * The possible states a lock can be in.  These states are stored in the
 * 'l_status' member of the 'lock_descriptor_t' structure.  All locks start
 * life in the INITIAL state, and end up in the DEAD state.  Possible state
 * transitions are :
 *
 * INITIAL--> START    --> ACTIVE    --> DEAD
 *
 *                     --> DEAD
 *
 *        --> ACTIVE   --> DEAD          (new locks from flk_relation)
 *
 *        --> SLEEPING --> GRANTED   --> START     --> ACTIVE --> DEAD
 *
 *                                   --> INTR      --> DEAD
 *
 *                                   --> CANCELLED --> DEAD
 *
 *                                                 --> INTR   --> DEAD
 *
 *                     --> INTR      --> DEAD
 *
 *                     --> CANCELLED --> DEAD
 *
 *                                   --> INTR      --> DEAD
 *
 * ("INTR" in the diagram is FLK_INTERRUPTED_STATE.)
 *
 * Lock transitions are done in the following functions:
 * --> INITIAL		flk_get_lock(), reclock()
 * --> START		flk_execute_request()
 * --> ACTIVE		flk_insert_active_lock()
 * --> SLEEPING		flk_insert_sleeping_lock()
 * --> GRANTED		GRANT_WAKEUP
 * --> INTERRUPTED	INTERRUPT_WAKEUP
 * --> CANCELLED	CANCEL_WAKEUP
 * --> DEAD		reclock(), flk_delete_active_lock(), and
 *                          flk_cancel_sleeping_lock()
 */

#define	FLK_INITIAL_STATE	1	/* Initial state of all requests */
#define	FLK_START_STATE		2	/* Request has started execution */
#define	FLK_ACTIVE_STATE	3	/* In active queue */
#define	FLK_SLEEPING_STATE	4	/* Request is blocked */
#define	FLK_GRANTED_STATE	5	/* Request is granted */
#define	FLK_INTERRUPTED_STATE	6	/* Request is interrupted */
#define	FLK_CANCELLED_STATE	7	/* Request is cancelled */
#define	FLK_DEAD_STATE		8	/* Request is done - will be deleted */

/* flags defining state of locks */

/*
 * The LLM design has been modified so that lock states are now stored
 * in the l_status field of lock_descriptor_t.  The l_state field is
 * currently preserved for binary compatibility, but may be modified or
 * removed in a minor release of Solaris.  Note that both of these
 * fields (and the rest of the lock_descriptor_t structure) are private
 * to the implementation of the lock manager and should not be used
 * externally.
 *
 * The values below are single-bit flags that are OR-ed into l_state.
 */

#define	ACTIVE_LOCK		0x0001	/* in active queue */
#define	SLEEPING_LOCK		0x0002	/* in sleep queue */
#define	IO_LOCK			0x0004	/* is an IO lock */
#define	REFERENCED_LOCK		0x0008	/* referenced some where */
#define	QUERY_LOCK		0x0010	/* querying about lock */
#define	WILLING_TO_SLEEP_LOCK	0x0020	/* lock can be put in sleep queue */
#define	RECOMPUTE_LOCK		0x0040	/* used for recomputing dependencies */
#define	RECOMPUTE_DONE		0x0080	/* used for recomputing dependencies */
#define	BARRIER_LOCK		0x0100	/* used for recomputing dependencies */
#define	GRANTED_LOCK		0x0200	/* granted but still in sleep queue */
#define	CANCELLED_LOCK		0x0400	/* cancelled will be thrown out */
#define	DELETED_LOCK		0x0800	/* deleted - free at earliest */
#define	INTERRUPTED_LOCK	0x1000	/* pretend signal */
#define	LOCKMGR_LOCK		0x2000	/* remote lock (server-side) */
/* Clustering: flag for PXFS locks */
#define	PXFS_LOCK		0x4000	/* lock created by PXFS file system */
#define	NBMAND_LOCK		0x8000	/* non-blocking mandatory locking */

/*
 * Hashing of vnode pointers onto lock_graph[] slots.
 *
 * HASH_SIZE	number of slots; it has to stay a power of two because
 *		HASH_SHIFT is used as a bit mask.
 * HASH_SHIFT	despite its name, the mask (HASH_SIZE - 1), not a shift count.
 * HASH_INDEX	drops the low 7 bits of the pointer value and masks the rest.
 *		The argument is parenthesized so that the cast applies to the
 *		whole argument expression rather than to its first operand.
 */
#define	HASH_SIZE	32
#define	HASH_SHIFT	(HASH_SIZE - 1)
#define	HASH_INDEX(vp)	(((uintptr_t)(vp) >> 7) & HASH_SHIFT)

1867c478bdstevel@tonic-gate/* extern definitions */
1877c478bdstevel@tonic-gate
1887c478bdstevel@tonic-gateextern struct graph	*lock_graph[HASH_SIZE];
1897c478bdstevel@tonic-gateextern struct kmem_cache *flk_edge_cache;
1907c478bdstevel@tonic-gate
1917c478bdstevel@tonic-gate/* Clustering: functions called by PXFS */
1927c478bdstevel@tonic-gateint flk_execute_request(lock_descriptor_t *);
1937c478bdstevel@tonic-gatevoid flk_cancel_sleeping_lock(lock_descriptor_t *, int);
1947c478bdstevel@tonic-gatevoid flk_set_state(lock_descriptor_t *, int);
1957c478bdstevel@tonic-gategraph_t *flk_get_lock_graph(vnode_t *, int);
1967c478bdstevel@tonic-gate
/* flags used for readability in flock.c */

#define	FLK_USE_GRAPH	0	/* don't initialize the lock_graph */
#define	FLK_INIT_GRAPH	1	/* initialize the lock graph */
#define	NO_COLOR	0	/* vertex is not colored */
#define	NO_CHECK_CYCLE	0	/* don't mark vertices in flk_add_edge */
#define	CHECK_CYCLE	1	/* mark vertices in flk_add_edge */

/*
 * True when two locks have the same owner, i.e. their requests carry the
 * same pid and sysid and they refer to the same l_ofd.
 */
#define	SAME_OWNER(lock1, lock2)	\
	(((lock1)->l_flock.l_pid == (lock2)->l_flock.l_pid) && \
	((lock1)->l_flock.l_sysid == (lock2)->l_flock.l_sysid) && \
	((lock1)->l_ofd == (lock2)->l_ofd))

/*
 * Graph coloring: a vertex counts as colored while its l_color equals the
 * current mark of the graph it belongs to.  COLOR() stamps the vertex with
 * that mark and evaluates to the value stored.
 */
#define	COLORED(vertex)	((vertex)->l_color == (vertex)->l_graph->mark)
#define	COLOR(vertex)	((vertex)->l_color = (vertex)->l_graph->mark)

/*
 * stack data structure and operations
 *
 * A stack is just a pointer to its top element; elements are chained
 * through the member named by 'stack_link' (for example l_stack).  Every
 * macro expands to one fully parenthesized expression.  STACK_PUSH() and
 * STACK_POP() evaluate to the new top of the stack.  STACK_POP() must not
 * be applied to an empty stack, since it dereferences the top element.
 */

#define	STACK_INIT(stack)	((stack) = NULL)
#define	STACK_PUSH(stack, ptr, stack_link)	\
	((ptr)->stack_link = (stack), (stack) = (ptr))
#define	STACK_POP(stack, stack_link)	((stack) = (stack)->stack_link)
#define	STACK_TOP(stack)	(stack)
#define	STACK_EMPTY(stack)	((stack) == NULL)

/* Dummy head elements of a graph's circular active and sleeping lists. */
#define	ACTIVE_HEAD(gp)		(&(gp)->active_locks)

#define	SLEEPING_HEAD(gp)	(&(gp)->sleeping_locks)

/*
 * Set 'lock' to the lock stored in vp->v_filocks.  'gp' is accepted for
 * symmetry with SET_LOCK_TO_FIRST_SLEEP_VP() but is not evaluated.
 */
#define	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp) \
	do { \
		(lock) = (lock_descriptor_t *)(vp)->v_filocks; \
	} while (0)

/*
 * Walk the sleeping list of 'gp' from its head and set 'lock' to the first
 * entry whose l_vnode is 'vp', or to NULL when the walk comes back round to
 * the list head without finding one.
 */
#define	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp) \
	do { \
		for ((lock) = SLEEPING_HEAD((gp))->l_next; \
		    ((lock) != SLEEPING_HEAD((gp)) && \
		    (lock)->l_vnode != (vp)); \
		    (lock) = (lock)->l_next) \
			; \
		(lock) = ((lock) == SLEEPING_HEAD((gp))) ? NULL : (lock); \
	} while (0)

/*
 * True when the ranges [l_start, l_end] of the two locks share at least one
 * offset.  Both bounds are inclusive, so ranges that merely touch overlap.
 */
#define	OVERLAP(lock1, lock2) \
	(((lock1)->l_start <= (lock2)->l_start && \
	(lock2)->l_start <= (lock1)->l_end) || \
	((lock2)->l_start <= (lock1)->l_start && \
	(lock1)->l_start <= (lock2)->l_end))

/* Predicates on the lock's current FLK_*_STATE, held in l_status. */
#define	IS_INITIAL(lock)	((lock)->l_status == FLK_INITIAL_STATE)
#define	IS_ACTIVE(lock)		((lock)->l_status == FLK_ACTIVE_STATE)
#define	IS_SLEEPING(lock)	((lock)->l_status == FLK_SLEEPING_STATE)
#define	IS_GRANTED(lock)	((lock)->l_status == FLK_GRANTED_STATE)
#define	IS_INTERRUPTED(lock)	((lock)->l_status == FLK_INTERRUPTED_STATE)
#define	IS_CANCELLED(lock)	((lock)->l_status == FLK_CANCELLED_STATE)
#define	IS_DEAD(lock)		((lock)->l_status == FLK_DEAD_STATE)

/*
 * Predicates on the flag bits kept in l_state.  Each evaluates to the
 * masked bit itself (non-zero when set), not to a normalized 0/1.
 */
#define	IS_QUERY_LOCK(lock)	((lock)->l_state & QUERY_LOCK)
#define	IS_RECOMPUTE(lock)	((lock)->l_state & RECOMPUTE_LOCK)
#define	IS_BARRIER(lock)	((lock)->l_state & BARRIER_LOCK)
#define	IS_DELETED(lock)	((lock)->l_state & DELETED_LOCK)
#define	IS_REFERENCED(lock)	((lock)->l_state & REFERENCED_LOCK)
#define	IS_IO_LOCK(lock)	((lock)->l_state & IO_LOCK)
#define	IS_WILLING_TO_SLEEP(lock)	\
	((lock)->l_state & WILLING_TO_SLEEP_LOCK)
#define	IS_LOCKMGR(lock)	((lock)->l_state & LOCKMGR_LOCK)
/* True when the NLM state recorded in the lock is FLK_NLM_UP. */
#define	IS_NLM_UP(lock)		((lock)->l_nlm_state == FLK_NLM_UP)
/* Clustering: Macro for PXFS locks */
#define	IS_PXFS(lock)		((lock)->l_state & PXFS_LOCK)

/*
 * "local" requests don't involve the NFS lock manager in any way.
 * "remote" requests can be on the server (requests from a remote client),
 * in which case they should be associated with a local vnode (UFS, tmpfs,
 * etc.).  These requests are flagged with LOCKMGR_LOCK and are made using
 * kernel service threads.  Remote requests can also be on an NFS client,
 * because the NFS lock manager uses local locking for some of its
 * bookkeeping.  These requests are made by regular user processes.
 *
 * A request is local when GETSYSID() of its l_sysid is zero.
 */
#define	IS_LOCAL(lock)	(GETSYSID((lock)->l_flock.l_sysid) == 0)
#define	IS_REMOTE(lock)	(!IS_LOCAL(lock))

/* Clustering: Return value for blocking PXFS locks */
/*
 * For PXFS locks, reclock() will return this error code for requests that
 * need to block
 */
#define	PXFS_LOCK_BLOCKED	(-1)

2887c478bdstevel@tonic-gate/* Clustering: PXFS callback function */
2897c478bdstevel@tonic-gate/*
2907c478bdstevel@tonic-gate * This function is a callback from the LLM into the PXFS server module.  It
2917c478bdstevel@tonic-gate * is initialized as a weak stub, and is functional when the pxfs server module
2927c478bdstevel@tonic-gate * is loaded.
2937c478bdstevel@tonic-gate */
2947c478bdstevel@tonic-gateextern void cl_flk_state_transition_notify(lock_descriptor_t *lock,
2957c478bdstevel@tonic-gate    int old_state, int new_state);
2967c478bdstevel@tonic-gate
/*
 * True when the two locks conflict: they have different owners, at least
 * one of them is a write lock (F_WRLCK), and their ranges overlap.
 */
#define	BLOCKS(lock1, lock2)	\
	(!SAME_OWNER((lock1), (lock2)) && \
	(((lock1)->l_type == F_WRLCK) || ((lock2)->l_type == F_WRLCK)) && \
	OVERLAP((lock1), (lock2)))

/* True when the range of lock1 fully contains the range of lock2. */
#define	COVERS(lock1, lock2)	\
	(((lock1)->l_start <= (lock2)->l_start) && \
	((lock1)->l_end >= (lock2)->l_end))

/*
 * Unlink edge 'ep' from the incoming-edges list it is on.  The pointers of
 * 'ep' itself are left untouched.
 */
#define	IN_LIST_REMOVE(ep)	\
	do { \
		(ep)->edge_in_next->edge_in_prev = (ep)->edge_in_prev; \
		(ep)->edge_in_prev->edge_in_next = (ep)->edge_in_next; \
	} while (0)

/*
 * Unlink edge 'ep' from the adjacency list it is on.  The pointers of 'ep'
 * itself are left untouched.
 */
#define	ADJ_LIST_REMOVE(ep)	\
	do { \
		(ep)->edge_adj_next->edge_adj_prev = (ep)->edge_adj_prev; \
		(ep)->edge_adj_prev->edge_adj_next = (ep)->edge_adj_next; \
	} while (0)

/*
 * True when the lock's adjacency list is empty (its head points back at
 * itself) and the lock is not already in the GRANTED state.
 */
#define	NOT_BLOCKED(lock)	\
	((lock)->l_edge.edge_adj_next == &(lock)->l_edge && !IS_GRANTED(lock))

/*
 * Wakeup macros.  Each one moves the lock to a new state via
 * flk_set_state(), then sets the matching flag bit in l_state, and finally
 * signals the lock's condition variable.
 *
 * Clustering: PXFS locks do not sleep in the LLM, so there is no need to
 * signal them.
 */
#define	GRANT_WAKEUP(lock)	\
	do { \
		flk_set_state((lock), FLK_GRANTED_STATE); \
		(lock)->l_state |= GRANTED_LOCK; \
		if (!IS_PXFS(lock)) { \
			cv_signal(&(lock)->l_cv); \
		} \
	} while (0)

#define	CANCEL_WAKEUP(lock)	\
	do { \
		flk_set_state((lock), FLK_CANCELLED_STATE); \
		(lock)->l_state |= CANCELLED_LOCK; \
		if (!IS_PXFS(lock)) { \
			cv_signal(&(lock)->l_cv); \
		} \
	} while (0)

#define	INTERRUPT_WAKEUP(lock)	\
	do { \
		flk_set_state((lock), FLK_INTERRUPTED_STATE); \
		(lock)->l_state |= INTERRUPTED_LOCK; \
		if (!IS_PXFS(lock)) { \
			cv_signal(&(lock)->l_cv); \
		} \
	} while (0)

/*
 * Take the lock off the sleeping list: clear SLEEPING_LOCK in l_state,
 * unlink the lock from its neighbours and NULL out its own links.  The lock
 * is asserted to be in the SLEEPING, GRANTED, INTERRUPTED or CANCELLED
 * state.
 */
#define	REMOVE_SLEEP_QUEUE(lock)	\
	do { \
		ASSERT(IS_SLEEPING(lock) || IS_GRANTED(lock) || \
		    IS_INTERRUPTED(lock) || IS_CANCELLED(lock)); \
		(lock)->l_state &= ~SLEEPING_LOCK; \
		(lock)->l_next->l_prev = (lock)->l_prev; \
		(lock)->l_prev->l_next = (lock)->l_next; \
		(lock)->l_next = (lock)->l_prev = NULL; \
	} while (0)

/*
 * True when the lock's incoming-edges list is empty, i.e. the list head
 * embedded in the lock points back at itself.
 */
#define	NO_DEPENDENTS(lock)	\
	((lock)->l_edge.edge_in_next == &(lock)->l_edge)

/*
 * Mark the lock as granted without signalling anybody: set GRANTED_LOCK in
 * l_state first, then move the lock to FLK_GRANTED_STATE through
 * flk_set_state().
 */
#define	GRANT(lock)	\
	do { \
		(lock)->l_state |= GRANTED_LOCK; \
		flk_set_state((lock), FLK_GRANTED_STATE); \
	} while (0)

/*
 * Accessors for walking a lock's edge lists: HEAD() is the list head
 * embedded in the lock, FIRST_IN()/FIRST_ADJ() the first entry of the
 * incoming/adjacency list, NEXT_IN()/NEXT_ADJ() the successor of an edge.
 */
#define	FIRST_IN(lock)	((lock)->l_edge.edge_in_next)
#define	FIRST_ADJ(lock)	((lock)->l_edge.edge_adj_next)
#define	HEAD(lock)	(&(lock)->l_edge)
#define	NEXT_ADJ(ep)	((ep)->edge_adj_next)
#define	NEXT_IN(ep)	((ep)->edge_in_next)
/*
 * Make both edge lists of the lock empty by pointing every link of the
 * embedded list head back at the head itself.
 */
#define	IN_ADJ_INIT(lock)	\
	do { \
		(lock)->l_edge.edge_adj_next = &(lock)->l_edge; \
		(lock)->l_edge.edge_adj_prev = &(lock)->l_edge; \
		(lock)->l_edge.edge_in_next = &(lock)->l_edge; \
		(lock)->l_edge.edge_in_prev = &(lock)->l_edge; \
	} while (0)

/*
 * Copy the descriptive members of lock2 into lock1: graph, vnode, type,
 * flag bits, range, original flock request, zone and proc vertex index.
 * List linkage, edges, l_status and the other members are left alone.
 *
 * NOTE(review): l_ofd is not copied although SAME_OWNER() compares it --
 * confirm against the callers that this is intentional.
 */
#define	COPY(lock1, lock2)	\
	do { \
		(lock1)->l_graph = (lock2)->l_graph; \
		(lock1)->l_vnode = (lock2)->l_vnode; \
		(lock1)->l_type = (lock2)->l_type; \
		(lock1)->l_state = (lock2)->l_state; \
		(lock1)->l_start = (lock2)->l_start; \
		(lock1)->l_end = (lock2)->l_end; \
		(lock1)->l_flock = (lock2)->l_flock; \
		(lock1)->l_zoneid = (lock2)->l_zoneid; \
		(lock1)->pvertex = (lock2)->pvertex; \
	} while (0)

/*
 * Clustering
 */
/* Routines to set and get the NLM state in a lock request */
#define	SET_NLM_STATE(lock, nlm_state)	((lock)->l_nlm_state = (nlm_state))
#define	GET_NLM_STATE(lock)	((lock)->l_nlm_state)
/*
 * NLM registry abstraction:
 *   Abstraction overview:
 *   This registry keeps track of the NLM servers via their nlmids
 *   that have requested locks at the LLM this registry is associated
 *   with.
 *
 *   The macros treat the registry as an array of NLM states indexed by
 *   nlmid; none of them range-checks the nlmid.
 */
/* Routines to manipulate the NLM registry object state */
#define	FLK_REGISTRY_IS_NLM_UNKNOWN(nlmreg, nlmid) \
	((nlmreg)[nlmid] == FLK_NLM_UNKNOWN)
#define	FLK_REGISTRY_IS_NLM_UP(nlmreg, nlmid) \
	((nlmreg)[nlmid] == FLK_NLM_UP)
#define	FLK_REGISTRY_ADD_NLMID(nlmreg, nlmid) \
	((nlmreg)[nlmid] = FLK_NLM_UP)
#define	FLK_REGISTRY_CHANGE_NLM_STATE(nlmreg, nlmid, state) \
	((nlmreg)[nlmid] = (state))

/*
 * Indicates the effect of executing a request on the existing locks.
 * These are four distinct values (1 through 4), not bit flags.
 */

#define	FLK_UNLOCK	0x1	/* request unlocks the existing lock */
#define	FLK_DOWNGRADE	0x2	/* request downgrades the existing lock */
#define	FLK_UPGRADE	0x3	/* request upgrades the existing lock */
#define	FLK_STAY_SAME	0x4	/* request type is same as existing lock */


/*	proc graph definitions	*/

/*
 * Proc graph is the global process graph that maintains information
 * about the dependencies between processes. An edge is added between two
 * processes represented by proc_vertex's A and B, iff there exists l1
 * owned by process A in any of the lock_graph's dependent on l2
 * (thus having an edge to l2) owned by process B.
 */
struct proc_vertex {
	pid_t			pid;	/* pid of the process */
	long			sysid;	/* sysid of the process */
	struct proc_edge	*edge;	/* adjacent edges of this process */
	int			incount; /* Number of inedges to this process */
	struct proc_edge	*p_sedge; /* used for implementing stack alg. */
	struct proc_vertex	*p_stack; /* used for stack alg. */
	int			atime;	/* used for cycle detection algorithm */
	int			dtime;	/* used for cycle detection algorithm */
	int			index;	/* index into the array of proc_graph */
					/* vertices */
};

typedef struct proc_vertex	proc_vertex_t;

/*
 * Edge of the proc graph.  The edges leaving one proc_vertex form a singly
 * linked list through 'next'.
 */
struct proc_edge {
	struct proc_edge	*next;		/* next edge in adjacency list */
	int			refcount;	/* reference count of this edge */
	struct proc_vertex	*to_proc;	/* process this points to */
};

typedef struct proc_edge	proc_edge_t;


/*
 * NOTE(review): presumably the number of slots by which the proc list is
 * grown at a time -- confirm against flock.c.
 */
#define	PROC_CHUNK	100

/* The proc graph: a list of proc_vertex pointers plus bookkeeping. */
struct proc_graph {
	struct proc_vertex	**proc;	/* list of proc_vertexes */
	int			gcount;	/* list size */
	int			free;	/* number of free slots in the list */
	int			mark;	/* used for graph coloring */
};

typedef struct proc_graph	proc_graph_t;

extern struct proc_graph	pgraph;

/*
 * True when the lock's request carries the pid and sysid recorded in the
 * given proc vertex.
 */
#define	PROC_SAME_OWNER(lock, pvertex)	\
	(((lock)->l_flock.l_pid == (pvertex)->pid) && \
	((lock)->l_flock.l_sysid == (pvertex)->sysid))

/*
 * Arrival/departure stamps for the proc graph traversal.  A vertex has
 * arrived (departed) in the current pass when its atime (dtime) equals the
 * current pgraph.mark; PROC_ARRIVE()/PROC_DEPART() store that mark.
 */
#define	PROC_ARRIVE(pvertex)	((pvertex)->atime = pgraph.mark)
#define	PROC_DEPART(pvertex)	((pvertex)->dtime = pgraph.mark)
#define	PROC_ARRIVED(pvertex)	((pvertex)->atime == pgraph.mark)
#define	PROC_DEPARTED(pvertex)	((pvertex)->dtime == pgraph.mark)

4887c478bdstevel@tonic-gate#ifdef	__cplusplus
4897c478bdstevel@tonic-gate}
4907c478bdstevel@tonic-gate#endif
4917c478bdstevel@tonic-gate
4927c478bdstevel@tonic-gate#endif	/* _SYS_FLOCK_IMPL_H */
493