/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 */

#include "config.h"

#ifndef lint
static const char sccsid[] = "@(#)lock.c	10.61 (Sleepycat) 1/3/99";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <errno.h>
#include <string.h>
#endif

#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "db_shash.h"
#include "lock.h"
#include "db_am.h"
#include "txn_auto.h"
#include "txn_ext.h"
#include "common_ext.h"

static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, DB_TXN *,
    u_int32_t, const DBT *, db_lockmode_t, struct __db_lock **));
static int  __lock_is_parent __P((u_int32_t, DB_TXN *));
static int  __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *));
static int  __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
static void __lock_remove_waiter
    __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
static int  __lock_vec_internal __P((DB_LOCKTAB *, u_int32_t, DB_TXN *,
    u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **));

int
lock_id(lt, idp)
	DB_LOCKTAB *lt;
	u_int32_t *idp;
{
	u_int32_t id;

	LOCK_PANIC_CHECK(lt);

	LOCK_LOCKREGION(lt);
	if (lt->region->id >= DB_LOCK_MAXID)
		lt->region->id = 0;
	id = ++lt->region->id;
	UNLOCK_LOCKREGION(lt);

	*idp = id;
	return (0);
}
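
/*
 * Editor's illustrative sketch (not part of the original source; the
 * LOCK_EXAMPLE_CODE guard and the example_* names are hypothetical):
 * a typical caller allocates a locker ID once, then uses it to acquire
 * and release locks on an application-defined object.
 */
#ifdef LOCK_EXAMPLE_CODE
static int
example_lock_id_usage(lt)
	DB_LOCKTAB *lt;
{
	DBT obj;
	DB_LOCK lock;
	u_int32_t locker;
	int ret;

	/* Allocate a locker ID for this thread of control. */
	if ((ret = lock_id(lt, &locker)) != 0)
		return (ret);

	/* The object being locked is an opaque byte string. */
	memset(&obj, 0, sizeof(obj));
	obj.data = "an-object";
	obj.size = sizeof("an-object") - 1;

	/* Acquire a read lock on the object, then release it. */
	if ((ret = lock_get(lt, locker, 0, &obj, DB_LOCK_READ, &lock)) != 0)
		return (ret);
	return (lock_put(lt, lock));
}
#endif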

int
lock_vec(lt, locker, flags, list, nlist, elistp)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	int nlist;
	DB_LOCKREQ *list, **elistp;
{
	return (__lock_vec_internal(lt,
	    locker, NULL, flags, list, nlist, elistp));
}

int
lock_tvec(lt, txn, flags, list, nlist, elistp)
	DB_LOCKTAB *lt;
	DB_TXN *txn;
	u_int32_t flags;
	int nlist;
	DB_LOCKREQ *list, **elistp;
{
	return (__lock_vec_internal(lt,
	    txn->txnid, txn, flags, list, nlist, elistp));
}
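
/*
 * Editor's illustrative sketch (hypothetical guard and names): lock_vec
 * services its request list while holding the region lock, so the list
 * is processed as a unit.  Here a DB_LOCK_GET is followed by a
 * DB_LOCK_PUT_ALL that releases everything the locker holds.
 */
#ifdef LOCK_EXAMPLE_CODE
static int
example_lock_vec_usage(lt, locker, obj)
	DB_LOCKTAB *lt;
	u_int32_t locker;
	DBT *obj;
{
	DB_LOCKREQ reqs[2], *errp;

	memset(reqs, 0, sizeof(reqs));
	reqs[0].op = DB_LOCK_GET;
	reqs[0].obj = obj;
	reqs[0].mode = DB_LOCK_WRITE;
	reqs[1].op = DB_LOCK_PUT_ALL;

	/* On failure, errp is set to the request that failed. */
	return (lock_vec(lt, locker, 0, reqs, 2, &errp));
}
#endif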

static int
__lock_vec_internal(lt, locker, txn, flags, list, nlist, elistp)
	DB_LOCKTAB *lt;
	u_int32_t locker;
	DB_TXN *txn;
	u_int32_t flags;
	int nlist;
	DB_LOCKREQ *list, **elistp;
{
	struct __db_lock *lp;
	DB_LOCKOBJ *sh_obj, *sh_locker, *sh_parent;
	int i, ret, run_dd;

	LOCK_PANIC_CHECK(lt);

	/* Validate arguments. */
	if ((ret =
	    __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
		return (ret);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) != 0) {
		UNLOCK_LOCKREGION(lt);
		return (ret);
	}

	ret = 0;
	for (i = 0; i < nlist && ret == 0; i++) {
		switch (list[i].op) {
		case DB_LOCK_GET:
			ret = __lock_get_internal(lt, locker, txn, flags,
			    list[i].obj, list[i].mode, &lp);
			if (ret == 0) {
				list[i].lock = LOCK_TO_OFFSET(lt, lp);
				lt->region->nrequests++;
			}
			break;
		case DB_LOCK_INHERIT:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;
			if (txn == NULL || txn->parent == NULL) {
				ret = EINVAL;
				break;
			}

			if ((ret = __lock_getobj(lt, txn->parent->txnid,
			    NULL, DB_LOCK_LOCKER, &sh_parent)) != 0)
				break;

			/*
			 * Traverse all the locks held by this locker.  Remove
			 * the locks from the locker's list and put them on the
			 * parent's list.
			 */
			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
				SH_LIST_REMOVE(lp, locker_links, __db_lock);
				SH_LIST_INSERT_HEAD(&sh_parent->heldby, lp,
				    locker_links, __db_lock);
				lp->holder = txn->parent->txnid;
			}
			__lock_freeobj(lt, sh_locker);
			lt->region->nlockers--;
			break;
		case DB_LOCK_PUT:
			lp = OFFSET_TO_LOCK(lt, list[i].lock);
			if (lp->holder != locker) {
				ret = DB_LOCK_NOTHELD;
				break;
			}
			list[i].mode = lp->mode;

			ret = __lock_put_internal(lt, lp, 0);
			__lock_checklocker(lt, lp, 0);
			break;
		case DB_LOCK_PUT_ALL:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;

			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
				if ((ret = __lock_put_internal(lt, lp, 1)) != 0)
					break;
			}
			__lock_freeobj(lt, sh_locker);
			lt->region->nlockers--;
			break;
		case DB_LOCK_PUT_OBJ:
			/* Look up the object in the hash table. */
			HASHLOOKUP(lt->hashtab, __db_lockobj, links,
			    list[i].obj, sh_obj, lt->region->table_size,
			    __lock_ohash, __lock_cmp);
			if (sh_obj == NULL) {
				ret = EINVAL;
				break;
			}
			/*
			 * Release waiters first, because they won't cause
			 * anyone else to be awakened.  If we release the
			 * lockers first, all the waiters get awakened
			 * needlessly.
			 */
			for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) {
				lt->region->nreleases += lp->refcount;
				__lock_remove_waiter(lt, sh_obj, lp,
				    DB_LSTAT_NOGRANT);
				__lock_checklocker(lt, lp, 1);
			}

			for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
				lt->region->nreleases += lp->refcount;
				SH_LIST_REMOVE(lp, locker_links, __db_lock);
				SH_TAILQ_REMOVE(&sh_obj->holders, lp, links,
				    __db_lock);
				lp->status = DB_LSTAT_FREE;
				SH_TAILQ_INSERT_HEAD(&lt->region->free_locks,
				    lp, links, __db_lock);
			}

			/* Now free the object. */
			__lock_freeobj(lt, sh_obj);
			break;
#ifdef DEBUG
		case DB_LOCK_DUMP:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;

			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) {
				__lock_printlock(lt, lp, 1);
				ret = EINVAL;
			}
			if (ret == 0) {
				__lock_freeobj(lt, sh_locker);
				lt->region->nlockers--;
			}
			break;
#endif
		default:
			ret = EINVAL;
			break;
		}
	}

	if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
		run_dd = 1;
		lt->region->need_dd = 0;
	} else
		run_dd = 0;

	UNLOCK_LOCKREGION(lt);

	if (ret == 0 && run_dd)
		lock_detect(lt, 0, lt->region->detect);

	if (elistp && ret != 0)
		*elistp = &list[i - 1];
	return (ret);
}

int
lock_get(lt, locker, flags, obj, lock_mode, lock)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	const DBT *obj;
	db_lockmode_t lock_mode;
	DB_LOCK *lock;
{
	struct __db_lock *lockp;
	int ret;

	LOCK_PANIC_CHECK(lt);

	/* Validate arguments. */
	if ((ret = __db_fchk(lt->dbenv,
	    "lock_get", flags, DB_LOCK_NOWAIT | DB_LOCK_UPGRADE)) != 0)
		return (ret);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) == 0) {
		if (LF_ISSET(DB_LOCK_UPGRADE))
			lockp = OFFSET_TO_LOCK(lt, *lock);

		if ((ret = __lock_get_internal(lt,
		    locker, NULL, flags, obj, lock_mode, &lockp)) == 0) {
			if (!LF_ISSET(DB_LOCK_UPGRADE))
				*lock = LOCK_TO_OFFSET(lt, lockp);
			lt->region->nrequests++;
		}
	}

	UNLOCK_LOCKREGION(lt);
	return (ret);
}

int
lock_tget(lt, txn, flags, obj, lock_mode, lock)
	DB_LOCKTAB *lt;
	DB_TXN *txn;
	u_int32_t flags;
	const DBT *obj;
	db_lockmode_t lock_mode;
	DB_LOCK *lock;
{
	struct __db_lock *lockp;
	int ret;

	LOCK_PANIC_CHECK(lt);

	/* Validate arguments. */
	if ((ret = __db_fchk(lt->dbenv,
	    "lock_tget", flags, DB_LOCK_NOWAIT | DB_LOCK_UPGRADE)) != 0)
		return (ret);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) == 0) {
		if (LF_ISSET(DB_LOCK_UPGRADE))
			lockp = OFFSET_TO_LOCK(lt, *lock);

		if ((ret = __lock_get_internal(lt,
		    txn->txnid, txn, flags, obj, lock_mode, &lockp)) == 0) {
			if (!LF_ISSET(DB_LOCK_UPGRADE))
				*lock = LOCK_TO_OFFSET(lt, lockp);
			lt->region->nrequests++;
		}
	}

	UNLOCK_LOCKREGION(lt);
	return (ret);
}

int
lock_put(lt, lock)
	DB_LOCKTAB *lt;
	DB_LOCK lock;
{
	struct __db_lock *lockp;
	int ret, run_dd;

	LOCK_PANIC_CHECK(lt);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) != 0) {
		/* Don't return with the region mutex held. */
		UNLOCK_LOCKREGION(lt);
		return (ret);
	}

	lockp = OFFSET_TO_LOCK(lt, lock);
	ret = __lock_put_internal(lt, lockp, 0);

	__lock_checklocker(lt, lockp, 0);

	if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
		run_dd = 1;
		lt->region->need_dd = 0;
	} else
		run_dd = 0;

	UNLOCK_LOCKREGION(lt);

	if (ret == 0 && run_dd)
		lock_detect(lt, 0, lt->region->detect);

	return (ret);
}

static int
__lock_put_internal(lt, lockp, do_all)
	DB_LOCKTAB *lt;
	struct __db_lock *lockp;
	int do_all;
{
	DB_LOCKOBJ *sh_obj;
	int state_changed;

	if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD &&
	    lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) {
		__db_err(lt->dbenv, "lock_put: invalid lock %lu",
		    (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region));
		return (EINVAL);
	}

	if (do_all)
		lt->region->nreleases += lockp->refcount;
	else
		lt->region->nreleases++;
	if (do_all == 0 && lockp->refcount > 1) {
		lockp->refcount--;
		return (0);
	}

	/* Get the object associated with this lock. */
	sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);

	/* Remove the lock from its locker's list. */
	SH_LIST_REMOVE(lockp, locker_links, __db_lock);

	/* Remove this lock from the object's holders or waiters list. */
	if (lockp->status != DB_LSTAT_HELD)
		__lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE);
	else
		SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);

	state_changed = __lock_promote(lt, sh_obj);

	/* If no one holds the object any longer, reclaim it. */
	if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
		HASHREMOVE_EL(lt->hashtab, __db_lockobj,
		    links, sh_obj, lt->region->table_size, __lock_lhash);
		if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
			__db_shalloc_free(lt->mem,
			    SH_DBT_PTR(&sh_obj->lockobj));
		SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
		    __db_lockobj);
		state_changed = 1;
	}

	/* Return the lock to the free list. */
	lockp->status = DB_LSTAT_FREE;
	SH_TAILQ_INSERT_HEAD(&lt->region->free_locks, lockp, links, __db_lock);

	/*
	 * If we did not promote anyone, we need to run the deadlock
	 * detector again.
	 */
	if (state_changed == 0)
		lt->region->need_dd = 1;

	return (0);
}

static int
__lock_get_internal(lt, locker, txn, flags, obj, lock_mode, lockp)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	DB_TXN *txn;
	const DBT *obj;
	db_lockmode_t lock_mode;
	struct __db_lock **lockp;
{
	struct __db_lock *newl, *lp;
	DB_LOCKOBJ *sh_obj, *sh_locker;
	DB_LOCKREGION *lrp;
	size_t newl_off;
	int ihold, no_dd, ret;

	no_dd = ret = 0;

	/* Check that the lock mode is valid. */
	lrp = lt->region;
	if ((u_int32_t)lock_mode >= lrp->nmodes) {
		__db_err(lt->dbenv,
		    "lock_get: invalid lock mode %lu", (u_long)lock_mode);
		return (EINVAL);
	}

	/* Allocate a new lock.  Optimize for the common case of a grant. */
	if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) {
		if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0)
			return (ret);
		lrp = lt->region;
		newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
	}
	newl_off = LOCK_TO_OFFSET(lt, newl);

	SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock);

	newl->mode = lock_mode;
	newl->status = DB_LSTAT_HELD;
	newl->holder = locker;
	newl->refcount = 1;

	if ((ret = __lock_getobj(lt, 0, obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0) {
		/* Don't leak the lock we just allocated. */
		lrp = lt->region;		/* getobj may have grown. */
		newl = OFFSET_TO_LOCK(lt, newl_off);
		newl->status = DB_LSTAT_FREE;
		SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
		return (ret);
	}

	lrp = lt->region;		/* getobj may have grown the region. */
	newl = OFFSET_TO_LOCK(lt, newl_off);

	/* Make the new lock point to its object. */
	newl->obj = SH_PTR_TO_OFF(newl, sh_obj);

	/*
	 * Now we have a lock and an object and we need to decide whether
	 * to grant the lock.  We use a FIFO ordering, so we can only grant
	 * a new lock if it does not conflict with anyone on the holders
	 * list OR anyone on the waiters list.  The reason that we don't
	 * grant in the face of a conflict is that doing so can lead to
	 * starvation (a writer waiting on a popularly read item would
	 * never be granted).  The downside is that a waiting reader can
	 * prevent an upgrade from reader to writer, which is not uncommon.
	 *
	 * There is one exception to the no-conflict rule.  If a lock is held
	 * by the requesting locker AND the new lock does not conflict with
	 * any other holders, then we grant the lock.  The most common place
	 * this happens is when the holder has a WRITE lock and a READ lock
	 * request comes in for the same locker.  If we did not grant the
	 * read lock, we would guarantee deadlock.
	 *
	 * In case of conflict, we put the new lock on the end of the waiters
	 * list, unless we are upgrading, in which case the locker goes on
	 * the front of the list.
	 */
	ihold = 0;
	for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
	    lp != NULL;
	    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
		if (locker == lp->holder ||
		    __lock_is_parent(lp->holder, txn)) {
			if (lp->mode == lock_mode &&
			    lp->status == DB_LSTAT_HELD) {
				if (LF_ISSET(DB_LOCK_UPGRADE))
					goto upgrade;

				/*
				 * The lock is already held; increment the
				 * reference count and return this lock.
				 */
				lp->refcount++;
				*lockp = lp;
				SH_TAILQ_INSERT_HEAD(&lrp->free_locks,
				    newl, links, __db_lock);
				return (0);
			} else
				ihold = 1;
		} else if (CONFLICTS(lt, lp->mode, lock_mode))
			break;
	}

	/*
	 * If we are upgrading, there are two scenarios: either we had no
	 * conflicts, so we can do the upgrade, or there is a conflict and
	 * we should wait at the HEAD of the waiters list.
	 */
	if (LF_ISSET(DB_LOCK_UPGRADE)) {
		if (lp == NULL)
			goto upgrade;

		/* There was a conflict: wait. */
		SH_TAILQ_INSERT_HEAD(&sh_obj->waiters, newl, links, __db_lock);
		goto wait;
	}

	if (lp == NULL && !ihold)
		for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
		    lp != NULL;
		    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
			if (CONFLICTS(lt, lp->mode, lock_mode) &&
			    locker != lp->holder)
				break;
		}
	if (lp == NULL)
		SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links);
	else if (!LF_ISSET(DB_LOCK_NOWAIT))
		SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links);
	else {
		/* Free the lock and return an error. */
		newl->status = DB_LSTAT_FREE;
		SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
		return (DB_LOCK_NOTGRANTED);
	}

	/*
	 * Now, insert the lock onto its locker's list.  If the locker does
	 * not currently hold any locks, there's no reason to run the
	 * deadlock detector; save that information.
	 */
	if ((ret =
	    __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
		return (ret);
	no_dd = SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL;

	lrp = lt->region;
	SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);

	if (lp != NULL) {
		/*
		 * This lock is a blocker for the requesting process, so
		 * initialize its mutex in the locked (set) state.  That way
		 * the current process will block when it tries to acquire
		 * the mutex, and the process granting the lock will wake it
		 * by releasing the mutex.
		 */
wait:		(void)__db_mutex_init(&newl->mutex,
		    MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
		(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);

		newl->status = DB_LSTAT_WAITING;
		lrp->nconflicts++;

		/*
		 * We are about to wait, so we must release the region mutex.
		 * When we wake up, we need to reacquire the region mutex
		 * before continuing.
		 */
		if (lrp->detect == DB_LOCK_NORUN)
			lt->region->need_dd = 1;
		UNLOCK_LOCKREGION(lt);

		/*
		 * Before waiting, see if the deadlock detector should be
		 * run.
		 */
		if (lrp->detect != DB_LOCK_NORUN && !no_dd)
			(void)lock_detect(lt, 0, lrp->detect);

		(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);

		LOCK_LOCKREGION(lt);
		if (newl->status != DB_LSTAT_PENDING) {
			/*
			 * If this lock errored due to a deadlock, then
			 * we have waiters that require promotion.
			 */
			if (newl->status == DB_LSTAT_ABORTED)
				(void)__lock_promote(lt, sh_obj);
			/* Return the lock to the free list. */
			__lock_checklocker(lt, newl, 0);
			SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
			    __db_lock);
			switch (newl->status) {
			case DB_LSTAT_ABORTED:
				ret = DB_LOCK_DEADLOCK;
				break;
			case DB_LSTAT_NOGRANT:
				ret = DB_LOCK_NOTGRANTED;
				break;
			default:
				ret = EINVAL;
				break;
			}
			newl->status = DB_LSTAT_FREE;
			newl = NULL;
		} else if (LF_ISSET(DB_LOCK_UPGRADE)) {
			/*
			 * The lock that was just granted got put on the
			 * holders list.  Since we're upgrading some other
			 * lock, we have to remove it here.
			 */
			SH_TAILQ_REMOVE(&sh_obj->holders,
			    newl, links, __db_lock);
			goto upgrade;
		} else
			newl->status = DB_LSTAT_HELD;
	}

	*lockp = newl;
	return (ret);

upgrade:
	/*
	 * This was an upgrade, so return the new lock to the free list and
	 * upgrade the mode of the existing lock.
	 */
	(*lockp)->mode = lock_mode;
	newl->status = DB_LSTAT_FREE;
	SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
	return (0);
}
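
/*
 * Editor's illustrative sketch (hypothetical guard and names): with
 * DB_LOCK_UPGRADE set, lock_get does not hand back a new lock; it
 * changes the mode of the lock already referenced through *lockp, so
 * the caller passes in the existing DB_LOCK and continues to use it.
 */
#ifdef LOCK_EXAMPLE_CODE
static int
example_lock_upgrade(lt, locker, obj, lockp)
	DB_LOCKTAB *lt;
	u_int32_t locker;
	DBT *obj;
	DB_LOCK *lockp;
{
	/* *lockp was previously acquired in DB_LOCK_READ mode. */
	return (lock_get(lt,
	    locker, DB_LOCK_UPGRADE, obj, DB_LOCK_WRITE, lockp));
}
#endif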

/*
 * __lock_is_locked --
 *
 * PUBLIC: int __lock_is_locked
 * PUBLIC:    __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t));
 */
int
__lock_is_locked(lt, locker, dbt, mode)
	DB_LOCKTAB *lt;
	u_int32_t locker;
	DBT *dbt;
	db_lockmode_t mode;
{
	struct __db_lock *lp;
	DB_LOCKOBJ *sh_obj;
	DB_LOCKREGION *lrp;

	lrp = lt->region;

	/* Look up the object in the hash table. */
	HASHLOOKUP(lt->hashtab, __db_lockobj, links,
	    dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp);
	if (sh_obj == NULL)
		return (0);

	for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
	    lp != NULL;
	    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
		if (lp->holder == locker && lp->mode == mode)
			return (1);
	}

	return (0);
}

/*
 * __lock_printlock --
 *
 * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
 */
void
__lock_printlock(lt, lp, ispgno)
	DB_LOCKTAB *lt;
	struct __db_lock *lp;
	int ispgno;
{
	DB_LOCKOBJ *lockobj;
	db_pgno_t pgno;
	size_t obj;
	u_int8_t *ptr;
	const char *mode, *status;

	switch (lp->mode) {
	case DB_LOCK_IREAD:
		mode = "IREAD";
		break;
	case DB_LOCK_IWR:
		mode = "IWR";
		break;
	case DB_LOCK_IWRITE:
		mode = "IWRITE";
		break;
	case DB_LOCK_NG:
		mode = "NG";
		break;
	case DB_LOCK_READ:
		mode = "READ";
		break;
	case DB_LOCK_WRITE:
		mode = "WRITE";
		break;
	default:
		mode = "UNKNOWN";
		break;
	}
	switch (lp->status) {
	case DB_LSTAT_ABORTED:
		status = "ABORT";
		break;
	case DB_LSTAT_ERR:
		status = "ERROR";
		break;
	case DB_LSTAT_FREE:
		status = "FREE";
		break;
	case DB_LSTAT_HELD:
		status = "HELD";
		break;
	case DB_LSTAT_NOGRANT:
		status = "NONE";
		break;
	case DB_LSTAT_WAITING:
		status = "WAIT";
		break;
	case DB_LSTAT_PENDING:
		status = "PENDING";
		break;
	default:
		status = "UNKNOWN";
		break;
	}
	printf("\t%lx\t%s\t%lu\t%s\t",
	    (u_long)lp->holder, mode, (u_long)lp->refcount, status);

	lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
	ptr = SH_DBT_PTR(&lockobj->lockobj);
	if (ispgno) {
		/*
		 * Assume this is a page lock: the object's data begins
		 * with the page number.
		 */
		memcpy(&pgno, ptr, sizeof(db_pgno_t));
		printf("page %lu\n", (u_long)pgno);
	} else {
		obj = (u_int8_t *)lp + lp->obj - (u_int8_t *)lt->region;
		printf("0x%lx ", (u_long)obj);
		__db_pr(ptr, lockobj->lockobj.size);
		printf("\n");
	}
}

/*
 * __lock_getobj --
 *
 * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *,
 * PUBLIC:     u_int32_t, const DBT *, u_int32_t, DB_LOCKOBJ **));
 */
int
__lock_getobj(lt, locker, dbt, type, objp)
	DB_LOCKTAB *lt;
	u_int32_t locker, type;
	const DBT *dbt;
	DB_LOCKOBJ **objp;
{
	DB_LOCKREGION *lrp;
	DB_LOCKOBJ *sh_obj;
	u_int32_t obj_size;
	int ret;
	void *p, *src;

	lrp = lt->region;

	/* Look up the object in the hash table. */
	if (type == DB_LOCK_OBJTYPE) {
		HASHLOOKUP(lt->hashtab, __db_lockobj, links, dbt, sh_obj,
		    lrp->table_size, __lock_ohash, __lock_cmp);
		obj_size = dbt->size;
	} else {
		HASHLOOKUP(lt->hashtab, __db_lockobj, links, locker,
		    sh_obj, lrp->table_size, __lock_locker_hash,
		    __lock_locker_cmp);
		obj_size = sizeof(locker);
	}

	/*
	 * If we found the object, we can simply return it.  Otherwise,
	 * we need to create it.
	 */
	if (sh_obj == NULL) {
		/* Create a new object and insert it into the hash table. */
		if ((sh_obj =
		    SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) {
			if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
				return (ret);
			lrp = lt->region;
			sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
		}

		/*
		 * If the object fits in the structure itself, store it
		 * there instead of shalloc-ing space for it.
		 */
		if (obj_size <= sizeof(sh_obj->objdata))
			p = sh_obj->objdata;
		else
			if ((ret =
			    __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
				if ((ret = __lock_grow_region(lt,
				    DB_LOCK_MEM, obj_size)) != 0)
					return (ret);
				lrp = lt->region;
				/* Reacquire the head of the free list. */
				sh_obj = SH_TAILQ_FIRST(&lrp->free_objs,
				    __db_lockobj);
				(void)__db_shalloc(lt->mem, obj_size, 0, &p);
			}

		src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
		memcpy(p, src, obj_size);

		sh_obj->type = type;
		SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);

		SH_TAILQ_INIT(&sh_obj->waiters);
		if (type == DB_LOCK_LOCKER)
			SH_LIST_INIT(&sh_obj->heldby);
		else
			SH_TAILQ_INIT(&sh_obj->holders);
		sh_obj->lockobj.size = obj_size;
		sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);

		HASHINSERT(lt->hashtab,
		    __db_lockobj, links, sh_obj, lrp->table_size, __lock_lhash);

		if (type == DB_LOCK_LOCKER)
			lrp->nlockers++;
	}

	*objp = sh_obj;
	return (0);
}

/*
 * Any lock on the waitlist has a process waiting for it.  Therefore, we
 * can't return the lock to the freelist immediately.  Instead, we can
 * remove the lock from the list of waiters, set the status field of the
 * lock, and then let the process waking up return the lock to the
 * free list.
 */
static void
__lock_remove_waiter(lt, sh_obj, lockp, status)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *sh_obj;
	struct __db_lock *lockp;
	db_status_t status;
{
	SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
	lockp->status = status;

	/* Wake whoever is waiting on this lock. */
	(void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd);
}

static void
__lock_checklocker(lt, lockp, do_remove)
	DB_LOCKTAB *lt;
	struct __db_lock *lockp;
	int do_remove;
{
	DB_LOCKOBJ *sh_locker;

	if (do_remove)
		SH_LIST_REMOVE(lockp, locker_links, __db_lock);

	/* If the locker no longer holds any locks, free up its object. */
	if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker)
	    == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) {
		__lock_freeobj(lt, sh_locker);
		lt->region->nlockers--;
	}
}

static void
__lock_freeobj(lt, obj)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *obj;
{
	HASHREMOVE_EL(lt->hashtab,
	    __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
	if (obj->lockobj.size > sizeof(obj->objdata))
		__db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
	SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
}

/*
 * __lock_downgrade --
 *	Used by the concurrent access product to downgrade write locks
 * back to iwrite locks.
 *
 * PUBLIC: int __lock_downgrade __P((DB_LOCKTAB *,
 * PUBLIC:     DB_LOCK, db_lockmode_t, u_int32_t));
 */
int
__lock_downgrade(lt, lock, new_mode, flags)
	DB_LOCKTAB *lt;
	DB_LOCK lock;
	db_lockmode_t new_mode;
	u_int32_t flags;
{
	struct __db_lock *lockp;
	DB_LOCKOBJ *obj;
	int ret;

	COMPQUIET(flags, 0);
	LOCK_PANIC_CHECK(lt);
	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) == 0) {
		lockp = OFFSET_TO_LOCK(lt, lock);
		lockp->mode = new_mode;

		/* Get the object associated with this lock. */
		obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
		(void)__lock_promote(lt, obj);
		++lt->region->nreleases;
	}

	UNLOCK_LOCKREGION(lt);

	return (ret);
}
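
/*
 * Editor's illustrative sketch (hypothetical guard and name): a writer
 * in the concurrent access product that has finished modifying pages
 * steps its WRITE lock back down to IWRITE, allowing waiting readers
 * to be promoted.
 */
#ifdef LOCK_EXAMPLE_CODE
static int
example_lock_downgrade(lt, lock)
	DB_LOCKTAB *lt;
	DB_LOCK lock;
{
	return (__lock_downgrade(lt, lock, DB_LOCK_IWRITE, 0));
}
#endif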

/*
 * __lock_promote --
 *
 * Look through the waiters and holders lists and decide which (if any)
 * locks can be promoted.  Promote any that are eligible.
 */
static int
__lock_promote(lt, obj)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *obj;
{
	struct __db_lock *lp_w, *lp_h, *next_waiter;
	int state_changed, waiter_is_txn;

	/*
	 * We need to do lock promotion.  We also need to determine if
	 * we're going to need to run the deadlock detector again.  If
	 * we release locks, and there are waiters, but no one gets promoted,
	 * then we haven't fundamentally changed the lockmgr state, so
	 * we may still have a deadlock and we have to run again.  However,
	 * if there were no waiters, or we actually promoted someone, then
	 * we are OK and we don't have to run it immediately.
	 *
	 * During promotion, we look for state changes so we can return
	 * this information to the caller.
	 */
	for (lp_w = SH_TAILQ_FIRST(&obj->waiters, __db_lock),
	    state_changed = lp_w == NULL;
	    lp_w != NULL;
	    lp_w = next_waiter) {
		waiter_is_txn = TXN_IS_HOLDING(lp_w);
		next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);
		for (lp_h = SH_TAILQ_FIRST(&obj->holders, __db_lock);
		    lp_h != NULL;
		    lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
			if (CONFLICTS(lt, lp_h->mode, lp_w->mode) &&
			    lp_h->holder != lp_w->holder &&
			    !(waiter_is_txn &&
			    TXN_IS_HOLDING(lp_h) &&
			    __txn_is_ancestor(lt->dbenv->tx_info,
			    lp_h->txnoff, lp_w->txnoff)))
				break;
		}
		if (lp_h != NULL)	/* Found a conflict. */
			break;

		/* No conflict, promote the waiting lock. */
		SH_TAILQ_REMOVE(&obj->waiters, lp_w, links, __db_lock);
		lp_w->status = DB_LSTAT_PENDING;
		SH_TAILQ_INSERT_TAIL(&obj->holders, lp_w, links);

		/* Wake up the waiter. */
		(void)__db_mutex_unlock(&lp_w->mutex, lt->reginfo.fd);
		state_changed = 1;
	}

	return (state_changed);
}
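
/*
 * Editor's illustrative sketch (hypothetical guard, names, and table;
 * the real matrix lives in the lock region and is consulted through
 * CONFLICTS): a conflict matrix is a square array indexed by held and
 * requested mode.  Assuming the DB_LOCK_NG/READ/WRITE enum ordering,
 * reads share with reads and writes conflict with everything but NG.
 */
#ifdef LOCK_EXAMPLE_CODE
static const u_int8_t example_conflicts[3][3] = {
	/* Requested:	NG	READ	WRITE		Held: */
	{		0,	0,	0 },		/* NG */
	{		0,	0,	1 },		/* READ */
	{		0,	1,	1 }		/* WRITE */
};

static int
example_modes_conflict(held, wanted)
	db_lockmode_t held, wanted;
{
	return (example_conflicts[held][wanted]);
}
#endif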

static int
__lock_is_parent(locker, txn)
	u_int32_t locker;
	DB_TXN *txn;
{
	DB_TXN *t;

	if (txn == NULL)
		return (0);

	for (t = txn->parent; t != NULL; t = t->parent)
		if (t->txnid == locker)
			return (1);

	return (0);
}