1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 
8 #include "config.h"
9 
10 #ifndef lint
11 static const char sccsid[] = "@(#)lock_deadlock.c	10.37 (Sleepycat) 10/4/98";
12 #endif /* not lint */
13 
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
16 
17 #include <errno.h>
18 #include <string.h>
19 #endif
20 
21 #include "db_int.h"
22 #include "shqueue.h"
23 #include "db_shash.h"
24 #include "lock.h"
25 #include "common_ext.h"
26 
27 #define	ISSET_MAP(M, N)	(M[(N) / 32] & (1 << (N) % 32))
28 
29 #define	CLEAR_MAP(M, N) {						\
30 	u_int32_t __i;							\
31 	for (__i = 0; __i < (N); __i++)					\
32 		M[__i] = 0;						\
33 }
34 
35 #define	SET_MAP(M, B)	(M[(B) / 32] |= (1 << ((B) % 32)))
36 #define	CLR_MAP(M, B)	(M[(B) / 32] &= ~(1 << ((B) % 32)))
37 
38 #define	OR_MAP(D, S, N)	{						\
39 	u_int32_t __i;							\
40 	for (__i = 0; __i < (N); __i++)					\
41 		D[__i] |= S[__i];					\
42 }
43 #define	BAD_KILLID	0xffffffff
44 
45 typedef struct {
46 	int		valid;
47 	u_int32_t	id;
48 	DB_LOCK		last_lock;
49 	db_pgno_t	pgno;
50 } locker_info;
51 
52 static int  __dd_abort __P((DB_ENV *, locker_info *));
53 static int  __dd_build
54 	__P((DB_ENV *, u_int32_t **, u_int32_t *, locker_info **));
55 static u_int32_t
56 	   *__dd_find __P((u_int32_t *, locker_info *, u_int32_t));
57 
58 #ifdef DIAGNOSTIC
59 static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t));
60 #endif
61 
62 int
lock_detect(lt,flags,atype)63 lock_detect(lt, flags, atype)
64 	DB_LOCKTAB *lt;
65 	u_int32_t flags, atype;
66 {
67 	DB_ENV *dbenv;
68 	locker_info *idmap;
69 	u_int32_t *bitmap, *deadlock, i, killid, nentries, nlockers;
70 	int do_pass, ret;
71 
72 	LOCK_PANIC_CHECK(lt);
73 
74 	/* Validate arguments. */
75 	if ((ret =
76 	    __db_fchk(lt->dbenv, "lock_detect", flags, DB_LOCK_CONFLICT)) != 0)
77 		return (ret);
78 
79 	/* Check if a detector run is necessary. */
80 	dbenv = lt->dbenv;
81 	if (LF_ISSET(DB_LOCK_CONFLICT)) {
82 		/* Make a pass every time a lock waits. */
83 		LOCK_LOCKREGION(lt);
84 		do_pass = dbenv->lk_info->region->need_dd != 0;
85 		UNLOCK_LOCKREGION(lt);
86 
87 		if (!do_pass)
88 			return (0);
89 	}
90 
91 	/* Build the waits-for bitmap. */
92 	if ((ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap)) != 0)
93 		return (ret);
94 
95 	if (nlockers == 0)
96 		return (0);
97 #ifdef DIAGNOSTIC
98 	if (dbenv->db_verbose != 0)
99 		__dd_debug(dbenv, idmap, bitmap, nlockers);
100 #endif
101 	/* Find a deadlock. */
102 	deadlock = __dd_find(bitmap, idmap, nlockers);
103 	nentries = ALIGN(nlockers, 32) / 32;
104 	killid = BAD_KILLID;
105 	if (deadlock != NULL) {
106 		/* Kill someone. */
107 		switch (atype) {
108 		case DB_LOCK_OLDEST:
109 			/*
110 			 * Find the first bit set in the current
111 			 * array and then look for a lower tid in
112 			 * the array.
113 			 */
114 			for (i = 0; i < nlockers; i++)
115 				if (ISSET_MAP(deadlock, i))
116 					killid = i;
117 
118 			if (killid == BAD_KILLID) {
119 				__db_err(dbenv,
120 				    "warning: could not find locker to abort");
121 				break;
122 			}
123 
124 			/*
125 			 * The oldest transaction has the lowest
126 			 * transaction id.
127 			 */
128 			for (i = killid + 1; i < nlockers; i++)
129 				if (ISSET_MAP(deadlock, i) &&
130 				    idmap[i].id < idmap[killid].id)
131 					killid = i;
132 			break;
133 		case DB_LOCK_DEFAULT:
134 		case DB_LOCK_RANDOM:
135 			/*
136 			 * We are trying to calculate the id of the
137 			 * locker whose entry is indicated by deadlock.
138 			 */
139 			killid = (deadlock - bitmap) / nentries;
140 			break;
141 		case DB_LOCK_YOUNGEST:
142 			/*
143 			 * Find the first bit set in the current
144 			 * array and then look for a lower tid in
145 			 * the array.
146 			 */
147 			for (i = 0; i < nlockers; i++)
148 				if (ISSET_MAP(deadlock, i))
149 					killid = i;
150 
151 			if (killid == BAD_KILLID) {
152 				__db_err(dbenv,
153 				    "warning: could not find locker to abort");
154 				break;
155 			}
156 			/*
157 			 * The youngest transaction has the highest
158 			 * transaction id.
159 			 */
160 			for (i = killid + 1; i < nlockers; i++)
161 				if (ISSET_MAP(deadlock, i) &&
162 				    idmap[i].id > idmap[killid].id)
163 					killid = i;
164 			break;
165 		default:
166 			killid = BAD_KILLID;
167 			ret = EINVAL;
168 		}
169 
170 		/* Kill the locker with lockid idmap[killid]. */
171 		if (dbenv->db_verbose != 0 && killid != BAD_KILLID)
172 			__db_err(dbenv, "Aborting locker %lx",
173 			    (u_long)idmap[killid].id);
174 
175 		if (killid != BAD_KILLID &&
176 		    (ret = __dd_abort(dbenv, &idmap[killid])) != 0)
177 			__db_err(dbenv,
178 			    "warning: unable to abort locker %lx",
179 			    (u_long)idmap[killid].id);
180 	}
181 	__os_free(bitmap, 0);
182 	__os_free(idmap, 0);
183 
184 	return (ret);
185 }
186 
187 /*
188  * ========================================================================
189  * Utilities
190  */
191 static int
__dd_build(dbenv,bmp,nlockers,idmap)192 __dd_build(dbenv, bmp, nlockers, idmap)
193 	DB_ENV *dbenv;
194 	u_int32_t **bmp, *nlockers;
195 	locker_info **idmap;
196 {
197 	struct __db_lock *lp;
198 	DB_LOCKTAB *lt;
199 	DB_LOCKOBJ *op, *lo, *lockerp;
200 	u_int8_t *pptr;
201 	locker_info *id_array;
202 	u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap;
203 	int is_first, ret;
204 
205 	lt = dbenv->lk_info;
206 
207 	/*
208 	 * We'll check how many lockers there are, add a few more in for
209 	 * good measure and then allocate all the structures.  Then we'll
210 	 * verify that we have enough room when we go back in and get the
211 	 * mutex the second time.
212 	 */
213 	LOCK_LOCKREGION(lt);
214 retry:	count = lt->region->nlockers;
215 	lt->region->need_dd = 0;
216 	UNLOCK_LOCKREGION(lt);
217 
218 	if (count == 0) {
219 		*nlockers = 0;
220 		return (0);
221 	}
222 
223 	if (dbenv->db_verbose)
224 		__db_err(dbenv, "%lu lockers", (u_long)count);
225 
226 	count += 10;
227 	nentries = ALIGN(count, 32) / 32;
228 	/*
229 	 * Allocate enough space for a count by count bitmap matrix.
230 	 *
231 	 * XXX
232 	 * We can probably save the malloc's between iterations just
233 	 * reallocing if necessary because count grew by too much.
234 	 */
235 	if ((ret = __os_calloc((size_t)count,
236 	    sizeof(u_int32_t) * nentries, &bitmap)) != 0)
237 		return (ret);
238 
239 	if ((ret = __os_calloc(sizeof(u_int32_t), nentries, &tmpmap)) != 0) {
240 		__os_free(bitmap, sizeof(u_int32_t) * nentries);
241 		return (ret);
242 	}
243 
244 	if ((ret =
245 	    __os_calloc((size_t)count, sizeof(locker_info), &id_array)) != 0) {
246 		__os_free(bitmap, count * sizeof(u_int32_t) * nentries);
247 		__os_free(tmpmap, sizeof(u_int32_t) * nentries);
248 		return (ret);
249 	}
250 
251 	/*
252 	 * Now go back in and actually fill in the matrix.
253 	 */
254 	LOCK_LOCKREGION(lt);
255 	if (lt->region->nlockers > count) {
256 		__os_free(bitmap, count * sizeof(u_int32_t) * nentries);
257 		__os_free(tmpmap, sizeof(u_int32_t) * nentries);
258 		__os_free(id_array, count * sizeof(locker_info));
259 		goto retry;
260 	}
261 
262 	/*
263 	 * First we go through and assign each locker a deadlock detector id.
264 	 * Note that we fill in the idmap in the next loop since that's the
265 	 * only place where we conveniently have both the deadlock id and the
266 	 * actual locker.
267 	 */
268 	for (id = 0, i = 0; i < lt->region->table_size; i++)
269 		for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
270 		    op != NULL; op = SH_TAILQ_NEXT(op, links, __db_lockobj))
271 			if (op->type == DB_LOCK_LOCKER)
272 				op->dd_id = id++;
273 	/*
274 	 * We go through the hash table and find each object.  For each object,
275 	 * we traverse the waiters list and add an entry in the waitsfor matrix
276 	 * for each waiter/holder combination.
277 	 */
278 	for (i = 0; i < lt->region->table_size; i++) {
279 		for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
280 		    op != NULL; op = SH_TAILQ_NEXT(op, links, __db_lockobj)) {
281 			if (op->type != DB_LOCK_OBJTYPE)
282 				continue;
283 			CLEAR_MAP(tmpmap, nentries);
284 
285 			/*
286 			 * First we go through and create a bit map that
287 			 * represents all the holders of this object.
288 			 */
289 			for (lp = SH_TAILQ_FIRST(&op->holders, __db_lock);
290 			    lp != NULL;
291 			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
292 				if (__lock_getobj(lt, lp->holder,
293 				    NULL, DB_LOCK_LOCKER, &lockerp) != 0) {
294 					__db_err(dbenv,
295 					    "warning unable to find object");
296 					continue;
297 				}
298 				id_array[lockerp->dd_id].id = lp->holder;
299 				id_array[lockerp->dd_id].valid = 1;
300 
301 				/*
302 				 * If the holder has already been aborted, then
303 				 * we should ignore it for now.
304 				 */
305 				if (lp->status == DB_LSTAT_HELD)
306 					SET_MAP(tmpmap, lockerp->dd_id);
307 			}
308 
309 			/*
310 			 * Next, for each waiter, we set its row in the matrix
311 			 * equal to the map of holders we set up above.
312 			 */
313 			for (is_first = 1,
314 			    lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
315 			    lp != NULL;
316 			    is_first = 0,
317 			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
318 				if (__lock_getobj(lt, lp->holder,
319 				    NULL, DB_LOCK_LOCKER, &lockerp) != 0) {
320 					__db_err(dbenv,
321 					    "warning unable to find object");
322 					continue;
323 				}
324 				id_array[lockerp->dd_id].id = lp->holder;
325 				id_array[lockerp->dd_id].valid = 1;
326 
327 				/*
328 				 * If the transaction is pending abortion, then
329 				 * ignore it on this iteration.
330 				 */
331 				if (lp->status != DB_LSTAT_WAITING)
332 					continue;
333 
334 				entryp = bitmap + (nentries * lockerp->dd_id);
335 				OR_MAP(entryp, tmpmap, nentries);
336 				/*
337 				 * If this is the first waiter on the queue,
338 				 * then we remove the waitsfor relationship
339 				 * with oneself.  However, if it's anywhere
340 				 * else on the queue, then we have to keep
341 				 * it and we have an automatic deadlock.
342 				 */
343 				if (is_first)
344 					CLR_MAP(entryp, lockerp->dd_id);
345 			}
346 		}
347 	}
348 
349 	/* Now for each locker; record its last lock. */
350 	for (id = 0; id < count; id++) {
351 		if (!id_array[id].valid)
352 			continue;
353 		if (__lock_getobj(lt,
354 		    id_array[id].id, NULL, DB_LOCK_LOCKER, &lockerp) != 0) {
355 			__db_err(dbenv,
356 			    "No locks for locker %lu", (u_long)id_array[id].id);
357 			continue;
358 		}
359 		lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
360 		if (lp != NULL) {
361 			id_array[id].last_lock = LOCK_TO_OFFSET(lt, lp);
362 			lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
363 			pptr = SH_DBT_PTR(&lo->lockobj);
364 			if (lo->lockobj.size >= sizeof(db_pgno_t))
365 				memcpy(&id_array[id].pgno, pptr,
366 				    sizeof(db_pgno_t));
367 			else
368 				id_array[id].pgno = 0;
369 		}
370 	}
371 
372 	/* Pass complete, reset the deadlock detector bit. */
373 	lt->region->need_dd = 0;
374 	UNLOCK_LOCKREGION(lt);
375 
376 	/*
377 	 * Now we can release everything except the bitmap matrix that we
378 	 * created.
379 	 */
380 	*nlockers = id;
381 	*idmap = id_array;
382 	*bmp = bitmap;
383 	__os_free(tmpmap, sizeof(u_int32_t) * nentries);
384 	return (0);
385 }
386 
387 static u_int32_t *
__dd_find(bmp,idmap,nlockers)388 __dd_find(bmp, idmap, nlockers)
389 	u_int32_t *bmp, nlockers;
390 	locker_info *idmap;
391 {
392 	u_int32_t i, j, nentries, *mymap, *tmpmap;
393 
394 	/*
395 	 * For each locker, OR in the bits from the lockers on which that
396 	 * locker is waiting.
397 	 */
398 	nentries = ALIGN(nlockers, 32) / 32;
399 	for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nentries) {
400 		if (!idmap[i].valid)
401 			continue;
402 		for (j = 0; j < nlockers; j++) {
403 			if (ISSET_MAP(mymap, j)) {
404 				/* Find the map for this bit. */
405 				tmpmap = bmp + (nentries * j);
406 				OR_MAP(mymap, tmpmap, nentries);
407 				if (ISSET_MAP(mymap, i))
408 					return (mymap);
409 			}
410 		}
411 	}
412 	return (NULL);
413 }
414 
415 static int
__dd_abort(dbenv,info)416 __dd_abort(dbenv, info)
417 	DB_ENV *dbenv;
418 	locker_info *info;
419 {
420 	struct __db_lock *lockp;
421 	DB_LOCKTAB *lt;
422 	DB_LOCKOBJ *lockerp, *sh_obj;
423 	int ret;
424 
425 	lt = dbenv->lk_info;
426 	LOCK_LOCKREGION(lt);
427 
428 	/* Find the locker's last lock. */
429 	if ((ret =
430 	    __lock_getobj(lt, info->id, NULL, DB_LOCK_LOCKER, &lockerp)) != 0)
431 		goto out;
432 
433 	lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
434 
435 	/*
436 	 * It's possible that this locker was already aborted.
437 	 * If that's the case, make sure that we remove its
438 	 * locker from the hash table.
439 	 */
440 	if (lockp == NULL) {
441 		HASHREMOVE_EL(lt->hashtab, __db_lockobj,
442 		    links, lockerp, lt->region->table_size, __lock_lhash);
443 		SH_TAILQ_INSERT_HEAD(&lt->region->free_objs,
444 		    lockerp, links, __db_lockobj);
445 		lt->region->nlockers--;
446 		goto out;
447 	} else if (LOCK_TO_OFFSET(lt, lockp) != info->last_lock ||
448 	    lockp->status != DB_LSTAT_WAITING)
449 		goto out;
450 
451 	/* Abort lock, take it off list, and wake up this lock. */
452 	lockp->status = DB_LSTAT_ABORTED;
453 	lt->region->ndeadlocks++;
454 	SH_LIST_REMOVE(lockp, locker_links, __db_lock);
455 	sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
456 	SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
457         (void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd);
458 
459 	ret = 0;
460 
461 out:	UNLOCK_LOCKREGION(lt);
462 	return (ret);
463 }
464 
465 #ifdef DIAGNOSTIC
466 static void
__dd_debug(dbenv,idmap,bitmap,nlockers)467 __dd_debug(dbenv, idmap, bitmap, nlockers)
468 	DB_ENV *dbenv;
469 	locker_info *idmap;
470 	u_int32_t *bitmap, nlockers;
471 {
472 	u_int32_t i, j, *mymap, nentries;
473 	int ret;
474 	char *msgbuf;
475 
476 	__db_err(dbenv, "Waitsfor array");
477 	__db_err(dbenv, "waiter\twaiting on");
478 
479 	/* Allocate space to print 10 bytes per item waited on. */
480 #undef	MSGBUF_LEN
481 #define	MSGBUF_LEN ((nlockers + 1) * 10 + 64)
482 	if ((ret = __os_malloc(MSGBUF_LEN, NULL, &msgbuf)) != 0)
483 		return;
484 
485 	nentries = ALIGN(nlockers, 32) / 32;
486 	for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) {
487 		if (!idmap[i].valid)
488 			continue;
489 		sprintf(msgbuf,					/* Waiter. */
490 		    "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno);
491 		for (j = 0; j < nlockers; j++)
492 			if (ISSET_MAP(mymap, j))
493 				sprintf(msgbuf, "%s %lx", msgbuf,
494 				    (u_long)idmap[j].id);
495 		(void)sprintf(msgbuf,
496 		    "%s %lu", msgbuf, (u_long)idmap[i].last_lock);
497 		__db_err(dbenv, msgbuf);
498 	}
499 
500 	__os_free(msgbuf, MSGBUF_LEN);
501 }
502 #endif
503