1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1995, 1996
9  *	The President and Fellows of Harvard University.  All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Margo Seltzer.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  */
42 
43 #include "config.h"
44 
45 #ifndef lint
46 static const char sccsid[] = "@(#)db_dispatch.c	10.20 (Sleepycat) 10/10/98";
47 #endif /* not lint */
48 
49 #ifndef NO_SYSTEM_INCLUDES
50 #include <sys/types.h>
51 
52 #include <errno.h>
53 #include <shqueue.h>
54 #include <stddef.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #endif
58 
59 #include "db_int.h"
60 #include "db_page.h"
61 #include "db_dispatch.h"
62 #include "db_am.h"
63 #include "common_ext.h"
64 #include "log_auto.h"
65 #include "txn.h"
66 #include "txn_auto.h"
67 
68 /*
69  * Data structures to manage the DB dispatch table.  The dispatch table
70  * is a dynamically allocated array of pointers to dispatch functions.
71  * The dispatch_size is the number of entries possible in the current
72  * dispatch table and the dispatch_valid is the number of valid entries
73  * in the dispatch table.
74  */
75 static int (**dispatch_table) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
76 static u_int32_t dispatch_size = 0;
77 
78 /*
79  * __db_dispatch --
80  *
81  * This is the transaction dispatch function used by the db access methods.
82  * It is designed to handle the record format used by all the access
83  * methods (the one automatically generated by the db_{h,log,read}.sh
84  * scripts in the tools directory).  An application using a different
85  * recovery paradigm will supply a different dispatch function to txn_open.
86  *
87  * PUBLIC: int __db_dispatch __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
88  */
89 int
__db_dispatch(logp,db,lsnp,redo,info)90 __db_dispatch(logp, db, lsnp, redo, info)
91 	DB_LOG *logp;		/* The log file. */
92 	DBT *db;		/* The log record upon which to dispatch. */
93 	DB_LSN *lsnp;		/* The lsn of the record being dispatched. */
94 	int redo;		/* Redo this op (or undo it). */
95 	void *info;
96 {
97 	u_int32_t rectype, txnid;
98 
99 	memcpy(&rectype, db->data, sizeof(rectype));
100 	memcpy(&txnid, (u_int8_t *)db->data + sizeof(rectype), sizeof(txnid));
101 
102 	switch (redo) {
103 	case TXN_REDO:
104 	case TXN_UNDO:
105 		return ((dispatch_table[rectype])(logp, db, lsnp, redo, info));
106 	case TXN_OPENFILES:
107 		if (rectype < DB_txn_BEGIN )
108 			return ((dispatch_table[rectype])(logp,
109 			    db, lsnp, redo, info));
110 		break;
111 	case TXN_BACKWARD_ROLL:
112 		/*
113 		 * Running full recovery in the backward pass.  If we've
114 		 * seen this txnid before and added to it our commit list,
115 		 * then we do nothing during this pass.  If we've never
116 		 * seen it, then we call the appropriate recovery routine
117 		 * in "abort mode".
118 		 */
119 		if (rectype == DB_log_register || rectype == DB_txn_ckp ||
120 		    (__db_txnlist_find(info, txnid) == DB_NOTFOUND &&
121 		    txnid != 0))
122 			return ((dispatch_table[rectype])(logp,
123 			    db, lsnp, TXN_UNDO, info));
124 		break;
125 	case TXN_FORWARD_ROLL:
126 		/*
127 		 * In the forward pass, if we haven't seen the transaction,
128 		 * do nothing, else recovery it.
129 		 */
130 		if (rectype == DB_log_register || rectype == DB_txn_ckp ||
131 		    __db_txnlist_find(info, txnid) != DB_NOTFOUND)
132 			return ((dispatch_table[rectype])(logp,
133 			    db, lsnp, TXN_REDO, info));
134 		break;
135 	default:
136 		abort();
137 	}
138 	return (0);
139 }
140 
141 /*
142  * __db_add_recovery --
143  *
144  * PUBLIC: int __db_add_recovery __P((DB_ENV *,
145  * PUBLIC:    int (*)(DB_LOG *, DBT *, DB_LSN *, int, void *), u_int32_t));
146  */
147 int
__db_add_recovery(dbenv,func,ndx)148 __db_add_recovery(dbenv, func, ndx)
149 	DB_ENV *dbenv;
150 	int (*func) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
151 	u_int32_t ndx;
152 {
153 	u_int32_t i;
154 	int ret;
155 
156 	COMPQUIET(dbenv, NULL);		/* !!!: not currently used. */
157 
158 	/* Check if we have to grow the table. */
159 	if (ndx >= dispatch_size) {
160 		if ((ret = __os_realloc(&dispatch_table,
161 		    (DB_user_BEGIN + dispatch_size) *
162 		    sizeof(dispatch_table[0]))) != 0)
163 			return (ret);
164 		for (i = dispatch_size,
165 		    dispatch_size += DB_user_BEGIN; i < dispatch_size; ++i)
166 			dispatch_table[i] = NULL;
167 	}
168 
169 	dispatch_table[ndx] = func;
170 	return (0);
171 }
172 
173 /*
174  * __db_txnlist_init --
175  *	Initialize transaction linked list.
176  *
177  * PUBLIC: int __db_txnlist_init __P((void *));
178  */
179 int
__db_txnlist_init(retp)180 __db_txnlist_init(retp)
181 	void *retp;
182 {
183 	DB_TXNHEAD *headp;
184 	int ret;
185 
186 	if ((ret = __os_malloc(sizeof(DB_TXNHEAD), NULL, &headp)) != 0)
187 		return (ret);
188 
189 	LIST_INIT(&headp->head);
190 	headp->maxid = 0;
191 	headp->generation = 1;
192 
193 	*(void **)retp = headp;
194 	return (0);
195 }
196 
197 /*
198  * __db_txnlist_add --
199  *	Add an element to our transaction linked list.
200  *
201  * PUBLIC: int __db_txnlist_add __P((void *, u_int32_t));
202  */
203 int
__db_txnlist_add(listp,txnid)204 __db_txnlist_add(listp, txnid)
205 	void *listp;
206 	u_int32_t txnid;
207 {
208 	DB_TXNHEAD *hp;
209 	DB_TXNLIST *elp;
210 	int ret;
211 
212 	if ((ret = __os_malloc(sizeof(DB_TXNLIST), NULL, &elp)) != 0)
213 		return (ret);
214 
215 	elp->txnid = txnid;
216 	hp = (DB_TXNHEAD *)listp;
217 	LIST_INSERT_HEAD(&hp->head, elp, links);
218 	if (txnid > hp->maxid)
219 		hp->maxid = txnid;
220 	elp->generation = hp->generation;
221 
222 	return (0);
223 }
224 
225 /*
226  * __db_txnlist_find --
227  *	Checks to see if a txnid with the current generation is in the
228  *	txnid list.
229  *
230  * PUBLIC: int __db_txnlist_find __P((void *, u_int32_t));
231  */
232 int
__db_txnlist_find(listp,txnid)233 __db_txnlist_find(listp, txnid)
234 	void *listp;
235 	u_int32_t txnid;
236 {
237 	DB_TXNHEAD *hp;
238 	DB_TXNLIST *p;
239 
240 	if ((hp = (DB_TXNHEAD *)listp) == NULL)
241 		return (DB_NOTFOUND);
242 
243 	for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
244 		if (p->txnid == txnid && hp->generation == p->generation)
245 			return (0);
246 
247 	return (DB_NOTFOUND);
248 }
249 
250 /*
251  * __db_txnlist_end --
252  *	Discard transaction linked list.
253  *
254  * PUBLIC: void __db_txnlist_end __P((void *));
255  */
256 void
__db_txnlist_end(listp)257 __db_txnlist_end(listp)
258 	void *listp;
259 {
260 	DB_TXNHEAD *hp;
261 	DB_TXNLIST *p;
262 
263 	hp = (DB_TXNHEAD *)listp;
264 	while ((p = LIST_FIRST(&hp->head)) != LIST_END(&hp->head)) {
265 		LIST_REMOVE(p, links);
266 		__os_free(p, 0);
267 	}
268 	__os_free(listp, sizeof(DB_TXNHEAD));
269 }
270 
271 /*
272  * __db_txnlist_gen --
273  *	Change the current generation number.
274  *
275  * PUBLIC: void __db_txnlist_gen __P((void *, int));
276  */
277 void
__db_txnlist_gen(listp,incr)278 __db_txnlist_gen(listp, incr)
279 	void *listp;
280 	int incr;
281 {
282 	DB_TXNHEAD *hp;
283 
284 	/*
285 	 * During recovery generation numbers keep track of how many "restart"
286 	 * checkpoints we've seen.  Restart checkpoints occur whenever we take
287 	 * a checkpoint and there are no outstanding transactions.  When that
288 	 * happens, we can reset transaction IDs back to 1.  It always happens
289 	 * at recovery and it prevents us from exhausting the transaction IDs
290 	 * name space.
291 	 */
292 	hp = (DB_TXNHEAD *)listp;
293 	hp->generation += incr;
294 }
295 
296 #ifdef DEBUG
297 /*
298  * __db_txnlist_print --
299  *	Print out the transaction list.
300  *
301  * PUBLIC: void __db_txnlist_print __P((void *));
302  */
303 void
__db_txnlist_print(listp)304 __db_txnlist_print(listp)
305 	void *listp;
306 {
307 	DB_TXNHEAD *hp;
308 	DB_TXNLIST *p;
309 
310 	hp = (DB_TXNHEAD *)listp;
311 	printf("Maxid: %lu Generation: %lu\n", (u_long)hp->maxid,
312 	    (u_long)hp->generation);
313 	for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
314 		printf("TXNID: %lu(%lu)\n", (u_long)p->txnid,
315 		(u_long)p->generation);
316 }
317 #endif
318