1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 *	Sleepycat Software.  All rights reserved.
6 *
7 *	@(#)db_int.h	10.77 (Sleepycat) 1/3/99
8 */
9
10#ifndef _DB_INTERNAL_H_
11#define	_DB_INTERNAL_H_
12
#include <stddef.h>			/* offsetof, used by SSZ/SSZA. */

#include "db.h"				/* Standard DB include file. */
#include "queue.h"
#include "shqueue.h"
16
17/*******************************************************
18 * General purpose constants and macros.
19 *******************************************************/
#define	UINT16_T_MAX	    0xffff	/* Maximum 16 bit unsigned. */
#define	UINT32_T_MAX	0xffffffff	/* Maximum 32 bit unsigned. */

#define	DB_MIN_PGSIZE	0x000200	/* Minimum page size (512). */
#define	DB_MAX_PGSIZE	0x010000	/* Maximum page size (65536). */

#define	DB_MINCACHE	10		/* Minimum cached pages */

#define	MEGABYTE	1048576		/* 1024 * 1024. */
29
30/*
31 * If we are unable to determine the underlying filesystem block size, use
32 * 8K on the grounds that most OS's use less than 8K as their VM page size.
33 */
34#define	DB_DEF_IOSIZE	(8 * 1024)
35
36/*
37 * Aligning items to particular sizes or in pages or memory.  ALIGNP is a
38 * separate macro, as we've had to cast the pointer to different integral
39 * types on different architectures.
40 *
 * We cast pointers into unsigned longs when manipulating them because C89
 * guarantees that u_long is the largest available integral type and,
 * further, that unsigned arithmetic never overflows.  However, neither C89
 * nor C9X requires that any integer type be large enough to hold a pointer,
 * although C9X created the intptr_t type, which is guaranteed to hold a
 * pointer but may or may not exist.  At some point in the future, we should
 * test for intptr_t and use it where available.
48 */
/*
 * ALIGNTYPE --
 *	Integral type that pointers are cast to before alignment arithmetic.
 * ALIGNP(value, bound) --
 *	Round the pointer (or integer) `value' up to a multiple of `bound',
 *	yielding an ALIGNTYPE.  Both arguments are converted to ALIGNTYPE
 *	first: the cast has to bind to the whole `value' expression (not just
 *	its first operand), and a `bound' narrower than ALIGNTYPE would
 *	otherwise produce a mask that clears the high-order bits.
 * ALIGN(value, bound) --
 *	Round the integer `value' up to a multiple of `bound'.  The mask
 *	arithmetic is only correct when `bound' is a power of two.
 */
#undef	ALIGNTYPE
#define	ALIGNTYPE		u_long
#undef	ALIGNP
#define	ALIGNP(value, bound)						\
	ALIGN((ALIGNTYPE)(value), (ALIGNTYPE)(bound))
#undef	ALIGN
#define	ALIGN(value, bound)	(((value) + (bound) - 1) & ~((bound) - 1))
55
56/*
57 * There are several on-page structures that are declared to have a number of
58 * fields followed by a variable length array of items.  The structure size
59 * without including the variable length array or the address of the first of
60 * those elements can be found using SSZ.
61 *
62 * This macro can also be used to find the offset of a structure element in a
63 * structure.  This is used in various places to copy structure elements from
64 * unaligned memory references, e.g., pointers into a packed page.
65 *
66 * There are two versions because compilers object if you take the address of
67 * an array.
68 */
/*
 * SSZ --
 *	Byte offset of `field' within the structure type `name', as an int.
 *	Built on offsetof() rather than on the address of a member of a null
 *	pointer, so no pointer is ever narrowed to an int and the result is
 *	an integer constant expression.
 */
#undef	SSZ
#define SSZ(name, field)	((int)offsetof(name, field))
71
/*
 * SSZA --
 *	Byte offset of the first element of the array member `field' within
 *	the structure type `name', as an int.  An array member and its first
 *	element share the same offset, so offsetof() of the member is used.
 */
#undef	SSZA
#define SSZA(name, field)	((int)offsetof(name, field))
74
/*
 * Macros to return per-process address, offsets based on shared regions.
 *
 * R_ADDR --
 *	Address `offset' bytes past (base)->addr, as a void *.
 * R_OFFSET --
 *	Byte distance from (base)->addr to the pointer `p'.
 *
 * Every argument is parenthesized so that expressions such as a conditional
 * may be passed as the offset.
 */
#define	R_ADDR(base, offset)						\
	((void *)((u_int8_t *)((base)->addr) + (offset)))
#define	R_OFFSET(base, p)						\
	((u_int8_t *)(p) - (u_int8_t *)((base)->addr))
78
79#define	DB_DEFAULT	0x000000	/* No flag was specified. */
80
/*
 * Structure used to print flag values: each entry pairs one flag value
 * with the name to display for it.
 */
typedef struct __fn {
	u_int32_t mask;			/* Flag value. */
	const char *name;		/* Flag name. */
} FN;
86
/*
 * Set, clear and test flags.
 *
 * The F_* forms operate on the `flags' member of the structure `p' points
 * to; the LF_* forms operate on a variable named `flags' in the caller's
 * scope.  Every expansion is fully parenthesized so that it can safely be
 * used as an operand of a larger expression.
 */
#define	F_SET(p, f)	((p)->flags |= (f))
#define	F_CLR(p, f)	((p)->flags &= ~(f))
#define	F_ISSET(p, f)	((p)->flags & (f))
#define	LF_SET(f)	(flags |= (f))
#define	LF_CLR(f)	(flags &= ~(f))
#define	LF_ISSET(f)	(flags & (f))
94
/*
 * Panic check:
 * Every interface tests the panic state first.  If the handle has an
 * environment and that environment's db_panic field is non-zero, the
 * enclosing function returns DB_RUNRECOVERY.
 */
#define	DB_PANIC_CHECK(dbp) {						\
	if ((dbp)->dbenv != NULL) {					\
		if ((dbp)->dbenv->db_panic != 0)			\
			return (DB_RUNRECOVERY);			\
	}								\
}
103
104/* Display separator string. */
105#undef	DB_LINE
106#define	DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
107
/*
 * Unused, or not-used-yet variable.  "Shut that bloody compiler up!"
 *
 * Assigns `v' to `n'.  The expansion is parenthesized so that it behaves as
 * a single expression wherever it is used.
 */
#define	COMPQUIET(n, v)	((n) = (v))
110
/*
 * Purify and similar run-time tools complain about uninitialized
 * reads/writes for structure fields whose only purpose is padding.
 *
 * UMRW zeroes `v'.  The expansion is parenthesized so that it behaves as a
 * single expression wherever it is used.
 */
#define	UMRW(v)		((v) = 0)
116
117/*
118 * Win16 needs specific syntax on callback functions.  Nobody else cares.
119 */
120#ifndef	DB_CALLBACK
121#define	DB_CALLBACK	/* Nothing. */
122#endif
123
124/*******************************************************
125 * Files.
126 *******************************************************/
127 /*
128  * We use 1024 as the maximum path length.  It's too hard to figure out what
129  * the real path length is, as it was traditionally stored in <sys/param.h>,
130  * and that file isn't always available.
131  */
132#undef	MAXPATHLEN
133#define	MAXPATHLEN	1024
134
#define	PATH_DOT	"."	/* Current working directory. */
#define	PATH_SEPARATOR	"/"	/* Path separator, as a string. */
137
138/*******************************************************
139 * Mutex support.
140 *******************************************************/
141#include <sys/machlock.h>
typedef lock_t tsl_t;		/* Test-and-set type, from <sys/machlock.h>. */
143
144
145/*
146 * !!!
147 * Various systems require different alignments for mutexes (the worst we've
148 * seen so far is 16-bytes on some HP architectures).  The mutex (tsl_t) must
149 * be first in the db_mutex_t structure, which must itself be first in the
150 * region.  This ensures the alignment is as returned by mmap(2), which should
151 * be sufficient.  All other mutex users must ensure proper alignment locally.
152 */
#define	MUTEX_ALIGNMENT	(sizeof(int))
154
/*
 * The offset of a mutex in memory: the byte distance from `a' to `b',
 * truncated to 32 bits.  Both arguments are parenthesized so that pointer
 * expressions such as `p + 1' are evaluated in their own type before being
 * converted to byte pointers.
 *
 * !!!
 * Not an off_t, so backing file offsets MUST be less than 4Gb.  See the
 * off field of the db_mutex_t as well.
 */
#define	MUTEX_LOCK_OFFSET(a, b)						\
	((u_int32_t)((u_int8_t *)(b) - (u_int8_t *)(a)))
163
/*
 * db_mutex_t --
 *	Mutex structure.  When HAVE_SPINLOCKS is defined it starts with the
 *	test-and-set resource (and records the holder's pid only under
 *	DIAGNOSTIC); otherwise it holds a backing file offset and the
 *	holder's pid.  The spin count and the two grant counters are present
 *	in both configurations.
 */
typedef struct _db_mutex_t {
#ifdef HAVE_SPINLOCKS
	tsl_t	  tsl_resource;		/* Resource test and set. */
#ifdef DIAGNOSTIC
	u_int32_t pid;			/* Lock holder: 0 or process pid. */
#endif
#else
	u_int32_t off;			/* Backing file offset. */
	u_int32_t pid;			/* Lock holder: 0 or process pid. */
#endif
	u_int32_t spins;		/* Spins before block. */
	u_int32_t mutex_set_wait;	/* Granted after wait. */
	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
} db_mutex_t;
178
179#include "mutex_ext.h"
180
181/*******************************************************
182 * Access methods.
183 *******************************************************/
/*
 * Lock/unlock a DB thread.
 *
 * The mutex is only touched when the handle has DB_AM_THREAD set.  Each
 * expansion is a complete statement (the trailing semicolon is part of the
 * macro, as it always has been) and is wrapped in do/while so that an
 * `else' following the invocation cannot bind to the macro's internal `if'.
 */
#define	DB_THREAD_LOCK(dbp)						\
	do {								\
		if (F_ISSET(dbp, DB_AM_THREAD))				\
			(void)__db_mutex_lock(				\
			    (db_mutex_t *)(dbp)->mutexp, -1);		\
	} while (0);
#define	DB_THREAD_UNLOCK(dbp)						\
	do {								\
		if (F_ISSET(dbp, DB_AM_THREAD))				\
			(void)__db_mutex_unlock(			\
			    (db_mutex_t *)(dbp)->mutexp, -1);		\
	} while (0);
191
192/*******************************************************
193 * Environment.
194 *******************************************************/
/* Kind of file being named; the type passed to __db_appname(). */
typedef enum {
	DB_APP_NONE = 0,		/* No type (region). */
	DB_APP_DATA = 1,		/* Data file. */
	DB_APP_LOG  = 2,		/* Log file. */
	DB_APP_TMP  = 3			/* Temporary file. */
} APPNAME;
202
203/*******************************************************
204 * Shared memory regions.
205 *******************************************************/
206/*
207 * The shared memory regions share an initial structure so that the general
208 * region code can handle races between the region being deleted and other
209 * processes waiting on the region mutex.
210 *
211 * !!!
212 * Note, the mutex must be the first entry in the region; see comment above.
213 */
/*
 * RLAYOUT --
 *	Common header of a shared region.  Field order matters: the mutex
 *	is deliberately the first member.
 */
typedef struct _rlayout {
	db_mutex_t lock;		/* Region mutex. */
#define	DB_REGIONMAGIC	0x120897	/* Region magic number. */
	u_int32_t  valid;		/* Valid magic number. */
	u_int32_t  refcnt;		/* Region reference count. */
	size_t	   size;		/* Region length. */
	int	   majver;		/* Major version number. */
	int	   minver;		/* Minor version number. */
	int	   patch;		/* Patch version number. */
	int	   panic;		/* Region is dead. */
#define	INVALID_SEGID	-1		/* Presumably "no segment" for segid. */
	int	   segid;		/* shmget(2) ID, or Win16 segment ID. */

#define	REGION_ANONYMOUS	0x01	/* Region is/should be in anon mem. */
	u_int32_t  flags;		/* REGION_ANONYMOUS is defined here. */
} RLAYOUT;
230
231/*
232 * DB creates all regions on 4K boundaries out of sheer paranoia, so that
233 * we don't make the underlying VM unhappy.
234 */
235#define	DB_VMPAGESIZE	(4 * 1024)
/*
 * DB_ROUNDOFF --
 *	Round `n' up, in place, to a multiple of `round'; a value that is
 *	already a multiple is left unchanged.
 */
#define	DB_ROUNDOFF(n, round) {						\
	(n) = (n) + ((round) - 1);					\
	(n) = (n) - (n) % (round);					\
}
240
241/*
242 * The interface to region attach is nasty, there is a lot of complex stuff
243 * going on, which has to be retained between create/attach and detach.  The
244 * REGINFO structure keeps track of it.
245 */
/*
 * REGINFO --
 *	Per-attachment state for a region: the "Arguments" group, the
 *	"Results" group, and the flags word.
 */
struct __db_reginfo;	typedef struct __db_reginfo REGINFO;
struct __db_reginfo {
					/* Arguments. */
	DB_ENV	   *dbenv;		/* Region naming info. */
	APPNAME	    appname;		/* Region naming info. */
	char	   *path;		/* Region naming info. */
	const char *file;		/* Region naming info. */
	int	    mode;		/* Region mode, if a file. */
	size_t	    size;		/* Region size. */
	u_int32_t   dbflags;		/* Region file open flags, if a file. */

					/* Results. */
	char	   *name;		/* Region name. */
	void	   *addr;		/* Region address. */
	int	    fd;			/* Fcntl(2) locking file descriptor.
					   NB: this is only valid if a regular
					   file is backing the shared region,
					   and mmap(2) is being used to map it
					   into our address space. */
	int	    segid;		/* shmget(2) ID, or Win16 segment ID. */
	void	   *wnt_handle;		/* Win/NT HANDLE. */

					/* Shared flags. */
/*
 * 0x0001 is left unassigned here because it is the bit shared with the
 * RLAYOUT structure's flags (REGION_ANONYMOUS is 0x01 there).
 */
/*				0x0001	COMMON MASK with RLAYOUT structure. */
#define	REGION_CANGROW		0x0002	/* Can grow. */
#define	REGION_CREATED		0x0004	/* Created. */
#define	REGION_HOLDINGSYS	0x0008	/* Holding system resources. */
#define	REGION_LASTDETACH	0x0010	/* Delete on last detach. */
#define	REGION_MALLOC		0x0020	/* Created in malloc'd memory. */
#define	REGION_PRIVATE		0x0040	/* Private to thread/process. */
#define	REGION_REMOVED		0x0080	/* Already deleted. */
#define	REGION_SIZEDEF		0x0100	/* Use default region size if exists. */
	u_int32_t   flags;		/* REGION_XXX values defined above. */
};
280
281/*******************************************************
282 * Mpool.
283 *******************************************************/
/*
 * File types for DB access methods.  Negative numbers are reserved to DB.
 * The values are parenthesized so each expands to a single operand.
 */
#define	DB_FTYPE_BTREE		(-1)	/* Btree. */
#define	DB_FTYPE_HASH		(-2)	/* Hash. */
289
/*
 * Structure used as the DB pgin/pgout pgcookie: it carries the page size
 * and a swap indicator.
 */
typedef struct __dbpginfo {
	size_t	db_pagesize;		/* Underlying page size. */
	int	needswap;		/* If swapping required. */
} DB_PGINFO;
295
296/*******************************************************
297 * Log.
298 *******************************************************/
/* Reset both the file and the offset of an LSN to 0. */
#define	ZERO_LSN(LSN) {							\
	(LSN).file = 0, (LSN).offset = 0;				\
}
304
/*
 * Return 1 if LSN is a 'zero' lsn, otherwise return 0.  Only the file
 * field is examined.
 */
#define	IS_ZERO_LSN(LSN)	(!(LSN).file)
307
/*
 * Test if we need to log a change: true only when the cursor's DB handle
 * has DB_AM_LOGGING set and the cursor itself does not have DBC_RECOVER set.
 */
#define	DB_LOGGING(dbc)							\
	(!(!F_ISSET((dbc)->dbp, DB_AM_LOGGING) || F_ISSET(dbc, DBC_RECOVER)))
311
312#ifdef DIAGNOSTIC
/*
 * Debugging macro to log operations.
 *	If DEBUG_WOP is defined, log operations that modify the database.
 *	If DEBUG_ROP is defined, log operations that read the database.
 *
 * C cursor
 * T txn
 * O operation (string)
 * K key
 * A data
 * F flags
 *
 * Nothing is logged unless DB_LOGGING(C) is true.  The operation string is
 * evaluated exactly once and is logged including its terminating NUL byte.
 */
#define	LOG_OP(C, T, O, K, A, F) {					\
	DB_LSN _lsn;							\
	DBT _op;							\
	if (DB_LOGGING((C))) {						\
		memset(&_op, 0, sizeof(_op));				\
		_op.data = (void *)(O);					\
		_op.size = strlen(_op.data) + 1;			\
		(void)__db_debug_log((C)->dbp->dbenv->lg_info,		\
		    (T), &_lsn, 0, &_op, (C)->dbp->log_fileid,		\
		    (K), (A), (F));					\
	}								\
}
/* With DIAGNOSTIC: DEBUG_LREAD expands to LOG_OP only if DEBUG_ROP is set. */
#ifdef DEBUG_ROP
#define	DEBUG_LREAD(C, T, O, K, A, F)	LOG_OP(C, T, O, K, A, F)
#else
#define	DEBUG_LREAD(C, T, O, K, A, F)
#endif
/* With DIAGNOSTIC: DEBUG_LWRITE expands to LOG_OP only if DEBUG_WOP is set. */
#ifdef DEBUG_WOP
#define	DEBUG_LWRITE(C, T, O, K, A, F)	LOG_OP(C, T, O, K, A, F)
#else
#define	DEBUG_LWRITE(C, T, O, K, A, F)
#endif
#else
/* Without DIAGNOSTIC both macros expand to nothing. */
#define	DEBUG_LREAD(C, T, O, K, A, F)
#define	DEBUG_LWRITE(C, T, O, K, A, F)
349#endif /* DIAGNOSTIC */
350
351/*******************************************************
352 * Transactions and recovery.
353 *******************************************************/
354/*
355 * Out of band value for a lock.  The locks are returned to callers as offsets
356 * into the lock regions.  Since the RLAYOUT structure begins all regions, an
357 * offset of 0 is guaranteed not to be a valid lock.
358 */
359#define	LOCK_INVALID	0
360
/*
 * The structure allocated for every transaction.  It is linked onto its
 * manager's list through `links', onto its parent's `kids' list through
 * `klinks', and heads its own list of child transactions in `kids'.
 */
struct __db_txn {
	DB_TXNMGR	*mgrp;		/* Pointer to transaction manager. */
	DB_TXN		*parent;	/* Pointer to transaction's parent. */
	DB_LSN		last_lsn;	/* Lsn of last log write. */
	u_int32_t	txnid;		/* Unique transaction id. */
	size_t		off;		/* Detail structure within region. */
	TAILQ_ENTRY(__db_txn) links;	/* Links transactions off manager. */
	TAILQ_HEAD(__kids, __db_txn) kids; /* Child transactions. */
	TAILQ_ENTRY(__db_txn) klinks;	/* Links child transactions. */

#define	TXN_MALLOC	0x01		/* Structure allocated by TXN system. */
	u_int32_t	flags;		/* TXN_MALLOC is defined here. */
};
375
376/*******************************************************
377 * Global variables.
378 *******************************************************/
379/*
380 * !!!
381 * Initialized in os/os_config.c, don't change this unless you change it
382 * as well.
383 */
384
/*
 * One id-to-dbhome mapping; entries are chained on the db_nameq list in
 * DB_GLOBALS.
 */
struct __rmname {
	char *dbhome;			/* Home directory name for this id. */
	int rmid;			/* Id being mapped. */
	TAILQ_ENTRY(__rmname) links;	/* Queue linkage. */
};
390
/*
 * DB_GLOBALS --
 *	Global values, kept in the single __db_global_values instance.  Each
 *	int member is annotated with the DB_XXX name it corresponds to; the
 *	two queues are used by the XA code.
 */
typedef struct __db_globals {
	int db_mutexlocks;		/* DB_MUTEXLOCKS */
	int db_pageyield;		/* DB_PAGEYIELD */
	int db_region_anon;		/* DB_REGION_ANON, DB_REGION_NAME */
	int db_region_init;		/* DB_REGION_INIT */
	int db_tsl_spins;		/* DB_TSL_SPINS */
					/* XA: list of opened environments. */
	TAILQ_HEAD(__db_envq, __db_env) db_envq;
					/* XA: list of id to dbhome mappings. */
	TAILQ_HEAD(__db_nameq, __rmname) db_nameq;
} DB_GLOBALS;

extern	DB_GLOBALS	__db_global_values;
/* DB_GLOBAL(v) names member `v' of __db_global_values. */
#define	DB_GLOBAL(v)	__db_global_values.v
405
406#include "os.h"
407#include "os_ext.h"
408
409#endif /* !_DB_INTERNAL_H_ */
410