xref: /illumos-gate/usr/src/lib/libsqlite/src/pager.c (revision 1da57d55)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
37c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
47c478bd9Sstevel@tonic-gate  */
57c478bd9Sstevel@tonic-gate 
67c478bd9Sstevel@tonic-gate /*
77c478bd9Sstevel@tonic-gate ** 2001 September 15
87c478bd9Sstevel@tonic-gate **
97c478bd9Sstevel@tonic-gate ** The author disclaims copyright to this source code.  In place of
107c478bd9Sstevel@tonic-gate ** a legal notice, here is a blessing:
117c478bd9Sstevel@tonic-gate **
127c478bd9Sstevel@tonic-gate **    May you do good and not evil.
137c478bd9Sstevel@tonic-gate **    May you find forgiveness for yourself and forgive others.
147c478bd9Sstevel@tonic-gate **    May you share freely, never taking more than you give.
157c478bd9Sstevel@tonic-gate **
167c478bd9Sstevel@tonic-gate *************************************************************************
177c478bd9Sstevel@tonic-gate ** This is the implementation of the page cache subsystem or "pager".
18*1da57d55SToomas Soome **
197c478bd9Sstevel@tonic-gate ** The pager is used to access a database disk file.  It implements
207c478bd9Sstevel@tonic-gate ** atomic commit and rollback through the use of a journal file that
217c478bd9Sstevel@tonic-gate ** is separate from the database file.  The pager also implements file
227c478bd9Sstevel@tonic-gate ** locking to prevent two processes from writing the same database
237c478bd9Sstevel@tonic-gate ** file simultaneously, or one process from reading the database while
247c478bd9Sstevel@tonic-gate ** another is writing.
257c478bd9Sstevel@tonic-gate **
267c478bd9Sstevel@tonic-gate ** @(#) $Id: pager.c,v 1.101 2004/02/25 02:20:41 drh Exp $
277c478bd9Sstevel@tonic-gate */
287c478bd9Sstevel@tonic-gate #include "os.h"         /* Must be first to enable large file support */
297c478bd9Sstevel@tonic-gate #include "sqliteInt.h"
307c478bd9Sstevel@tonic-gate #include "pager.h"
317c478bd9Sstevel@tonic-gate #include <assert.h>
327c478bd9Sstevel@tonic-gate #include <string.h>
337c478bd9Sstevel@tonic-gate 
/*
** Optional tracing hooks for troubleshooting.  They are compiled out by
** default (the "#if 0" branch below); in that configuration every macro
** expands to nothing.
**
** When enabled, SET_PAGER() remembers the first pager handed to it in
** mainPager, CLR_PAGER() forgets it again, and the TRACEn() macros print
** to stderr only for that pager.  The TRACEn() macros refer to a variable
** named pPager, which must be in scope wherever they are used.
*/
#if 0
static Pager *mainPager = 0;
# define SET_PAGER(X)   if( mainPager==0 ) mainPager = (X)
# define CLR_PAGER(X)   if( mainPager==(X) ) mainPager = 0
# define TRACE1(X)      if( pPager==mainPager ) fprintf(stderr,X)
# define TRACE2(X,Y)    if( pPager==mainPager ) fprintf(stderr,X,Y)
# define TRACE3(X,Y,Z)  if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
# define SET_PAGER(X)
# define CLR_PAGER(X)
# define TRACE1(X)
# define TRACE2(X,Y)
# define TRACE3(X,Y,Z)
#endif
517c478bd9Sstevel@tonic-gate 
527c478bd9Sstevel@tonic-gate 
537c478bd9Sstevel@tonic-gate /*
547c478bd9Sstevel@tonic-gate ** The page cache as a whole is always in one of the following
557c478bd9Sstevel@tonic-gate ** states:
567c478bd9Sstevel@tonic-gate **
57*1da57d55SToomas Soome **   SQLITE_UNLOCK       The page cache is not currently reading or
587c478bd9Sstevel@tonic-gate **                       writing the database file.  There is no
597c478bd9Sstevel@tonic-gate **                       data held in memory.  This is the initial
607c478bd9Sstevel@tonic-gate **                       state.
617c478bd9Sstevel@tonic-gate **
627c478bd9Sstevel@tonic-gate **   SQLITE_READLOCK     The page cache is reading the database.
637c478bd9Sstevel@tonic-gate **                       Writing is not permitted.  There can be
647c478bd9Sstevel@tonic-gate **                       multiple readers accessing the same database
657c478bd9Sstevel@tonic-gate **                       file at the same time.
667c478bd9Sstevel@tonic-gate **
677c478bd9Sstevel@tonic-gate **   SQLITE_WRITELOCK    The page cache is writing the database.
687c478bd9Sstevel@tonic-gate **                       Access is exclusive.  No other processes or
697c478bd9Sstevel@tonic-gate **                       threads can be reading or writing while one
707c478bd9Sstevel@tonic-gate **                       process is writing.
717c478bd9Sstevel@tonic-gate **
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlitepager_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlitepager_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlitepager_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlitepager_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlitepager_rollback() and sqlitepager_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
827c478bd9Sstevel@tonic-gate */
#define SQLITE_UNLOCK     0   /* Not reading or writing; nothing cached */
#define SQLITE_READLOCK   1   /* Reading the database; writes not permitted */
#define SQLITE_WRITELOCK  2   /* Writing the database; access is exclusive */
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate /*
897c478bd9Sstevel@tonic-gate ** Each in-memory image of a page begins with the following header.
907c478bd9Sstevel@tonic-gate ** This header is only visible to this pager module.  The client
917c478bd9Sstevel@tonic-gate ** code that calls pager sees only the data that follows the header.
927c478bd9Sstevel@tonic-gate **
937c478bd9Sstevel@tonic-gate ** Client code should call sqlitepager_write() on a page prior to making
947c478bd9Sstevel@tonic-gate ** any modifications to that page.  The first time sqlitepager_write()
957c478bd9Sstevel@tonic-gate ** is called, the original page contents are written into the rollback
967c478bd9Sstevel@tonic-gate ** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
977c478bd9Sstevel@tonic-gate ** the journal page has made it onto the disk surface, PgHdr.needSync
987c478bd9Sstevel@tonic-gate ** is cleared.  The modified page cannot be written back into the original
997c478bd9Sstevel@tonic-gate ** database file until the journal pages has been synced to disk and the
1007c478bd9Sstevel@tonic-gate ** PgHdr.needSync has been cleared.
1017c478bd9Sstevel@tonic-gate **
1027c478bd9Sstevel@tonic-gate ** The PgHdr.dirty flag is set when sqlitepager_write() is called and
1037c478bd9Sstevel@tonic-gate ** is cleared again when the page content is written back to the original
1047c478bd9Sstevel@tonic-gate ** database file.
1057c478bd9Sstevel@tonic-gate */
1067c478bd9Sstevel@tonic-gate typedef struct PgHdr PgHdr;
1077c478bd9Sstevel@tonic-gate struct PgHdr {
1087c478bd9Sstevel@tonic-gate   Pager *pPager;                 /* The pager to which this page belongs */
1097c478bd9Sstevel@tonic-gate   Pgno pgno;                     /* The page number for this page */
1107c478bd9Sstevel@tonic-gate   PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
1117c478bd9Sstevel@tonic-gate   int nRef;                      /* Number of users of this page */
1127c478bd9Sstevel@tonic-gate   PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
1137c478bd9Sstevel@tonic-gate   PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */
1147c478bd9Sstevel@tonic-gate   PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */
1157c478bd9Sstevel@tonic-gate   u8 inJournal;                  /* TRUE if has been written to journal */
1167c478bd9Sstevel@tonic-gate   u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
1177c478bd9Sstevel@tonic-gate   u8 dirty;                      /* TRUE if we need to write back changes */
1187c478bd9Sstevel@tonic-gate   u8 needSync;                   /* Sync journal before writing this page */
1197c478bd9Sstevel@tonic-gate   u8 alwaysRollback;             /* Disable dont_rollback() for this page */
1207c478bd9Sstevel@tonic-gate   PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
1217c478bd9Sstevel@tonic-gate   /* SQLITE_PAGE_SIZE bytes of page data follow this header */
1227c478bd9Sstevel@tonic-gate   /* Pager.nExtra bytes of local data follow the page data */
1237c478bd9Sstevel@tonic-gate };
1247c478bd9Sstevel@tonic-gate 
1257c478bd9Sstevel@tonic-gate 
/*
** Invoke the page codec on pager P, if one is installed.
**
** When SQLITE_HAS_CODEC is defined, CODEC(P,D,N,X) calls
** P->xCodec(P->pCodecArg, D, N, X) provided P->xCodec is non-NULL.
** Without SQLITE_HAS_CODEC the macro expands to nothing and its
** arguments are not evaluated.
**
** Every argument is parenthesized so that expression arguments
** (for example "*ppPager" or "a ? b : c") expand correctly.  P is
** evaluated more than once, so it must not have side effects.  The
** expansion is a bare "if" statement: do not use it as the unbraced
** body of an if/else.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) \
    if( (P)->xCodec ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
#else
# define CODEC(P,D,N,X)
#endif
1347c478bd9Sstevel@tonic-gate 
/*
** Conversions between a page header pointer and the regions that
** follow it in the same allocation:
**
**   PGHDR_TO_DATA(P)   address immediately after header P (the page data)
**   DATA_TO_PGHDR(D)   header that sits immediately before page data D
**   PGHDR_TO_EXTRA(P)  address SQLITE_PAGE_SIZE bytes past the page data
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)((char*)((P)+1) + SQLITE_PAGE_SIZE))
1427c478bd9Sstevel@tonic-gate 
/*
** Number of buckets in the hash table used to locate in-memory pages
** by page number.  pager_hash() reduces a page number with a bit mask,
** so this value has to be a power of two.
*/
#define N_PG_HASH 2048

/*
** Map a page number onto a hash bucket index in the range
** 0 .. N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN) & (N_PG_HASH - 1))
1537c478bd9Sstevel@tonic-gate 
1547c478bd9Sstevel@tonic-gate /*
1557c478bd9Sstevel@tonic-gate ** A open page cache is an instance of the following structure.
1567c478bd9Sstevel@tonic-gate */
1577c478bd9Sstevel@tonic-gate struct Pager {
1587c478bd9Sstevel@tonic-gate   char *zFilename;            /* Name of the database file */
1597c478bd9Sstevel@tonic-gate   char *zJournal;             /* Name of the journal file */
1607c478bd9Sstevel@tonic-gate   char *zDirectory;           /* Directory hold database and journal files */
1617c478bd9Sstevel@tonic-gate   OsFile fd, jfd;             /* File descriptors for database and journal */
1627c478bd9Sstevel@tonic-gate   OsFile cpfd;                /* File descriptor for the checkpoint journal */
1637c478bd9Sstevel@tonic-gate   int dbSize;                 /* Number of pages in the file */
1647c478bd9Sstevel@tonic-gate   int origDbSize;             /* dbSize before the current change */
1657c478bd9Sstevel@tonic-gate   int ckptSize;               /* Size of database (in pages) at ckpt_begin() */
1667c478bd9Sstevel@tonic-gate   off_t ckptJSize;            /* Size of journal at ckpt_begin() */
1677c478bd9Sstevel@tonic-gate   int nRec;                   /* Number of pages written to the journal */
1687c478bd9Sstevel@tonic-gate   u32 cksumInit;              /* Quasi-random value added to every checksum */
1697c478bd9Sstevel@tonic-gate   int ckptNRec;               /* Number of records in the checkpoint journal */
1707c478bd9Sstevel@tonic-gate   int nExtra;                 /* Add this many bytes to each in-memory page */
1717c478bd9Sstevel@tonic-gate   void (*xDestructor)(void*); /* Call this routine when freeing pages */
1727c478bd9Sstevel@tonic-gate   int nPage;                  /* Total number of in-memory pages */
1737c478bd9Sstevel@tonic-gate   int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
1747c478bd9Sstevel@tonic-gate   int mxPage;                 /* Maximum number of pages to hold in cache */
1757c478bd9Sstevel@tonic-gate   int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
1767c478bd9Sstevel@tonic-gate   void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
1777c478bd9Sstevel@tonic-gate   void *pCodecArg;            /* First argument to xCodec() */
1787c478bd9Sstevel@tonic-gate   u8 journalOpen;             /* True if journal file descriptors is valid */
1797c478bd9Sstevel@tonic-gate   u8 journalStarted;          /* True if header of journal is synced */
1807c478bd9Sstevel@tonic-gate   u8 useJournal;              /* Use a rollback journal on this file */
1817c478bd9Sstevel@tonic-gate   u8 ckptOpen;                /* True if the checkpoint journal is open */
1827c478bd9Sstevel@tonic-gate   u8 ckptInUse;               /* True we are in a checkpoint */
1837c478bd9Sstevel@tonic-gate   u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
1847c478bd9Sstevel@tonic-gate   u8 noSync;                  /* Do not sync the journal if true */
1857c478bd9Sstevel@tonic-gate   u8 fullSync;                /* Do extra syncs of the journal for robustness */
1867c478bd9Sstevel@tonic-gate   u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
1877c478bd9Sstevel@tonic-gate   u8 errMask;                 /* One of several kinds of errors */
1887c478bd9Sstevel@tonic-gate   u8 tempFile;                /* zFilename is a temporary file */
1897c478bd9Sstevel@tonic-gate   u8 readOnly;                /* True for a read-only database */
1907c478bd9Sstevel@tonic-gate   u8 needSync;                /* True if an fsync() is needed on the journal */
1917c478bd9Sstevel@tonic-gate   u8 dirtyFile;               /* True if database file has changed in any way */
1927c478bd9Sstevel@tonic-gate   u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
1937c478bd9Sstevel@tonic-gate   u8 *aInJournal;             /* One bit for each page in the database file */
1947c478bd9Sstevel@tonic-gate   u8 *aInCkpt;                /* One bit for each page in the database */
1957c478bd9Sstevel@tonic-gate   PgHdr *pFirst, *pLast;      /* List of free pages */
1967c478bd9Sstevel@tonic-gate   PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
1977c478bd9Sstevel@tonic-gate   PgHdr *pAll;                /* List of all pages */
1987c478bd9Sstevel@tonic-gate   PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
1997c478bd9Sstevel@tonic-gate   PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
2007c478bd9Sstevel@tonic-gate };
2017c478bd9Sstevel@tonic-gate 
/*
** Bit flags that may be set in Pager.errMask.  More than one may be set
** at the same time.
*/
#define PAGER_ERR_FULL     (1<<0)  /* a write() failed */
#define PAGER_ERR_MEM      (1<<1)  /* malloc() failed */
#define PAGER_ERR_LOCK     (1<<2)  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  (1<<3)  /* database or journal corruption */
#define PAGER_ERR_DISK     (1<<4)  /* general disk I/O error - bad hard drive? */
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate /*
2127c478bd9Sstevel@tonic-gate ** The journal file contains page records in the following
2137c478bd9Sstevel@tonic-gate ** format.
2147c478bd9Sstevel@tonic-gate **
2157c478bd9Sstevel@tonic-gate ** Actually, this structure is the complete page record for pager
2167c478bd9Sstevel@tonic-gate ** formats less than 3.  Beginning with format 3, this record is surrounded
2177c478bd9Sstevel@tonic-gate ** by two checksums.
2187c478bd9Sstevel@tonic-gate */
2197c478bd9Sstevel@tonic-gate typedef struct PageRecord PageRecord;
2207c478bd9Sstevel@tonic-gate struct PageRecord {
2217c478bd9Sstevel@tonic-gate   Pgno pgno;                      /* The page number */
2227c478bd9Sstevel@tonic-gate   char aData[SQLITE_PAGE_SIZE];   /* Original data for page pgno */
2237c478bd9Sstevel@tonic-gate };
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate /*
2267c478bd9Sstevel@tonic-gate ** Journal files begin with the following magic string.  The data
2277c478bd9Sstevel@tonic-gate ** was obtained from /dev/random.  It is used only as a sanity check.
2287c478bd9Sstevel@tonic-gate **
** There are three journal formats (so far). The 1st journal format writes
** 32-bit integers in the byte-order of the host machine.  The newer
** formats write integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
2347c478bd9Sstevel@tonic-gate **
2357c478bd9Sstevel@tonic-gate ** The 3rd journal format (added for 2.8.0) adds additional sanity
2367c478bd9Sstevel@tonic-gate ** checking information to the journal.  If the power fails while the
2377c478bd9Sstevel@tonic-gate ** journal is being written, semi-random garbage data might appear in
2387c478bd9Sstevel@tonic-gate ** the journal file after power is restored.  If an attempt is then made
2397c478bd9Sstevel@tonic-gate ** to roll the journal back, the database could be corrupted.  The additional
2407c478bd9Sstevel@tonic-gate ** sanity checking data is an attempt to discover the garbage in the
2417c478bd9Sstevel@tonic-gate ** journal and ignore it.
2427c478bd9Sstevel@tonic-gate **
2437c478bd9Sstevel@tonic-gate ** The sanity checking information for the 3rd journal format consists
2447c478bd9Sstevel@tonic-gate ** of a 32-bit checksum on each page of data.  The checksum covers both
2457c478bd9Sstevel@tonic-gate ** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
2467c478bd9Sstevel@tonic-gate ** This cksum is initialized to a 32-bit random value that appears in the
2477c478bd9Sstevel@tonic-gate ** journal file right after the header.  The random initializer is important,
2487c478bd9Sstevel@tonic-gate ** because garbage data that appears at the end of a journal is likely
2497c478bd9Sstevel@tonic-gate ** data that was once in other files that have now been deleted.  If the
2507c478bd9Sstevel@tonic-gate ** garbage data came from an obsolete journal file, the checksums might
2517c478bd9Sstevel@tonic-gate ** be correct.  But by initializing the checksum to random value which
2527c478bd9Sstevel@tonic-gate ** is different for every journal, we minimize that risk.
2537c478bd9Sstevel@tonic-gate */
/* Magic strings for journal formats 1, 2 and 3; only the last byte differs. */
static const unsigned char aJournalMagic1[] =
    { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4 };
static const unsigned char aJournalMagic2[] =
    { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5 };
static const unsigned char aJournalMagic3[] =
    { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6 };

/* Numeric identifiers for the three journal formats. */
#define JOURNAL_FORMAT_1  1
#define JOURNAL_FORMAT_2  2
#define JOURNAL_FORMAT_3  3
2667c478bd9Sstevel@tonic-gate 
/*
** journal_format selects the format used when creating new primary
** journal files.  The default is always format 3.
**
** In normal builds it is a compile-time constant.  In SQLITE_TEST builds
** it is a global variable, so that tests can select an older format and
** check that newer versions of the library can still roll back older
** journal files.
**
** Note that checkpoint journals always use format 2 and omit the header.
*/
#ifndef SQLITE_TEST
# define journal_format 3
#else
int journal_format = 3;
#endif
2817c478bd9Sstevel@tonic-gate 
/*
** Sizes, in bytes, of the journal header and of one page record in the
** journal, as a function of the journal format number X.
**
**   JOURNAL_HDR_SZ(X)  magic string + one Pgno, plus two u32 values
**                      when X>=3
**   JOURNAL_PG_SZ(X)   page data + one Pgno, plus one u32 when X>=3
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3 ? 2*sizeof(u32) : 0))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3 ? sizeof(u32) : 0))
2917c478bd9Sstevel@tonic-gate 
/*
** Reference count tracking, available only in SQLITE_TEST builds.
**
** When pager_refinfo_enable is non-zero, REFINFO(p) prints the page
** number, the address of the page data and the current reference count
** of page header p to standard output.  Otherwise, and in non-test
** builds, REFINFO() does nothing.
**
** The data address is printed with %p.  Casting the pointer to int (as
** was done previously) discards the upper half of the address on
** platforms where pointers are wider than int.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
3107c478bd9Sstevel@tonic-gate 
3117c478bd9Sstevel@tonic-gate /*
3127c478bd9Sstevel@tonic-gate ** Read a 32-bit integer from the given file descriptor.  Store the integer
3137c478bd9Sstevel@tonic-gate ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
3147c478bd9Sstevel@tonic-gate ** error code is something goes wrong.
3157c478bd9Sstevel@tonic-gate **
3167c478bd9Sstevel@tonic-gate ** If the journal format is 2 or 3, read a big-endian integer.  If the
3177c478bd9Sstevel@tonic-gate ** journal format is 1, read an integer in the native byte-order of the
3187c478bd9Sstevel@tonic-gate ** host machine.
3197c478bd9Sstevel@tonic-gate */
read32bits(int format,OsFile * fd,u32 * pRes)3207c478bd9Sstevel@tonic-gate static int read32bits(int format, OsFile *fd, u32 *pRes){
3217c478bd9Sstevel@tonic-gate   u32 res;
3227c478bd9Sstevel@tonic-gate   int rc;
3237c478bd9Sstevel@tonic-gate   rc = sqliteOsRead(fd, &res, sizeof(res));
3247c478bd9Sstevel@tonic-gate   if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
3257c478bd9Sstevel@tonic-gate     unsigned char ac[4];
3267c478bd9Sstevel@tonic-gate     memcpy(ac, &res, 4);
3277c478bd9Sstevel@tonic-gate     res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
3287c478bd9Sstevel@tonic-gate   }
3297c478bd9Sstevel@tonic-gate   *pRes = res;
3307c478bd9Sstevel@tonic-gate   return rc;
3317c478bd9Sstevel@tonic-gate }
3327c478bd9Sstevel@tonic-gate 
3337c478bd9Sstevel@tonic-gate /*
3347c478bd9Sstevel@tonic-gate ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
3357c478bd9Sstevel@tonic-gate ** on success or an error code is something goes wrong.
3367c478bd9Sstevel@tonic-gate **
3377c478bd9Sstevel@tonic-gate ** If the journal format is 2 or 3, write the integer as 4 big-endian
3387c478bd9Sstevel@tonic-gate ** bytes.  If the journal format is 1, write the integer in the native
3397c478bd9Sstevel@tonic-gate ** byte order.  In normal operation, only formats 2 and 3 are used.
3407c478bd9Sstevel@tonic-gate ** Journal format 1 is only used for testing.
3417c478bd9Sstevel@tonic-gate */
write32bits(OsFile * fd,u32 val)3427c478bd9Sstevel@tonic-gate static int write32bits(OsFile *fd, u32 val){
3437c478bd9Sstevel@tonic-gate   unsigned char ac[4];
3447c478bd9Sstevel@tonic-gate   if( journal_format<=1 ){
3457c478bd9Sstevel@tonic-gate     return sqliteOsWrite(fd, &val, 4);
3467c478bd9Sstevel@tonic-gate   }
3477c478bd9Sstevel@tonic-gate   ac[0] = (val>>24) & 0xff;
3487c478bd9Sstevel@tonic-gate   ac[1] = (val>>16) & 0xff;
3497c478bd9Sstevel@tonic-gate   ac[2] = (val>>8) & 0xff;
3507c478bd9Sstevel@tonic-gate   ac[3] = val & 0xff;
3517c478bd9Sstevel@tonic-gate   return sqliteOsWrite(fd, ac, 4);
3527c478bd9Sstevel@tonic-gate }
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate /*
3557c478bd9Sstevel@tonic-gate ** Write a 32-bit integer into a page header right before the
3567c478bd9Sstevel@tonic-gate ** page data.  This will overwrite the PgHdr.pDirty pointer.
3577c478bd9Sstevel@tonic-gate **
3587c478bd9Sstevel@tonic-gate ** The integer is big-endian for formats 2 and 3 and native byte order
3597c478bd9Sstevel@tonic-gate ** for journal format 1.
3607c478bd9Sstevel@tonic-gate */
store32bits(u32 val,PgHdr * p,int offset)3617c478bd9Sstevel@tonic-gate static void store32bits(u32 val, PgHdr *p, int offset){
3627c478bd9Sstevel@tonic-gate   unsigned char *ac;
3637c478bd9Sstevel@tonic-gate   ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
3647c478bd9Sstevel@tonic-gate   if( journal_format<=1 ){
3657c478bd9Sstevel@tonic-gate     memcpy(ac, &val, 4);
3667c478bd9Sstevel@tonic-gate   }else{
3677c478bd9Sstevel@tonic-gate     ac[0] = (val>>24) & 0xff;
3687c478bd9Sstevel@tonic-gate     ac[1] = (val>>16) & 0xff;
3697c478bd9Sstevel@tonic-gate     ac[2] = (val>>8) & 0xff;
3707c478bd9Sstevel@tonic-gate     ac[3] = val & 0xff;
3717c478bd9Sstevel@tonic-gate   }
3727c478bd9Sstevel@tonic-gate }
3737c478bd9Sstevel@tonic-gate 
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate /*
3767c478bd9Sstevel@tonic-gate ** Convert the bits in the pPager->errMask into an approprate
3777c478bd9Sstevel@tonic-gate ** return code.
3787c478bd9Sstevel@tonic-gate */
pager_errcode(Pager * pPager)3797c478bd9Sstevel@tonic-gate static int pager_errcode(Pager *pPager){
3807c478bd9Sstevel@tonic-gate   int rc = SQLITE_OK;
3817c478bd9Sstevel@tonic-gate   if( pPager->errMask & PAGER_ERR_LOCK )    rc = SQLITE_PROTOCOL;
3827c478bd9Sstevel@tonic-gate   if( pPager->errMask & PAGER_ERR_DISK )    rc = SQLITE_IOERR;
3837c478bd9Sstevel@tonic-gate   if( pPager->errMask & PAGER_ERR_FULL )    rc = SQLITE_FULL;
3847c478bd9Sstevel@tonic-gate   if( pPager->errMask & PAGER_ERR_MEM )     rc = SQLITE_NOMEM;
3857c478bd9Sstevel@tonic-gate   if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
3867c478bd9Sstevel@tonic-gate   return rc;
3877c478bd9Sstevel@tonic-gate }
3887c478bd9Sstevel@tonic-gate 
3897c478bd9Sstevel@tonic-gate /*
3907c478bd9Sstevel@tonic-gate ** Add or remove a page from the list of all pages that are in the
3917c478bd9Sstevel@tonic-gate ** checkpoint journal.
3927c478bd9Sstevel@tonic-gate **
3937c478bd9Sstevel@tonic-gate ** The Pager keeps a separate list of pages that are currently in
3947c478bd9Sstevel@tonic-gate ** the checkpoint journal.  This helps the sqlitepager_ckpt_commit()
3957c478bd9Sstevel@tonic-gate ** routine run MUCH faster for the common case where there are many
3967c478bd9Sstevel@tonic-gate ** pages in memory but only a few are in the checkpoint journal.
3977c478bd9Sstevel@tonic-gate */
page_add_to_ckpt_list(PgHdr * pPg)3987c478bd9Sstevel@tonic-gate static void page_add_to_ckpt_list(PgHdr *pPg){
3997c478bd9Sstevel@tonic-gate   Pager *pPager = pPg->pPager;
4007c478bd9Sstevel@tonic-gate   if( pPg->inCkpt ) return;
4017c478bd9Sstevel@tonic-gate   assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
4027c478bd9Sstevel@tonic-gate   pPg->pPrevCkpt = 0;
4037c478bd9Sstevel@tonic-gate   if( pPager->pCkpt ){
4047c478bd9Sstevel@tonic-gate     pPager->pCkpt->pPrevCkpt = pPg;
4057c478bd9Sstevel@tonic-gate   }
4067c478bd9Sstevel@tonic-gate   pPg->pNextCkpt = pPager->pCkpt;
4077c478bd9Sstevel@tonic-gate   pPager->pCkpt = pPg;
4087c478bd9Sstevel@tonic-gate   pPg->inCkpt = 1;
4097c478bd9Sstevel@tonic-gate }
/*
** Unlink pPg from its pager's list of checkpoint-journal pages and clear
** pPg->inCkpt.  Does nothing if pPg->inCkpt is not set.
*/
static void page_remove_from_ckpt_list(PgHdr *pPg){
  PgHdr *pBefore;
  PgHdr *pAfter;
  if( !pPg->inCkpt ){
    return;
  }
  pBefore = pPg->pPrevCkpt;
  pAfter = pPg->pNextCkpt;
  if( pBefore==0 ){
    /* No predecessor, so pPg must be the head of the list */
    assert( pPg->pPager->pCkpt==pPg );
    pPg->pPager->pCkpt = pAfter;
  }else{
    assert( pBefore->pNextCkpt==pPg );
    pBefore->pNextCkpt = pAfter;
  }
  if( pAfter ){
    assert( pAfter->pPrevCkpt==pPg );
    pAfter->pPrevCkpt = pBefore;
  }
  pPg->pNextCkpt = 0;
  pPg->pPrevCkpt = 0;
  pPg->inCkpt = 0;
}
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate /*
4297c478bd9Sstevel@tonic-gate ** Find a page in the hash table given its page number.  Return
4307c478bd9Sstevel@tonic-gate ** a pointer to the page or NULL if not found.
4317c478bd9Sstevel@tonic-gate */
pager_lookup(Pager * pPager,Pgno pgno)4327c478bd9Sstevel@tonic-gate static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
4337c478bd9Sstevel@tonic-gate   PgHdr *p = pPager->aHash[pager_hash(pgno)];
4347c478bd9Sstevel@tonic-gate   while( p && p->pgno!=pgno ){
4357c478bd9Sstevel@tonic-gate     p = p->pNextHash;
4367c478bd9Sstevel@tonic-gate   }
4377c478bd9Sstevel@tonic-gate   return p;
4387c478bd9Sstevel@tonic-gate }
4397c478bd9Sstevel@tonic-gate 
4407c478bd9Sstevel@tonic-gate /*
4417c478bd9Sstevel@tonic-gate ** Unlock the database and clear the in-memory cache.  This routine
4427c478bd9Sstevel@tonic-gate ** sets the state of the pager back to what it was when it was first
4437c478bd9Sstevel@tonic-gate ** opened.  Any outstanding pages are invalidated and subsequent attempts
4447c478bd9Sstevel@tonic-gate ** to access those pages will likely result in a coredump.
4457c478bd9Sstevel@tonic-gate */
pager_reset(Pager * pPager)4467c478bd9Sstevel@tonic-gate static void pager_reset(Pager *pPager){
4477c478bd9Sstevel@tonic-gate   PgHdr *pPg, *pNext;
4487c478bd9Sstevel@tonic-gate   for(pPg=pPager->pAll; pPg; pPg=pNext){
4497c478bd9Sstevel@tonic-gate     pNext = pPg->pNextAll;
4507c478bd9Sstevel@tonic-gate     sqliteFree(pPg);
4517c478bd9Sstevel@tonic-gate   }
4527c478bd9Sstevel@tonic-gate   pPager->pFirst = 0;
4537c478bd9Sstevel@tonic-gate   pPager->pFirstSynced = 0;
4547c478bd9Sstevel@tonic-gate   pPager->pLast = 0;
4557c478bd9Sstevel@tonic-gate   pPager->pAll = 0;
4567c478bd9Sstevel@tonic-gate   memset(pPager->aHash, 0, sizeof(pPager->aHash));
4577c478bd9Sstevel@tonic-gate   pPager->nPage = 0;
4587c478bd9Sstevel@tonic-gate   if( pPager->state>=SQLITE_WRITELOCK ){
4597c478bd9Sstevel@tonic-gate     sqlitepager_rollback(pPager);
4607c478bd9Sstevel@tonic-gate   }
4617c478bd9Sstevel@tonic-gate   sqliteOsUnlock(&pPager->fd);
4627c478bd9Sstevel@tonic-gate   pPager->state = SQLITE_UNLOCK;
4637c478bd9Sstevel@tonic-gate   pPager->dbSize = -1;
4647c478bd9Sstevel@tonic-gate   pPager->nRef = 0;
4657c478bd9Sstevel@tonic-gate   assert( pPager->journalOpen==0 );
4667c478bd9Sstevel@tonic-gate }
4677c478bd9Sstevel@tonic-gate 
4687c478bd9Sstevel@tonic-gate /*
4697c478bd9Sstevel@tonic-gate ** When this routine is called, the pager has the journal file open and
4707c478bd9Sstevel@tonic-gate ** a write lock on the database.  This routine releases the database
4717c478bd9Sstevel@tonic-gate ** write lock and acquires a read lock in its place.  The journal file
4727c478bd9Sstevel@tonic-gate ** is deleted and closed.
4737c478bd9Sstevel@tonic-gate **
4747c478bd9Sstevel@tonic-gate ** TODO: Consider keeping the journal file open for temporary databases.
4757c478bd9Sstevel@tonic-gate ** This might give a performance improvement on windows where opening
4767c478bd9Sstevel@tonic-gate ** a file is an expensive operation.
4777c478bd9Sstevel@tonic-gate */
pager_unwritelock(Pager * pPager)4787c478bd9Sstevel@tonic-gate static int pager_unwritelock(Pager *pPager){
4797c478bd9Sstevel@tonic-gate   int rc;
4807c478bd9Sstevel@tonic-gate   PgHdr *pPg;
4817c478bd9Sstevel@tonic-gate   if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
4827c478bd9Sstevel@tonic-gate   sqlitepager_ckpt_commit(pPager);
4837c478bd9Sstevel@tonic-gate   if( pPager->ckptOpen ){
4847c478bd9Sstevel@tonic-gate     sqliteOsClose(&pPager->cpfd);
4857c478bd9Sstevel@tonic-gate     pPager->ckptOpen = 0;
4867c478bd9Sstevel@tonic-gate   }
4877c478bd9Sstevel@tonic-gate   if( pPager->journalOpen ){
4887c478bd9Sstevel@tonic-gate     sqliteOsClose(&pPager->jfd);
4897c478bd9Sstevel@tonic-gate     pPager->journalOpen = 0;
4907c478bd9Sstevel@tonic-gate     sqliteOsDelete(pPager->zJournal);
4917c478bd9Sstevel@tonic-gate     sqliteFree( pPager->aInJournal );
4927c478bd9Sstevel@tonic-gate     pPager->aInJournal = 0;
4937c478bd9Sstevel@tonic-gate     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
4947c478bd9Sstevel@tonic-gate       pPg->inJournal = 0;
4957c478bd9Sstevel@tonic-gate       pPg->dirty = 0;
4967c478bd9Sstevel@tonic-gate       pPg->needSync = 0;
4977c478bd9Sstevel@tonic-gate     }
4987c478bd9Sstevel@tonic-gate   }else{
4997c478bd9Sstevel@tonic-gate     assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
5007c478bd9Sstevel@tonic-gate   }
5017c478bd9Sstevel@tonic-gate   rc = sqliteOsReadLock(&pPager->fd);
5027c478bd9Sstevel@tonic-gate   if( rc==SQLITE_OK ){
5037c478bd9Sstevel@tonic-gate     pPager->state = SQLITE_READLOCK;
5047c478bd9Sstevel@tonic-gate   }else{
5057c478bd9Sstevel@tonic-gate     /* This can only happen if a process does a BEGIN, then forks and the
5067c478bd9Sstevel@tonic-gate     ** child process does the COMMIT.  Because of the semantics of unix
5077c478bd9Sstevel@tonic-gate     ** file locking, the unlock will fail.
5087c478bd9Sstevel@tonic-gate     */
5097c478bd9Sstevel@tonic-gate     pPager->state = SQLITE_UNLOCK;
5107c478bd9Sstevel@tonic-gate   }
5117c478bd9Sstevel@tonic-gate   return rc;
5127c478bd9Sstevel@tonic-gate }
5137c478bd9Sstevel@tonic-gate 
5147c478bd9Sstevel@tonic-gate /*
5157c478bd9Sstevel@tonic-gate ** Compute and return a checksum for the page of data.
5167c478bd9Sstevel@tonic-gate **
517*1da57d55SToomas Soome ** This is not a real checksum.  It is really just the sum of the
5187c478bd9Sstevel@tonic-gate ** random initial value and the page number.  We considered do a checksum
5197c478bd9Sstevel@tonic-gate ** of the database, but that was found to be too slow.
5207c478bd9Sstevel@tonic-gate */
pager_cksum(Pager * pPager,Pgno pgno,const char * aData)5217c478bd9Sstevel@tonic-gate static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
5227c478bd9Sstevel@tonic-gate   u32 cksum = pPager->cksumInit + pgno;
5237c478bd9Sstevel@tonic-gate   return cksum;
5247c478bd9Sstevel@tonic-gate }
5257c478bd9Sstevel@tonic-gate 
5267c478bd9Sstevel@tonic-gate /*
5277c478bd9Sstevel@tonic-gate ** Read a single page from the journal file opened on file descriptor
5287c478bd9Sstevel@tonic-gate ** jfd.  Playback this one page.
5297c478bd9Sstevel@tonic-gate **
5307c478bd9Sstevel@tonic-gate ** There are three different journal formats.  The format parameter determines
5317c478bd9Sstevel@tonic-gate ** which format is used by the journal that is played back.
5327c478bd9Sstevel@tonic-gate */
pager_playback_one_page(Pager * pPager,OsFile * jfd,int format)5337c478bd9Sstevel@tonic-gate static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
5347c478bd9Sstevel@tonic-gate   int rc;
5357c478bd9Sstevel@tonic-gate   PgHdr *pPg;              /* An existing page in the cache */
5367c478bd9Sstevel@tonic-gate   PageRecord pgRec;
5377c478bd9Sstevel@tonic-gate   u32 cksum;
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate   rc = read32bits(format, jfd, &pgRec.pgno);
5407c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ) return rc;
5417c478bd9Sstevel@tonic-gate   rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
5427c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ) return rc;
5437c478bd9Sstevel@tonic-gate 
5447c478bd9Sstevel@tonic-gate   /* Sanity checking on the page.  This is more important that I originally
5457c478bd9Sstevel@tonic-gate   ** thought.  If a power failure occurs while the journal is being written,
5467c478bd9Sstevel@tonic-gate   ** it could cause invalid data to be written into the journal.  We need to
5477c478bd9Sstevel@tonic-gate   ** detect this invalid data (with high probability) and ignore it.
5487c478bd9Sstevel@tonic-gate   */
5497c478bd9Sstevel@tonic-gate   if( pgRec.pgno==0 ){
5507c478bd9Sstevel@tonic-gate     return SQLITE_DONE;
5517c478bd9Sstevel@tonic-gate   }
5527c478bd9Sstevel@tonic-gate   if( pgRec.pgno>(unsigned)pPager->dbSize ){
5537c478bd9Sstevel@tonic-gate     return SQLITE_OK;
5547c478bd9Sstevel@tonic-gate   }
5557c478bd9Sstevel@tonic-gate   if( format>=JOURNAL_FORMAT_3 ){
5567c478bd9Sstevel@tonic-gate     rc = read32bits(format, jfd, &cksum);
5577c478bd9Sstevel@tonic-gate     if( rc ) return rc;
5587c478bd9Sstevel@tonic-gate     if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
5597c478bd9Sstevel@tonic-gate       return SQLITE_DONE;
5607c478bd9Sstevel@tonic-gate     }
5617c478bd9Sstevel@tonic-gate   }
5627c478bd9Sstevel@tonic-gate 
5637c478bd9Sstevel@tonic-gate   /* Playback the page.  Update the in-memory copy of the page
5647c478bd9Sstevel@tonic-gate   ** at the same time, if there is one.
5657c478bd9Sstevel@tonic-gate   */
5667c478bd9Sstevel@tonic-gate   pPg = pager_lookup(pPager, pgRec.pgno);
5677c478bd9Sstevel@tonic-gate   TRACE2("PLAYBACK %d\n", pgRec.pgno);
5687c478bd9Sstevel@tonic-gate   sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
5697c478bd9Sstevel@tonic-gate   rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
5707c478bd9Sstevel@tonic-gate   if( pPg ){
5717c478bd9Sstevel@tonic-gate     /* No page should ever be rolled back that is in use, except for page
5727c478bd9Sstevel@tonic-gate     ** 1 which is held in use in order to keep the lock on the database
5737c478bd9Sstevel@tonic-gate     ** active.  However, such a page may be rolled back as a result of an
5747c478bd9Sstevel@tonic-gate     ** internal error resulting in an automatic call to
5757c478bd9Sstevel@tonic-gate     ** sqlitepager_rollback(), so we can't assert() it.
5767c478bd9Sstevel@tonic-gate     */
5777c478bd9Sstevel@tonic-gate     /* assert( pPg->nRef==0 || pPg->pgno==1 ) */
5787c478bd9Sstevel@tonic-gate     memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
5797c478bd9Sstevel@tonic-gate     memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
5807c478bd9Sstevel@tonic-gate     pPg->dirty = 0;
5817c478bd9Sstevel@tonic-gate     pPg->needSync = 0;
5827c478bd9Sstevel@tonic-gate     CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
5837c478bd9Sstevel@tonic-gate   }
5847c478bd9Sstevel@tonic-gate   return rc;
5857c478bd9Sstevel@tonic-gate }
5867c478bd9Sstevel@tonic-gate 
5877c478bd9Sstevel@tonic-gate /*
5887c478bd9Sstevel@tonic-gate ** Playback the journal and thus restore the database file to
589*1da57d55SToomas Soome ** the state it was in before we started making changes.
5907c478bd9Sstevel@tonic-gate **
591*1da57d55SToomas Soome ** The journal file format is as follows:
5927c478bd9Sstevel@tonic-gate **
5937c478bd9Sstevel@tonic-gate **    *  8 byte prefix.  One of the aJournalMagic123 vectors defined
5947c478bd9Sstevel@tonic-gate **       above.  The format of the journal file is determined by which
5957c478bd9Sstevel@tonic-gate **       of the three prefix vectors is seen.
5967c478bd9Sstevel@tonic-gate **    *  4 byte big-endian integer which is the number of valid page records
5977c478bd9Sstevel@tonic-gate **       in the journal.  If this value is 0xffffffff, then compute the
5987c478bd9Sstevel@tonic-gate **       number of page records from the journal size.  This field appears
5997c478bd9Sstevel@tonic-gate **       in format 3 only.
600*1da57d55SToomas Soome **    *  4 byte big-endian integer which is the initial value for the
6017c478bd9Sstevel@tonic-gate **       sanity checksum.  This field appears in format 3 only.
6027c478bd9Sstevel@tonic-gate **    *  4 byte integer which is the number of pages to truncate the
6037c478bd9Sstevel@tonic-gate **       database to during a rollback.
6047c478bd9Sstevel@tonic-gate **    *  Zero or more pages instances, each as follows:
6057c478bd9Sstevel@tonic-gate **        +  4 byte page number.
6067c478bd9Sstevel@tonic-gate **        +  SQLITE_PAGE_SIZE bytes of data.
6077c478bd9Sstevel@tonic-gate **        +  4 byte checksum (format 3 only)
6087c478bd9Sstevel@tonic-gate **
6097c478bd9Sstevel@tonic-gate ** When we speak of the journal header, we mean the first 4 bullets above.
6107c478bd9Sstevel@tonic-gate ** Each entry in the journal is an instance of the 5th bullet.  Note that
6117c478bd9Sstevel@tonic-gate ** bullets 2 and 3 only appear in format-3 journals.
6127c478bd9Sstevel@tonic-gate **
6137c478bd9Sstevel@tonic-gate ** Call the value from the second bullet "nRec".  nRec is the number of
6147c478bd9Sstevel@tonic-gate ** valid page entries in the journal.  In most cases, you can compute the
6157c478bd9Sstevel@tonic-gate ** value of nRec from the size of the journal file.  But if a power
6167c478bd9Sstevel@tonic-gate ** failure occurred while the journal was being written, it could be the
6177c478bd9Sstevel@tonic-gate ** case that the size of the journal file had already been increased but
6187c478bd9Sstevel@tonic-gate ** the extra entries had not yet made it safely to disk.  In such a case,
6197c478bd9Sstevel@tonic-gate ** the value of nRec computed from the file size would be too large.  For
6207c478bd9Sstevel@tonic-gate ** that reason, we always use the nRec value in the header.
6217c478bd9Sstevel@tonic-gate **
6227c478bd9Sstevel@tonic-gate ** If the nRec value is 0xffffffff it means that nRec should be computed
6237c478bd9Sstevel@tonic-gate ** from the file size.  This value is used when the user selects the
6247c478bd9Sstevel@tonic-gate ** no-sync option for the journal.  A power failure could lead to corruption
6257c478bd9Sstevel@tonic-gate ** in this case.  But for things like temporary table (which will be
626*1da57d55SToomas Soome ** deleted when the power is restored) we don't care.
6277c478bd9Sstevel@tonic-gate **
6287c478bd9Sstevel@tonic-gate ** Journal formats 1 and 2 do not have an nRec value in the header so we
6297c478bd9Sstevel@tonic-gate ** have to compute nRec from the file size.  This has risks (as described
6307c478bd9Sstevel@tonic-gate ** above) which is why all persistent tables have been changed to use
6317c478bd9Sstevel@tonic-gate ** format 3.
6327c478bd9Sstevel@tonic-gate **
6337c478bd9Sstevel@tonic-gate ** If the file opened as the journal file is not a well-formed
6347c478bd9Sstevel@tonic-gate ** journal file then the database will likely already be
6357c478bd9Sstevel@tonic-gate ** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
6367c478bd9Sstevel@tonic-gate ** and SQLITE_CORRUPT is returned.  If it all works, then this routine
6377c478bd9Sstevel@tonic-gate ** returns SQLITE_OK.
6387c478bd9Sstevel@tonic-gate */
pager_playback(Pager * pPager,int useJournalSize)6397c478bd9Sstevel@tonic-gate static int pager_playback(Pager *pPager, int useJournalSize){
6407c478bd9Sstevel@tonic-gate   off_t szJ;               /* Size of the journal file in bytes */
6417c478bd9Sstevel@tonic-gate   int nRec;                /* Number of Records in the journal */
6427c478bd9Sstevel@tonic-gate   int i;                   /* Loop counter */
6437c478bd9Sstevel@tonic-gate   Pgno mxPg = 0;           /* Size of the original file in pages */
6447c478bd9Sstevel@tonic-gate   int format;              /* Format of the journal file. */
6457c478bd9Sstevel@tonic-gate   unsigned char aMagic[sizeof(aJournalMagic1)];
6467c478bd9Sstevel@tonic-gate   int rc;
6477c478bd9Sstevel@tonic-gate 
6487c478bd9Sstevel@tonic-gate   /* Figure out how many records are in the journal.  Abort early if
6497c478bd9Sstevel@tonic-gate   ** the journal is empty.
6507c478bd9Sstevel@tonic-gate   */
6517c478bd9Sstevel@tonic-gate   assert( pPager->journalOpen );
6527c478bd9Sstevel@tonic-gate   sqliteOsSeek(&pPager->jfd, 0);
6537c478bd9Sstevel@tonic-gate   rc = sqliteOsFileSize(&pPager->jfd, &szJ);
6547c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
6557c478bd9Sstevel@tonic-gate     goto end_playback;
6567c478bd9Sstevel@tonic-gate   }
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate   /* If the journal file is too small to contain a complete header,
6597c478bd9Sstevel@tonic-gate   ** it must mean that the process that created the journal was just
6607c478bd9Sstevel@tonic-gate   ** beginning to write the journal file when it died.  In that case,
6617c478bd9Sstevel@tonic-gate   ** the database file should have still been completely unchanged.
6627c478bd9Sstevel@tonic-gate   ** Nothing needs to be rolled back.  We can safely ignore this journal.
6637c478bd9Sstevel@tonic-gate   */
6647c478bd9Sstevel@tonic-gate   if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
6657c478bd9Sstevel@tonic-gate     goto end_playback;
6667c478bd9Sstevel@tonic-gate   }
6677c478bd9Sstevel@tonic-gate 
6687c478bd9Sstevel@tonic-gate   /* Read the beginning of the journal and truncate the
6697c478bd9Sstevel@tonic-gate   ** database file back to its original size.
6707c478bd9Sstevel@tonic-gate   */
6717c478bd9Sstevel@tonic-gate   rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
6727c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
6737c478bd9Sstevel@tonic-gate     rc = SQLITE_PROTOCOL;
6747c478bd9Sstevel@tonic-gate     goto end_playback;
6757c478bd9Sstevel@tonic-gate   }
6767c478bd9Sstevel@tonic-gate   if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
6777c478bd9Sstevel@tonic-gate     format = JOURNAL_FORMAT_3;
6787c478bd9Sstevel@tonic-gate   }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
6797c478bd9Sstevel@tonic-gate     format = JOURNAL_FORMAT_2;
6807c478bd9Sstevel@tonic-gate   }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
6817c478bd9Sstevel@tonic-gate     format = JOURNAL_FORMAT_1;
6827c478bd9Sstevel@tonic-gate   }else{
6837c478bd9Sstevel@tonic-gate     rc = SQLITE_PROTOCOL;
6847c478bd9Sstevel@tonic-gate     goto end_playback;
6857c478bd9Sstevel@tonic-gate   }
6867c478bd9Sstevel@tonic-gate   if( format>=JOURNAL_FORMAT_3 ){
6877c478bd9Sstevel@tonic-gate     if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
6887c478bd9Sstevel@tonic-gate       /* Ignore the journal if it is too small to contain a complete
6897c478bd9Sstevel@tonic-gate       ** header.  We already did this test once above, but at the prior
6907c478bd9Sstevel@tonic-gate       ** test, we did not know the journal format and so we had to assume
6917c478bd9Sstevel@tonic-gate       ** the smallest possible header.  Now we know the header is bigger
6927c478bd9Sstevel@tonic-gate       ** than the minimum so we test again.
6937c478bd9Sstevel@tonic-gate       */
6947c478bd9Sstevel@tonic-gate       goto end_playback;
6957c478bd9Sstevel@tonic-gate     }
6967c478bd9Sstevel@tonic-gate     rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
6977c478bd9Sstevel@tonic-gate     if( rc ) goto end_playback;
6987c478bd9Sstevel@tonic-gate     rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
6997c478bd9Sstevel@tonic-gate     if( rc ) goto end_playback;
7007c478bd9Sstevel@tonic-gate     if( nRec==0xffffffff || useJournalSize ){
7017c478bd9Sstevel@tonic-gate       nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
7027c478bd9Sstevel@tonic-gate     }
7037c478bd9Sstevel@tonic-gate   }else{
7047c478bd9Sstevel@tonic-gate     nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
7057c478bd9Sstevel@tonic-gate     assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
7067c478bd9Sstevel@tonic-gate   }
7077c478bd9Sstevel@tonic-gate   rc = read32bits(format, &pPager->jfd, &mxPg);
7087c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
7097c478bd9Sstevel@tonic-gate     goto end_playback;
7107c478bd9Sstevel@tonic-gate   }
7117c478bd9Sstevel@tonic-gate   assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
7127c478bd9Sstevel@tonic-gate   rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
7137c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
7147c478bd9Sstevel@tonic-gate     goto end_playback;
7157c478bd9Sstevel@tonic-gate   }
7167c478bd9Sstevel@tonic-gate   pPager->dbSize = mxPg;
717*1da57d55SToomas Soome 
7187c478bd9Sstevel@tonic-gate   /* Copy original pages out of the journal and back into the database file.
7197c478bd9Sstevel@tonic-gate   */
7207c478bd9Sstevel@tonic-gate   for(i=0; i<nRec; i++){
7217c478bd9Sstevel@tonic-gate     rc = pager_playback_one_page(pPager, &pPager->jfd, format);
7227c478bd9Sstevel@tonic-gate     if( rc!=SQLITE_OK ){
7237c478bd9Sstevel@tonic-gate       if( rc==SQLITE_DONE ){
7247c478bd9Sstevel@tonic-gate         rc = SQLITE_OK;
7257c478bd9Sstevel@tonic-gate       }
7267c478bd9Sstevel@tonic-gate       break;
7277c478bd9Sstevel@tonic-gate     }
7287c478bd9Sstevel@tonic-gate   }
7297c478bd9Sstevel@tonic-gate 
7307c478bd9Sstevel@tonic-gate   /* Pages that have been written to the journal but never synced
7317c478bd9Sstevel@tonic-gate   ** where not restored by the loop above.  We have to restore those
7327c478bd9Sstevel@tonic-gate   ** pages by reading them back from the original database.
7337c478bd9Sstevel@tonic-gate   */
7347c478bd9Sstevel@tonic-gate   if( rc==SQLITE_OK ){
7357c478bd9Sstevel@tonic-gate     PgHdr *pPg;
7367c478bd9Sstevel@tonic-gate     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
7377c478bd9Sstevel@tonic-gate       char zBuf[SQLITE_PAGE_SIZE];
7387c478bd9Sstevel@tonic-gate       if( !pPg->dirty ) continue;
7397c478bd9Sstevel@tonic-gate       if( (int)pPg->pgno <= pPager->origDbSize ){
7407c478bd9Sstevel@tonic-gate         sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
7417c478bd9Sstevel@tonic-gate         rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
7427c478bd9Sstevel@tonic-gate         TRACE2("REFETCH %d\n", pPg->pgno);
7437c478bd9Sstevel@tonic-gate         CODEC(pPager, zBuf, pPg->pgno, 2);
7447c478bd9Sstevel@tonic-gate         if( rc ) break;
7457c478bd9Sstevel@tonic-gate       }else{
7467c478bd9Sstevel@tonic-gate         memset(zBuf, 0, SQLITE_PAGE_SIZE);
7477c478bd9Sstevel@tonic-gate       }
7487c478bd9Sstevel@tonic-gate       if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
7497c478bd9Sstevel@tonic-gate         memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
7507c478bd9Sstevel@tonic-gate         memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
7517c478bd9Sstevel@tonic-gate       }
7527c478bd9Sstevel@tonic-gate       pPg->needSync = 0;
7537c478bd9Sstevel@tonic-gate       pPg->dirty = 0;
7547c478bd9Sstevel@tonic-gate     }
7557c478bd9Sstevel@tonic-gate   }
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate end_playback:
7587c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
7597c478bd9Sstevel@tonic-gate     pager_unwritelock(pPager);
7607c478bd9Sstevel@tonic-gate     pPager->errMask |= PAGER_ERR_CORRUPT;
7617c478bd9Sstevel@tonic-gate     rc = SQLITE_CORRUPT;
7627c478bd9Sstevel@tonic-gate   }else{
7637c478bd9Sstevel@tonic-gate     rc = pager_unwritelock(pPager);
7647c478bd9Sstevel@tonic-gate   }
7657c478bd9Sstevel@tonic-gate   return rc;
7667c478bd9Sstevel@tonic-gate }
7677c478bd9Sstevel@tonic-gate 
7687c478bd9Sstevel@tonic-gate /*
7697c478bd9Sstevel@tonic-gate ** Playback the checkpoint journal.
7707c478bd9Sstevel@tonic-gate **
7717c478bd9Sstevel@tonic-gate ** This is similar to playing back the transaction journal but with
7727c478bd9Sstevel@tonic-gate ** a few extra twists.
7737c478bd9Sstevel@tonic-gate **
7747c478bd9Sstevel@tonic-gate **    (1)  The number of pages in the database file at the start of
7757c478bd9Sstevel@tonic-gate **         the checkpoint is stored in pPager->ckptSize, not in the
7767c478bd9Sstevel@tonic-gate **         journal file itself.
7777c478bd9Sstevel@tonic-gate **
7787c478bd9Sstevel@tonic-gate **    (2)  In addition to playing back the checkpoint journal, also
7797c478bd9Sstevel@tonic-gate **         playback all pages of the transaction journal beginning
7807c478bd9Sstevel@tonic-gate **         at offset pPager->ckptJSize.
7817c478bd9Sstevel@tonic-gate */
pager_ckpt_playback(Pager * pPager)7827c478bd9Sstevel@tonic-gate static int pager_ckpt_playback(Pager *pPager){
7837c478bd9Sstevel@tonic-gate   off_t szJ;               /* Size of the full journal */
7847c478bd9Sstevel@tonic-gate   int nRec;                /* Number of Records */
7857c478bd9Sstevel@tonic-gate   int i;                   /* Loop counter */
7867c478bd9Sstevel@tonic-gate   int rc;
7877c478bd9Sstevel@tonic-gate 
7887c478bd9Sstevel@tonic-gate   /* Truncate the database back to its original size.
7897c478bd9Sstevel@tonic-gate   */
7907c478bd9Sstevel@tonic-gate   rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
7917c478bd9Sstevel@tonic-gate   pPager->dbSize = pPager->ckptSize;
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate   /* Figure out how many records are in the checkpoint journal.
7947c478bd9Sstevel@tonic-gate   */
7957c478bd9Sstevel@tonic-gate   assert( pPager->ckptInUse && pPager->journalOpen );
7967c478bd9Sstevel@tonic-gate   sqliteOsSeek(&pPager->cpfd, 0);
7977c478bd9Sstevel@tonic-gate   nRec = pPager->ckptNRec;
798*1da57d55SToomas Soome 
7997c478bd9Sstevel@tonic-gate   /* Copy original pages out of the checkpoint journal and back into the
8007c478bd9Sstevel@tonic-gate   ** database file.  Note that the checkpoint journal always uses format
8017c478bd9Sstevel@tonic-gate   ** 2 instead of format 3 since it does not need to be concerned with
8027c478bd9Sstevel@tonic-gate   ** power failures corrupting the journal and can thus omit the checksums.
8037c478bd9Sstevel@tonic-gate   */
8047c478bd9Sstevel@tonic-gate   for(i=nRec-1; i>=0; i--){
8057c478bd9Sstevel@tonic-gate     rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
8067c478bd9Sstevel@tonic-gate     assert( rc!=SQLITE_DONE );
8077c478bd9Sstevel@tonic-gate     if( rc!=SQLITE_OK ) goto end_ckpt_playback;
8087c478bd9Sstevel@tonic-gate   }
8097c478bd9Sstevel@tonic-gate 
8107c478bd9Sstevel@tonic-gate   /* Figure out how many pages need to be copied out of the transaction
8117c478bd9Sstevel@tonic-gate   ** journal.
8127c478bd9Sstevel@tonic-gate   */
8137c478bd9Sstevel@tonic-gate   rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
8147c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
8157c478bd9Sstevel@tonic-gate     goto end_ckpt_playback;
8167c478bd9Sstevel@tonic-gate   }
8177c478bd9Sstevel@tonic-gate   rc = sqliteOsFileSize(&pPager->jfd, &szJ);
8187c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
8197c478bd9Sstevel@tonic-gate     goto end_ckpt_playback;
8207c478bd9Sstevel@tonic-gate   }
8217c478bd9Sstevel@tonic-gate   nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
8227c478bd9Sstevel@tonic-gate   for(i=nRec-1; i>=0; i--){
8237c478bd9Sstevel@tonic-gate     rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
8247c478bd9Sstevel@tonic-gate     if( rc!=SQLITE_OK ){
8257c478bd9Sstevel@tonic-gate       assert( rc!=SQLITE_DONE );
8267c478bd9Sstevel@tonic-gate       goto end_ckpt_playback;
8277c478bd9Sstevel@tonic-gate     }
8287c478bd9Sstevel@tonic-gate   }
829*1da57d55SToomas Soome 
8307c478bd9Sstevel@tonic-gate end_ckpt_playback:
8317c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
8327c478bd9Sstevel@tonic-gate     pPager->errMask |= PAGER_ERR_CORRUPT;
8337c478bd9Sstevel@tonic-gate     rc = SQLITE_CORRUPT;
8347c478bd9Sstevel@tonic-gate   }
8357c478bd9Sstevel@tonic-gate   return rc;
8367c478bd9Sstevel@tonic-gate }
8377c478bd9Sstevel@tonic-gate 
8387c478bd9Sstevel@tonic-gate /*
8397c478bd9Sstevel@tonic-gate ** Change the maximum number of in-memory pages that are allowed.
8407c478bd9Sstevel@tonic-gate **
8417c478bd9Sstevel@tonic-gate ** The maximum number is the absolute value of the mxPage parameter.
8427c478bd9Sstevel@tonic-gate ** If mxPage is negative, the noSync flag is also set.  noSync bypasses
8437c478bd9Sstevel@tonic-gate ** calls to sqliteOsSync().  The pager runs much faster with noSync on,
844*1da57d55SToomas Soome ** but if the operating system crashes or there is an abrupt power
8457c478bd9Sstevel@tonic-gate ** failure, the database file might be left in an inconsistent and
846*1da57d55SToomas Soome ** unrepairable state.
8477c478bd9Sstevel@tonic-gate */
sqlitepager_set_cachesize(Pager * pPager,int mxPage)8487c478bd9Sstevel@tonic-gate void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
8497c478bd9Sstevel@tonic-gate   if( mxPage>=0 ){
8507c478bd9Sstevel@tonic-gate     pPager->noSync = pPager->tempFile;
8517c478bd9Sstevel@tonic-gate     if( pPager->noSync==0 ) pPager->needSync = 0;
8527c478bd9Sstevel@tonic-gate   }else{
8537c478bd9Sstevel@tonic-gate     pPager->noSync = 1;
8547c478bd9Sstevel@tonic-gate     mxPage = -mxPage;
8557c478bd9Sstevel@tonic-gate   }
8567c478bd9Sstevel@tonic-gate   if( mxPage>10 ){
8577c478bd9Sstevel@tonic-gate     pPager->mxPage = mxPage;
8587c478bd9Sstevel@tonic-gate   }
8597c478bd9Sstevel@tonic-gate }
8607c478bd9Sstevel@tonic-gate 
8617c478bd9Sstevel@tonic-gate /*
8627c478bd9Sstevel@tonic-gate ** Adjust the robustness of the database to damage due to OS crashes
8637c478bd9Sstevel@tonic-gate ** or power failures by changing the number of syncs()s when writing
8647c478bd9Sstevel@tonic-gate ** the rollback journal.  There are three levels:
8657c478bd9Sstevel@tonic-gate **
8667c478bd9Sstevel@tonic-gate **    OFF       sqliteOsSync() is never called.  This is the default
8677c478bd9Sstevel@tonic-gate **              for temporary and transient files.
8687c478bd9Sstevel@tonic-gate **
8697c478bd9Sstevel@tonic-gate **    NORMAL    The journal is synced once before writes begin on the
8707c478bd9Sstevel@tonic-gate **              database.  This is normally adequate protection, but
8717c478bd9Sstevel@tonic-gate **              it is theoretically possible, though very unlikely,
8727c478bd9Sstevel@tonic-gate **              that an inopertune power failure could leave the journal
8737c478bd9Sstevel@tonic-gate **              in a state which would cause damage to the database
8747c478bd9Sstevel@tonic-gate **              when it is rolled back.
8757c478bd9Sstevel@tonic-gate **
8767c478bd9Sstevel@tonic-gate **    FULL      The journal is synced twice before writes begin on the
8777c478bd9Sstevel@tonic-gate **              database (with some additional information - the nRec field
8787c478bd9Sstevel@tonic-gate **              of the journal header - being written in between the two
8797c478bd9Sstevel@tonic-gate **              syncs).  If we assume that writing a
8807c478bd9Sstevel@tonic-gate **              single disk sector is atomic, then this mode provides
8817c478bd9Sstevel@tonic-gate **              assurance that the journal will not be corrupted to the
8827c478bd9Sstevel@tonic-gate **              point of causing damage to the database during rollback.
8837c478bd9Sstevel@tonic-gate **
8847c478bd9Sstevel@tonic-gate ** Numeric values associated with these states are OFF==1, NORMAL=2,
8857c478bd9Sstevel@tonic-gate ** and FULL=3.
8867c478bd9Sstevel@tonic-gate */
sqlitepager_set_safety_level(Pager * pPager,int level)8877c478bd9Sstevel@tonic-gate void sqlitepager_set_safety_level(Pager *pPager, int level){
8887c478bd9Sstevel@tonic-gate   pPager->noSync =  level==1 || pPager->tempFile;
8897c478bd9Sstevel@tonic-gate   pPager->fullSync = level==3 && !pPager->tempFile;
8907c478bd9Sstevel@tonic-gate   if( pPager->noSync==0 ) pPager->needSync = 0;
8917c478bd9Sstevel@tonic-gate }
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate /*
8947c478bd9Sstevel@tonic-gate ** Open a temporary file.  Write the name of the file into zName
8957c478bd9Sstevel@tonic-gate ** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.)  Write
8967c478bd9Sstevel@tonic-gate ** the file descriptor into *fd.  Return SQLITE_OK on success or some
8977c478bd9Sstevel@tonic-gate ** other error code if we fail.
8987c478bd9Sstevel@tonic-gate **
8997c478bd9Sstevel@tonic-gate ** The OS will automatically delete the temporary file when it is
9007c478bd9Sstevel@tonic-gate ** closed.
9017c478bd9Sstevel@tonic-gate */
sqlitepager_opentemp(char * zFile,OsFile * fd)9027c478bd9Sstevel@tonic-gate static int sqlitepager_opentemp(char *zFile, OsFile *fd){
9037c478bd9Sstevel@tonic-gate   int cnt = 8;
9047c478bd9Sstevel@tonic-gate   int rc;
9057c478bd9Sstevel@tonic-gate   do{
9067c478bd9Sstevel@tonic-gate     cnt--;
9077c478bd9Sstevel@tonic-gate     sqliteOsTempFileName(zFile);
9087c478bd9Sstevel@tonic-gate     rc = sqliteOsOpenExclusive(zFile, fd, 1);
9097c478bd9Sstevel@tonic-gate   }while( cnt>0 && rc!=SQLITE_OK );
9107c478bd9Sstevel@tonic-gate   return rc;
9117c478bd9Sstevel@tonic-gate }
9127c478bd9Sstevel@tonic-gate 
9137c478bd9Sstevel@tonic-gate /*
9147c478bd9Sstevel@tonic-gate ** Create a new page cache and put a pointer to the page cache in *ppPager.
9157c478bd9Sstevel@tonic-gate ** The file to be cached need not exist.  The file is not locked until
9167c478bd9Sstevel@tonic-gate ** the first call to sqlitepager_get() and is only held open until the
9177c478bd9Sstevel@tonic-gate ** last page is released using sqlitepager_unref().
9187c478bd9Sstevel@tonic-gate **
9197c478bd9Sstevel@tonic-gate ** If zFilename is NULL then a randomly-named temporary file is created
9207c478bd9Sstevel@tonic-gate ** and used as the file to be cached.  The file will be deleted
9217c478bd9Sstevel@tonic-gate ** automatically when it is closed.
9227c478bd9Sstevel@tonic-gate */
/*
** Open the database file zFilename (or a temporary file when zFilename
** is NULL or empty) and build a new Pager around it.
**
** Return values:
**
**   SQLITE_OK        The pager was created and *ppPager points to it.
**   SQLITE_NOMEM     A memory allocation failed.
**   SQLITE_CANTOPEN  The file could not be opened.
**
** *ppPager is set to NULL on every failure path.  Every failure path also
** closes the file descriptor if it was opened and frees the full pathname,
** so nothing is leaked when an error is returned.
*/
int sqlitepager_open(
  Pager **ppPager,         /* Return the Pager structure here */
  const char *zFilename,   /* Name of the database file to open */
  int mxPage,              /* Max number of in-memory cache pages */
  int nExtra,              /* Extra bytes append to each in-memory page */
  int useJournal           /* TRUE to use a rollback journal on this file */
){
  Pager *pPager;
  char *zFullPathname;
  int nameLen;
  OsFile fd;
  int rc, i;
  int tempFile;
  int readOnly = 0;
  char zTemp[SQLITE_TEMPNAME_SIZE];

  *ppPager = 0;
  if( sqlite_malloc_failed ){
    return SQLITE_NOMEM;
  }
  if( zFilename && zFilename[0] ){
    zFullPathname = sqliteOsFullPathname(zFilename);
    if( zFullPathname ){
      rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
    }else{
      /* Never hand a NULL pathname to the OS layer. */
      rc = SQLITE_NOMEM;
    }
    tempFile = 0;
  }else{
    rc = sqlitepager_opentemp(zTemp, &fd);
    zFilename = zTemp;
    zFullPathname = sqliteOsFullPathname(zFilename);
    tempFile = 1;
  }
  if( sqlite_malloc_failed || (rc==SQLITE_OK && zFullPathname==0) ){
    /* An allocation failed after (possibly) opening the file.  Release
    ** whatever was acquired so that neither the descriptor nor the
    ** pathname buffer leaks.
    */
    if( rc==SQLITE_OK ){
      sqliteOsClose(&fd);
    }
    sqliteFree(zFullPathname);
    return SQLITE_NOMEM;
  }
  if( rc!=SQLITE_OK ){
    sqliteFree(zFullPathname);
    return SQLITE_CANTOPEN;
  }

  /* The Pager structure and its three strings live in one allocation:
  **
  **    [ Pager ][ zFilename \0 ][ zDirectory \0 ][ zJournal "-journal" \0 ]
  **
  ** Each string needs at most nameLen+1 bytes, plus 8 more for the
  ** "-journal" suffix, which nameLen*3 + 30 covers.
  */
  nameLen = strlen(zFullPathname);
  pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
  if( pPager==0 ){
    sqliteOsClose(&fd);
    sqliteFree(zFullPathname);
    return SQLITE_NOMEM;
  }
  SET_PAGER(pPager);
  pPager->zFilename = (char*)&pPager[1];
  pPager->zDirectory = &pPager->zFilename[nameLen+1];
  pPager->zJournal = &pPager->zDirectory[nameLen+1];
  strcpy(pPager->zFilename, zFullPathname);
  strcpy(pPager->zDirectory, zFullPathname);

  /* Chop the final path component off of zDirectory by overwriting the
  ** last '/' with a terminator.  If there is no '/', leave it unchanged.
  */
  for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
  if( i>0 ) pPager->zDirectory[i-1] = 0;
  strcpy(pPager->zJournal, zFullPathname);
  sqliteFree(zFullPathname);
  strcpy(&pPager->zJournal[nameLen], "-journal");

  pPager->fd = fd;
  pPager->journalOpen = 0;
  pPager->useJournal = useJournal;
  pPager->ckptOpen = 0;
  pPager->ckptInUse = 0;
  pPager->nRef = 0;
  pPager->dbSize = -1;          /* Negative means "size not yet known" */
  pPager->ckptSize = 0;
  pPager->ckptJSize = 0;
  pPager->nPage = 0;
  pPager->mxPage = mxPage>5 ? mxPage : 10;  /* Tiny requests become 10 */
  pPager->state = SQLITE_UNLOCK;
  pPager->errMask = 0;
  pPager->tempFile = tempFile;
  pPager->readOnly = readOnly;
  pPager->needSync = 0;
  /* Syncing is skipped for temp files and for journal-less pagers */
  pPager->noSync = pPager->tempFile || !useJournal;
  pPager->pFirst = 0;
  pPager->pFirstSynced = 0;
  pPager->pLast = 0;
  pPager->nExtra = nExtra;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  *ppPager = pPager;
  return SQLITE_OK;
}
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate /*
10057c478bd9Sstevel@tonic-gate ** Set the destructor for this pager.  If not NULL, the destructor is called
10067c478bd9Sstevel@tonic-gate ** when the reference count on each page reaches zero.  The destructor can
10077c478bd9Sstevel@tonic-gate ** be used to clean up information in the extra segment appended to each page.
10087c478bd9Sstevel@tonic-gate **
1009*1da57d55SToomas Soome ** The destructor is not called as a result sqlitepager_close().
10107c478bd9Sstevel@tonic-gate ** Destructors are only called by sqlitepager_unref().
10117c478bd9Sstevel@tonic-gate */
sqlitepager_set_destructor(Pager * pPager,void (* xDesc)(void *))10127c478bd9Sstevel@tonic-gate void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
10137c478bd9Sstevel@tonic-gate   pPager->xDestructor = xDesc;
10147c478bd9Sstevel@tonic-gate }
10157c478bd9Sstevel@tonic-gate 
10167c478bd9Sstevel@tonic-gate /*
10177c478bd9Sstevel@tonic-gate ** Return the total number of pages in the disk file associated with
10187c478bd9Sstevel@tonic-gate ** pPager.
10197c478bd9Sstevel@tonic-gate */
sqlitepager_pagecount(Pager * pPager)10207c478bd9Sstevel@tonic-gate int sqlitepager_pagecount(Pager *pPager){
10217c478bd9Sstevel@tonic-gate   off_t n;
10227c478bd9Sstevel@tonic-gate   assert( pPager!=0 );
10237c478bd9Sstevel@tonic-gate   if( pPager->dbSize>=0 ){
10247c478bd9Sstevel@tonic-gate     return pPager->dbSize;
10257c478bd9Sstevel@tonic-gate   }
10267c478bd9Sstevel@tonic-gate   if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
10277c478bd9Sstevel@tonic-gate     pPager->errMask |= PAGER_ERR_DISK;
10287c478bd9Sstevel@tonic-gate     return 0;
10297c478bd9Sstevel@tonic-gate   }
10307c478bd9Sstevel@tonic-gate   n /= SQLITE_PAGE_SIZE;
10317c478bd9Sstevel@tonic-gate   if( pPager->state!=SQLITE_UNLOCK ){
10327c478bd9Sstevel@tonic-gate     pPager->dbSize = n;
10337c478bd9Sstevel@tonic-gate   }
10347c478bd9Sstevel@tonic-gate   return n;
10357c478bd9Sstevel@tonic-gate }
10367c478bd9Sstevel@tonic-gate 
10377c478bd9Sstevel@tonic-gate /*
10387c478bd9Sstevel@tonic-gate ** Forward declaration
10397c478bd9Sstevel@tonic-gate */
10407c478bd9Sstevel@tonic-gate static int syncJournal(Pager*);
10417c478bd9Sstevel@tonic-gate 
10427c478bd9Sstevel@tonic-gate /*
10437c478bd9Sstevel@tonic-gate ** Truncate the file to the number of pages specified.
10447c478bd9Sstevel@tonic-gate */
sqlitepager_truncate(Pager * pPager,Pgno nPage)10457c478bd9Sstevel@tonic-gate int sqlitepager_truncate(Pager *pPager, Pgno nPage){
10467c478bd9Sstevel@tonic-gate   int rc;
10477c478bd9Sstevel@tonic-gate   if( pPager->dbSize<0 ){
10487c478bd9Sstevel@tonic-gate     sqlitepager_pagecount(pPager);
10497c478bd9Sstevel@tonic-gate   }
10507c478bd9Sstevel@tonic-gate   if( pPager->errMask!=0 ){
10517c478bd9Sstevel@tonic-gate     rc = pager_errcode(pPager);
10527c478bd9Sstevel@tonic-gate     return rc;
10537c478bd9Sstevel@tonic-gate   }
10547c478bd9Sstevel@tonic-gate   if( nPage>=(unsigned)pPager->dbSize ){
10557c478bd9Sstevel@tonic-gate     return SQLITE_OK;
10567c478bd9Sstevel@tonic-gate   }
10577c478bd9Sstevel@tonic-gate   syncJournal(pPager);
10587c478bd9Sstevel@tonic-gate   rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
10597c478bd9Sstevel@tonic-gate   if( rc==SQLITE_OK ){
10607c478bd9Sstevel@tonic-gate     pPager->dbSize = nPage;
10617c478bd9Sstevel@tonic-gate   }
10627c478bd9Sstevel@tonic-gate   return rc;
10637c478bd9Sstevel@tonic-gate }
10647c478bd9Sstevel@tonic-gate 
10657c478bd9Sstevel@tonic-gate /*
10667c478bd9Sstevel@tonic-gate ** Shutdown the page cache.  Free all memory and close all files.
10677c478bd9Sstevel@tonic-gate **
10687c478bd9Sstevel@tonic-gate ** If a transaction was in progress when this routine is called, that
10697c478bd9Sstevel@tonic-gate ** transaction is rolled back.  All outstanding pages are invalidated
10707c478bd9Sstevel@tonic-gate ** and their memory is freed.  Any attempt to use a page associated
10717c478bd9Sstevel@tonic-gate ** with this page cache after this function returns will likely
10727c478bd9Sstevel@tonic-gate ** result in a coredump.
10737c478bd9Sstevel@tonic-gate */
sqlitepager_close(Pager * pPager)10747c478bd9Sstevel@tonic-gate int sqlitepager_close(Pager *pPager){
10757c478bd9Sstevel@tonic-gate   PgHdr *pPg, *pNext;
10767c478bd9Sstevel@tonic-gate   switch( pPager->state ){
10777c478bd9Sstevel@tonic-gate     case SQLITE_WRITELOCK: {
10787c478bd9Sstevel@tonic-gate       sqlitepager_rollback(pPager);
10797c478bd9Sstevel@tonic-gate       sqliteOsUnlock(&pPager->fd);
10807c478bd9Sstevel@tonic-gate       assert( pPager->journalOpen==0 );
10817c478bd9Sstevel@tonic-gate       break;
10827c478bd9Sstevel@tonic-gate     }
10837c478bd9Sstevel@tonic-gate     case SQLITE_READLOCK: {
10847c478bd9Sstevel@tonic-gate       sqliteOsUnlock(&pPager->fd);
10857c478bd9Sstevel@tonic-gate       break;
10867c478bd9Sstevel@tonic-gate     }
10877c478bd9Sstevel@tonic-gate     default: {
10887c478bd9Sstevel@tonic-gate       /* Do nothing */
10897c478bd9Sstevel@tonic-gate       break;
10907c478bd9Sstevel@tonic-gate     }
10917c478bd9Sstevel@tonic-gate   }
10927c478bd9Sstevel@tonic-gate   for(pPg=pPager->pAll; pPg; pPg=pNext){
10937c478bd9Sstevel@tonic-gate     pNext = pPg->pNextAll;
10947c478bd9Sstevel@tonic-gate     sqliteFree(pPg);
10957c478bd9Sstevel@tonic-gate   }
10967c478bd9Sstevel@tonic-gate   sqliteOsClose(&pPager->fd);
10977c478bd9Sstevel@tonic-gate   assert( pPager->journalOpen==0 );
10987c478bd9Sstevel@tonic-gate   /* Temp files are automatically deleted by the OS
10997c478bd9Sstevel@tonic-gate   ** if( pPager->tempFile ){
11007c478bd9Sstevel@tonic-gate   **   sqliteOsDelete(pPager->zFilename);
11017c478bd9Sstevel@tonic-gate   ** }
11027c478bd9Sstevel@tonic-gate   */
11037c478bd9Sstevel@tonic-gate   CLR_PAGER(pPager);
11047c478bd9Sstevel@tonic-gate   if( pPager->zFilename!=(char*)&pPager[1] ){
11057c478bd9Sstevel@tonic-gate     assert( 0 );  /* Cannot happen */
11067c478bd9Sstevel@tonic-gate     sqliteFree(pPager->zFilename);
11077c478bd9Sstevel@tonic-gate     sqliteFree(pPager->zJournal);
11087c478bd9Sstevel@tonic-gate     sqliteFree(pPager->zDirectory);
11097c478bd9Sstevel@tonic-gate   }
11107c478bd9Sstevel@tonic-gate   sqliteFree(pPager);
11117c478bd9Sstevel@tonic-gate   return SQLITE_OK;
11127c478bd9Sstevel@tonic-gate }
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate /*
11157c478bd9Sstevel@tonic-gate ** Return the page number for the given page data.
11167c478bd9Sstevel@tonic-gate */
sqlitepager_pagenumber(void * pData)11177c478bd9Sstevel@tonic-gate Pgno sqlitepager_pagenumber(void *pData){
11187c478bd9Sstevel@tonic-gate   PgHdr *p = DATA_TO_PGHDR(pData);
11197c478bd9Sstevel@tonic-gate   return p->pgno;
11207c478bd9Sstevel@tonic-gate }
11217c478bd9Sstevel@tonic-gate 
11227c478bd9Sstevel@tonic-gate /*
11237c478bd9Sstevel@tonic-gate ** Increment the reference count for a page.  If the page is
11247c478bd9Sstevel@tonic-gate ** currently on the freelist (the reference count is zero) then
11257c478bd9Sstevel@tonic-gate ** remove it from the freelist.
11267c478bd9Sstevel@tonic-gate */
11277c478bd9Sstevel@tonic-gate #define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
_page_ref(PgHdr * pPg)11287c478bd9Sstevel@tonic-gate static void _page_ref(PgHdr *pPg){
11297c478bd9Sstevel@tonic-gate   if( pPg->nRef==0 ){
11307c478bd9Sstevel@tonic-gate     /* The page is currently on the freelist.  Remove it. */
11317c478bd9Sstevel@tonic-gate     if( pPg==pPg->pPager->pFirstSynced ){
11327c478bd9Sstevel@tonic-gate       PgHdr *p = pPg->pNextFree;
11337c478bd9Sstevel@tonic-gate       while( p && p->needSync ){ p = p->pNextFree; }
11347c478bd9Sstevel@tonic-gate       pPg->pPager->pFirstSynced = p;
11357c478bd9Sstevel@tonic-gate     }
11367c478bd9Sstevel@tonic-gate     if( pPg->pPrevFree ){
11377c478bd9Sstevel@tonic-gate       pPg->pPrevFree->pNextFree = pPg->pNextFree;
11387c478bd9Sstevel@tonic-gate     }else{
11397c478bd9Sstevel@tonic-gate       pPg->pPager->pFirst = pPg->pNextFree;
11407c478bd9Sstevel@tonic-gate     }
11417c478bd9Sstevel@tonic-gate     if( pPg->pNextFree ){
11427c478bd9Sstevel@tonic-gate       pPg->pNextFree->pPrevFree = pPg->pPrevFree;
11437c478bd9Sstevel@tonic-gate     }else{
11447c478bd9Sstevel@tonic-gate       pPg->pPager->pLast = pPg->pPrevFree;
11457c478bd9Sstevel@tonic-gate     }
11467c478bd9Sstevel@tonic-gate     pPg->pPager->nRef++;
11477c478bd9Sstevel@tonic-gate   }
11487c478bd9Sstevel@tonic-gate   pPg->nRef++;
11497c478bd9Sstevel@tonic-gate   REFINFO(pPg);
11507c478bd9Sstevel@tonic-gate }
11517c478bd9Sstevel@tonic-gate 
11527c478bd9Sstevel@tonic-gate /*
11537c478bd9Sstevel@tonic-gate ** Increment the reference count for a page.  The input pointer is
11547c478bd9Sstevel@tonic-gate ** a reference to the page data.
11557c478bd9Sstevel@tonic-gate */
sqlitepager_ref(void * pData)11567c478bd9Sstevel@tonic-gate int sqlitepager_ref(void *pData){
11577c478bd9Sstevel@tonic-gate   PgHdr *pPg = DATA_TO_PGHDR(pData);
11587c478bd9Sstevel@tonic-gate   page_ref(pPg);
11597c478bd9Sstevel@tonic-gate   return SQLITE_OK;
11607c478bd9Sstevel@tonic-gate }
11617c478bd9Sstevel@tonic-gate 
11627c478bd9Sstevel@tonic-gate /*
11637c478bd9Sstevel@tonic-gate ** Sync the journal.  In other words, make sure all the pages that have
11647c478bd9Sstevel@tonic-gate ** been written to the journal have actually reached the surface of the
11657c478bd9Sstevel@tonic-gate ** disk.  It is not safe to modify the original database file until after
11667c478bd9Sstevel@tonic-gate ** the journal has been synced.  If the original database is modified before
11677c478bd9Sstevel@tonic-gate ** the journal is synced and a power failure occurs, the unsynced journal
11687c478bd9Sstevel@tonic-gate ** data would be lost and we would be unable to completely rollback the
11697c478bd9Sstevel@tonic-gate ** database changes.  Database corruption would occur.
1170*1da57d55SToomas Soome **
11717c478bd9Sstevel@tonic-gate ** This routine also updates the nRec field in the header of the journal.
11727c478bd9Sstevel@tonic-gate ** (See comments on the pager_playback() routine for additional information.)
11737c478bd9Sstevel@tonic-gate ** If the sync mode is FULL, two syncs will occur.  First the whole journal
11747c478bd9Sstevel@tonic-gate ** is synced, then the nRec field is updated, then a second sync occurs.
11757c478bd9Sstevel@tonic-gate **
11767c478bd9Sstevel@tonic-gate ** For temporary databases, we do not care if we are able to rollback
11777c478bd9Sstevel@tonic-gate ** after a power failure, so sync occurs.
11787c478bd9Sstevel@tonic-gate **
11797c478bd9Sstevel@tonic-gate ** This routine clears the needSync field of every page current held in
11807c478bd9Sstevel@tonic-gate ** memory.
11817c478bd9Sstevel@tonic-gate */
syncJournal(Pager * pPager)11827c478bd9Sstevel@tonic-gate static int syncJournal(Pager *pPager){
11837c478bd9Sstevel@tonic-gate   PgHdr *pPg;
11847c478bd9Sstevel@tonic-gate   int rc = SQLITE_OK;
11857c478bd9Sstevel@tonic-gate 
11867c478bd9Sstevel@tonic-gate   /* Sync the journal before modifying the main database
11877c478bd9Sstevel@tonic-gate   ** (assuming there is a journal and it needs to be synced.)
11887c478bd9Sstevel@tonic-gate   */
11897c478bd9Sstevel@tonic-gate   if( pPager->needSync ){
11907c478bd9Sstevel@tonic-gate     if( !pPager->tempFile ){
11917c478bd9Sstevel@tonic-gate       assert( pPager->journalOpen );
11927c478bd9Sstevel@tonic-gate       /* assert( !pPager->noSync ); // noSync might be set if synchronous
11937c478bd9Sstevel@tonic-gate       ** was turned off after the transaction was started.  Ticket #615 */
11947c478bd9Sstevel@tonic-gate #ifndef NDEBUG
11957c478bd9Sstevel@tonic-gate       {
11967c478bd9Sstevel@tonic-gate         /* Make sure the pPager->nRec counter we are keeping agrees
11977c478bd9Sstevel@tonic-gate         ** with the nRec computed from the size of the journal file.
11987c478bd9Sstevel@tonic-gate         */
11997c478bd9Sstevel@tonic-gate         off_t hdrSz, pgSz, jSz;
12007c478bd9Sstevel@tonic-gate         hdrSz = JOURNAL_HDR_SZ(journal_format);
12017c478bd9Sstevel@tonic-gate         pgSz = JOURNAL_PG_SZ(journal_format);
12027c478bd9Sstevel@tonic-gate         rc = sqliteOsFileSize(&pPager->jfd, &jSz);
12037c478bd9Sstevel@tonic-gate         if( rc!=0 ) return rc;
12047c478bd9Sstevel@tonic-gate         assert( pPager->nRec*pgSz+hdrSz==jSz );
12057c478bd9Sstevel@tonic-gate       }
12067c478bd9Sstevel@tonic-gate #endif
12077c478bd9Sstevel@tonic-gate       if( journal_format>=3 ){
12087c478bd9Sstevel@tonic-gate         /* Write the nRec value into the journal file header */
12097c478bd9Sstevel@tonic-gate         off_t szJ;
12107c478bd9Sstevel@tonic-gate         if( pPager->fullSync ){
12117c478bd9Sstevel@tonic-gate           TRACE1("SYNC\n");
12127c478bd9Sstevel@tonic-gate           rc = sqliteOsSync(&pPager->jfd);
12137c478bd9Sstevel@tonic-gate           if( rc!=0 ) return rc;
12147c478bd9Sstevel@tonic-gate         }
12157c478bd9Sstevel@tonic-gate         sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
12167c478bd9Sstevel@tonic-gate         rc = write32bits(&pPager->jfd, pPager->nRec);
12177c478bd9Sstevel@tonic-gate         if( rc ) return rc;
12187c478bd9Sstevel@tonic-gate         szJ = JOURNAL_HDR_SZ(journal_format) +
12197c478bd9Sstevel@tonic-gate                  pPager->nRec*JOURNAL_PG_SZ(journal_format);
12207c478bd9Sstevel@tonic-gate         sqliteOsSeek(&pPager->jfd, szJ);
12217c478bd9Sstevel@tonic-gate       }
12227c478bd9Sstevel@tonic-gate       TRACE1("SYNC\n");
12237c478bd9Sstevel@tonic-gate       rc = sqliteOsSync(&pPager->jfd);
12247c478bd9Sstevel@tonic-gate       if( rc!=0 ) return rc;
12257c478bd9Sstevel@tonic-gate       pPager->journalStarted = 1;
12267c478bd9Sstevel@tonic-gate     }
12277c478bd9Sstevel@tonic-gate     pPager->needSync = 0;
12287c478bd9Sstevel@tonic-gate 
12297c478bd9Sstevel@tonic-gate     /* Erase the needSync flag from every page.
12307c478bd9Sstevel@tonic-gate     */
12317c478bd9Sstevel@tonic-gate     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
12327c478bd9Sstevel@tonic-gate       pPg->needSync = 0;
12337c478bd9Sstevel@tonic-gate     }
12347c478bd9Sstevel@tonic-gate     pPager->pFirstSynced = pPager->pFirst;
12357c478bd9Sstevel@tonic-gate   }
12367c478bd9Sstevel@tonic-gate 
12377c478bd9Sstevel@tonic-gate #ifndef NDEBUG
12387c478bd9Sstevel@tonic-gate   /* If the Pager.needSync flag is clear then the PgHdr.needSync
12397c478bd9Sstevel@tonic-gate   ** flag must also be clear for all pages.  Verify that this
12407c478bd9Sstevel@tonic-gate   ** invariant is true.
12417c478bd9Sstevel@tonic-gate   */
12427c478bd9Sstevel@tonic-gate   else{
12437c478bd9Sstevel@tonic-gate     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
12447c478bd9Sstevel@tonic-gate       assert( pPg->needSync==0 );
12457c478bd9Sstevel@tonic-gate     }
12467c478bd9Sstevel@tonic-gate     assert( pPager->pFirstSynced==pPager->pFirst );
12477c478bd9Sstevel@tonic-gate   }
12487c478bd9Sstevel@tonic-gate #endif
12497c478bd9Sstevel@tonic-gate 
12507c478bd9Sstevel@tonic-gate   return rc;
12517c478bd9Sstevel@tonic-gate }
12527c478bd9Sstevel@tonic-gate 
12537c478bd9Sstevel@tonic-gate /*
12547c478bd9Sstevel@tonic-gate ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
12557c478bd9Sstevel@tonic-gate ** every one of those pages out to the database file and mark them all
12567c478bd9Sstevel@tonic-gate ** as clean.
12577c478bd9Sstevel@tonic-gate */
pager_write_pagelist(PgHdr * pList)12587c478bd9Sstevel@tonic-gate static int pager_write_pagelist(PgHdr *pList){
12597c478bd9Sstevel@tonic-gate   Pager *pPager;
12607c478bd9Sstevel@tonic-gate   int rc;
12617c478bd9Sstevel@tonic-gate 
12627c478bd9Sstevel@tonic-gate   if( pList==0 ) return SQLITE_OK;
12637c478bd9Sstevel@tonic-gate   pPager = pList->pPager;
12647c478bd9Sstevel@tonic-gate   while( pList ){
12657c478bd9Sstevel@tonic-gate     assert( pList->dirty );
12667c478bd9Sstevel@tonic-gate     sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
12677c478bd9Sstevel@tonic-gate     CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
12687c478bd9Sstevel@tonic-gate     TRACE2("STORE %d\n", pList->pgno);
12697c478bd9Sstevel@tonic-gate     rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
12707c478bd9Sstevel@tonic-gate     CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
12717c478bd9Sstevel@tonic-gate     if( rc ) return rc;
12727c478bd9Sstevel@tonic-gate     pList->dirty = 0;
12737c478bd9Sstevel@tonic-gate     pList = pList->pDirty;
12747c478bd9Sstevel@tonic-gate   }
12757c478bd9Sstevel@tonic-gate   return SQLITE_OK;
12767c478bd9Sstevel@tonic-gate }
12777c478bd9Sstevel@tonic-gate 
12787c478bd9Sstevel@tonic-gate /*
12797c478bd9Sstevel@tonic-gate ** Collect every dirty page into a dirty list and
12807c478bd9Sstevel@tonic-gate ** return a pointer to the head of that list.  All pages are
12817c478bd9Sstevel@tonic-gate ** collected even if they are still in use.
12827c478bd9Sstevel@tonic-gate */
pager_get_all_dirty_pages(Pager * pPager)12837c478bd9Sstevel@tonic-gate static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
12847c478bd9Sstevel@tonic-gate   PgHdr *p, *pList;
12857c478bd9Sstevel@tonic-gate   pList = 0;
12867c478bd9Sstevel@tonic-gate   for(p=pPager->pAll; p; p=p->pNextAll){
12877c478bd9Sstevel@tonic-gate     if( p->dirty ){
12887c478bd9Sstevel@tonic-gate       p->pDirty = pList;
12897c478bd9Sstevel@tonic-gate       pList = p;
12907c478bd9Sstevel@tonic-gate     }
12917c478bd9Sstevel@tonic-gate   }
12927c478bd9Sstevel@tonic-gate   return pList;
12937c478bd9Sstevel@tonic-gate }
12947c478bd9Sstevel@tonic-gate 
12957c478bd9Sstevel@tonic-gate /*
12967c478bd9Sstevel@tonic-gate ** Acquire a page.
12977c478bd9Sstevel@tonic-gate **
1298*1da57d55SToomas Soome ** A read lock on the disk file is obtained when the first page is acquired.
12997c478bd9Sstevel@tonic-gate ** This read lock is dropped when the last page is released.
13007c478bd9Sstevel@tonic-gate **
13017c478bd9Sstevel@tonic-gate ** A _get works for any page number greater than 0.  If the database
13027c478bd9Sstevel@tonic-gate ** file is smaller than the requested page, then no actual disk
13037c478bd9Sstevel@tonic-gate ** read occurs and the memory image of the page is initialized to
13047c478bd9Sstevel@tonic-gate ** all zeros.  The extra data appended to a page is always initialized
13057c478bd9Sstevel@tonic-gate ** to zeros the first time a page is loaded into memory.
13067c478bd9Sstevel@tonic-gate **
13077c478bd9Sstevel@tonic-gate ** The acquisition might fail for several reasons.  In all cases,
13087c478bd9Sstevel@tonic-gate ** an appropriate error code is returned and *ppPage is set to NULL.
13097c478bd9Sstevel@tonic-gate **
13107c478bd9Sstevel@tonic-gate ** See also sqlitepager_lookup().  Both this routine and _lookup() attempt
13117c478bd9Sstevel@tonic-gate ** to find a page in the in-memory cache first.  If the page is not already
13127c478bd9Sstevel@tonic-gate ** in memory, this routine goes to disk to read it in whereas _lookup()
13137c478bd9Sstevel@tonic-gate ** just returns 0.  This routine acquires a read-lock the first time it
13147c478bd9Sstevel@tonic-gate ** has to go to disk, and could also playback an old journal if necessary.
13157c478bd9Sstevel@tonic-gate ** Since _lookup() never goes to disk, it never has to deal with locks
13167c478bd9Sstevel@tonic-gate ** or journal files.
13177c478bd9Sstevel@tonic-gate */
sqlitepager_get(Pager * pPager,Pgno pgno,void ** ppPage)13187c478bd9Sstevel@tonic-gate int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
13197c478bd9Sstevel@tonic-gate   PgHdr *pPg;
13207c478bd9Sstevel@tonic-gate   int rc;
13217c478bd9Sstevel@tonic-gate 
13227c478bd9Sstevel@tonic-gate   /* Make sure we have not hit any critical errors.
1323*1da57d55SToomas Soome   */
13247c478bd9Sstevel@tonic-gate   assert( pPager!=0 );
13257c478bd9Sstevel@tonic-gate   assert( pgno!=0 );
13267c478bd9Sstevel@tonic-gate   *ppPage = 0;
13277c478bd9Sstevel@tonic-gate   if( pPager->errMask & ~(PAGER_ERR_FULL) ){
13287c478bd9Sstevel@tonic-gate     return pager_errcode(pPager);
13297c478bd9Sstevel@tonic-gate   }
13307c478bd9Sstevel@tonic-gate 
13317c478bd9Sstevel@tonic-gate   /* If this is the first page accessed, then get a read lock
13327c478bd9Sstevel@tonic-gate   ** on the database file.
13337c478bd9Sstevel@tonic-gate   */
13347c478bd9Sstevel@tonic-gate   if( pPager->nRef==0 ){
13357c478bd9Sstevel@tonic-gate     rc = sqliteOsReadLock(&pPager->fd);
13367c478bd9Sstevel@tonic-gate     if( rc!=SQLITE_OK ){
13377c478bd9Sstevel@tonic-gate       return rc;
13387c478bd9Sstevel@tonic-gate     }
13397c478bd9Sstevel@tonic-gate     pPager->state = SQLITE_READLOCK;
13407c478bd9Sstevel@tonic-gate 
13417c478bd9Sstevel@tonic-gate     /* If a journal file exists, try to play it back.
13427c478bd9Sstevel@tonic-gate     */
13437c478bd9Sstevel@tonic-gate     if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
13447c478bd9Sstevel@tonic-gate        int rc;
13457c478bd9Sstevel@tonic-gate 
13467c478bd9Sstevel@tonic-gate        /* Get a write lock on the database
13477c478bd9Sstevel@tonic-gate        */
13487c478bd9Sstevel@tonic-gate        rc = sqliteOsWriteLock(&pPager->fd);
13497c478bd9Sstevel@tonic-gate        if( rc!=SQLITE_OK ){
13507c478bd9Sstevel@tonic-gate          if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
13517c478bd9Sstevel@tonic-gate            /* This should never happen! */
13527c478bd9Sstevel@tonic-gate            rc = SQLITE_INTERNAL;
13537c478bd9Sstevel@tonic-gate          }
13547c478bd9Sstevel@tonic-gate          return rc;
13557c478bd9Sstevel@tonic-gate        }
13567c478bd9Sstevel@tonic-gate        pPager->state = SQLITE_WRITELOCK;
13577c478bd9Sstevel@tonic-gate 
13587c478bd9Sstevel@tonic-gate        /* Open the journal for reading only.  Return SQLITE_BUSY if
1359*1da57d55SToomas Soome        ** we are unable to open the journal file.
13607c478bd9Sstevel@tonic-gate        **
13617c478bd9Sstevel@tonic-gate        ** The journal file does not need to be locked itself.  The
13627c478bd9Sstevel@tonic-gate        ** journal file is never open unless the main database file holds
13637c478bd9Sstevel@tonic-gate        ** a write lock, so there is never any chance of two or more
13647c478bd9Sstevel@tonic-gate        ** processes opening the journal at the same time.
13657c478bd9Sstevel@tonic-gate        */
13667c478bd9Sstevel@tonic-gate        rc = sqliteOsOpenReadOnly(pPager->zJournal, &pPager->jfd);
13677c478bd9Sstevel@tonic-gate        if( rc!=SQLITE_OK ){
13687c478bd9Sstevel@tonic-gate          rc = sqliteOsUnlock(&pPager->fd);
13697c478bd9Sstevel@tonic-gate          assert( rc==SQLITE_OK );
13707c478bd9Sstevel@tonic-gate          return SQLITE_BUSY;
13717c478bd9Sstevel@tonic-gate        }
13727c478bd9Sstevel@tonic-gate        pPager->journalOpen = 1;
13737c478bd9Sstevel@tonic-gate        pPager->journalStarted = 0;
13747c478bd9Sstevel@tonic-gate 
13757c478bd9Sstevel@tonic-gate        /* Playback and delete the journal.  Drop the database write
13767c478bd9Sstevel@tonic-gate        ** lock and reacquire the read lock.
13777c478bd9Sstevel@tonic-gate        */
13787c478bd9Sstevel@tonic-gate        rc = pager_playback(pPager, 0);
13797c478bd9Sstevel@tonic-gate        if( rc!=SQLITE_OK ){
13807c478bd9Sstevel@tonic-gate          return rc;
13817c478bd9Sstevel@tonic-gate        }
13827c478bd9Sstevel@tonic-gate     }
13837c478bd9Sstevel@tonic-gate     pPg = 0;
13847c478bd9Sstevel@tonic-gate   }else{
13857c478bd9Sstevel@tonic-gate     /* Search for page in cache */
13867c478bd9Sstevel@tonic-gate     pPg = pager_lookup(pPager, pgno);
13877c478bd9Sstevel@tonic-gate   }
13887c478bd9Sstevel@tonic-gate   if( pPg==0 ){
13897c478bd9Sstevel@tonic-gate     /* The requested page is not in the page cache. */
13907c478bd9Sstevel@tonic-gate     int h;
13917c478bd9Sstevel@tonic-gate     pPager->nMiss++;
13927c478bd9Sstevel@tonic-gate     if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
13937c478bd9Sstevel@tonic-gate       /* Create a new page */
1394*1da57d55SToomas Soome       pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
13957c478bd9Sstevel@tonic-gate                               + sizeof(u32) + pPager->nExtra );
13967c478bd9Sstevel@tonic-gate       if( pPg==0 ){
13977c478bd9Sstevel@tonic-gate         pager_unwritelock(pPager);
13987c478bd9Sstevel@tonic-gate         pPager->errMask |= PAGER_ERR_MEM;
13997c478bd9Sstevel@tonic-gate         return SQLITE_NOMEM;
14007c478bd9Sstevel@tonic-gate       }
14017c478bd9Sstevel@tonic-gate       memset(pPg, 0, sizeof(*pPg));
14027c478bd9Sstevel@tonic-gate       pPg->pPager = pPager;
14037c478bd9Sstevel@tonic-gate       pPg->pNextAll = pPager->pAll;
14047c478bd9Sstevel@tonic-gate       if( pPager->pAll ){
14057c478bd9Sstevel@tonic-gate         pPager->pAll->pPrevAll = pPg;
14067c478bd9Sstevel@tonic-gate       }
14077c478bd9Sstevel@tonic-gate       pPg->pPrevAll = 0;
14087c478bd9Sstevel@tonic-gate       pPager->pAll = pPg;
14097c478bd9Sstevel@tonic-gate       pPager->nPage++;
14107c478bd9Sstevel@tonic-gate     }else{
14117c478bd9Sstevel@tonic-gate       /* Find a page to recycle.  Try to locate a page that does not
14127c478bd9Sstevel@tonic-gate       ** require us to do an fsync() on the journal.
14137c478bd9Sstevel@tonic-gate       */
14147c478bd9Sstevel@tonic-gate       pPg = pPager->pFirstSynced;
14157c478bd9Sstevel@tonic-gate 
14167c478bd9Sstevel@tonic-gate       /* If we could not find a page that does not require an fsync()
14177c478bd9Sstevel@tonic-gate       ** on the journal file then fsync the journal file.  This is a
14187c478bd9Sstevel@tonic-gate       ** very slow operation, so we work hard to avoid it.  But sometimes
14197c478bd9Sstevel@tonic-gate       ** it can't be helped.
14207c478bd9Sstevel@tonic-gate       */
14217c478bd9Sstevel@tonic-gate       if( pPg==0 ){
14227c478bd9Sstevel@tonic-gate         int rc = syncJournal(pPager);
14237c478bd9Sstevel@tonic-gate         if( rc!=0 ){
14247c478bd9Sstevel@tonic-gate           sqlitepager_rollback(pPager);
14257c478bd9Sstevel@tonic-gate           return SQLITE_IOERR;
14267c478bd9Sstevel@tonic-gate         }
14277c478bd9Sstevel@tonic-gate         pPg = pPager->pFirst;
14287c478bd9Sstevel@tonic-gate       }
14297c478bd9Sstevel@tonic-gate       assert( pPg->nRef==0 );
14307c478bd9Sstevel@tonic-gate 
14317c478bd9Sstevel@tonic-gate       /* Write the page to the database file if it is dirty.
14327c478bd9Sstevel@tonic-gate       */
14337c478bd9Sstevel@tonic-gate       if( pPg->dirty ){
14347c478bd9Sstevel@tonic-gate         assert( pPg->needSync==0 );
14357c478bd9Sstevel@tonic-gate         pPg->pDirty = 0;
14367c478bd9Sstevel@tonic-gate         rc = pager_write_pagelist( pPg );
14377c478bd9Sstevel@tonic-gate         if( rc!=SQLITE_OK ){
14387c478bd9Sstevel@tonic-gate           sqlitepager_rollback(pPager);
14397c478bd9Sstevel@tonic-gate           return SQLITE_IOERR;
14407c478bd9Sstevel@tonic-gate         }
14417c478bd9Sstevel@tonic-gate       }
14427c478bd9Sstevel@tonic-gate       assert( pPg->dirty==0 );
14437c478bd9Sstevel@tonic-gate 
14447c478bd9Sstevel@tonic-gate       /* If the page we are recycling is marked as alwaysRollback, then
14457c478bd9Sstevel@tonic-gate       ** set the global alwaysRollback flag, thus disabling the
14467c478bd9Sstevel@tonic-gate       ** sqlite_dont_rollback() optimization for the rest of this transaction.
14477c478bd9Sstevel@tonic-gate       ** It is necessary to do this because the page marked alwaysRollback
14487c478bd9Sstevel@tonic-gate       ** might be reloaded at a later time but at that point we won't remember
14497c478bd9Sstevel@tonic-gate       ** that is was marked alwaysRollback.  This means that all pages must
14507c478bd9Sstevel@tonic-gate       ** be marked as alwaysRollback from here on out.
14517c478bd9Sstevel@tonic-gate       */
14527c478bd9Sstevel@tonic-gate       if( pPg->alwaysRollback ){
14537c478bd9Sstevel@tonic-gate         pPager->alwaysRollback = 1;
14547c478bd9Sstevel@tonic-gate       }
14557c478bd9Sstevel@tonic-gate 
14567c478bd9Sstevel@tonic-gate       /* Unlink the old page from the free list and the hash table
14577c478bd9Sstevel@tonic-gate       */
14587c478bd9Sstevel@tonic-gate       if( pPg==pPager->pFirstSynced ){
14597c478bd9Sstevel@tonic-gate         PgHdr *p = pPg->pNextFree;
14607c478bd9Sstevel@tonic-gate         while( p && p->needSync ){ p = p->pNextFree; }
14617c478bd9Sstevel@tonic-gate         pPager->pFirstSynced = p;
14627c478bd9Sstevel@tonic-gate       }
14637c478bd9Sstevel@tonic-gate       if( pPg->pPrevFree ){
14647c478bd9Sstevel@tonic-gate         pPg->pPrevFree->pNextFree = pPg->pNextFree;
14657c478bd9Sstevel@tonic-gate       }else{
14667c478bd9Sstevel@tonic-gate         assert( pPager->pFirst==pPg );
14677c478bd9Sstevel@tonic-gate         pPager->pFirst = pPg->pNextFree;
14687c478bd9Sstevel@tonic-gate       }
14697c478bd9Sstevel@tonic-gate       if( pPg->pNextFree ){
14707c478bd9Sstevel@tonic-gate         pPg->pNextFree->pPrevFree = pPg->pPrevFree;
14717c478bd9Sstevel@tonic-gate       }else{
14727c478bd9Sstevel@tonic-gate         assert( pPager->pLast==pPg );
14737c478bd9Sstevel@tonic-gate         pPager->pLast = pPg->pPrevFree;
14747c478bd9Sstevel@tonic-gate       }
14757c478bd9Sstevel@tonic-gate       pPg->pNextFree = pPg->pPrevFree = 0;
14767c478bd9Sstevel@tonic-gate       if( pPg->pNextHash ){
14777c478bd9Sstevel@tonic-gate         pPg->pNextHash->pPrevHash = pPg->pPrevHash;
14787c478bd9Sstevel@tonic-gate       }
14797c478bd9Sstevel@tonic-gate       if( pPg->pPrevHash ){
14807c478bd9Sstevel@tonic-gate         pPg->pPrevHash->pNextHash = pPg->pNextHash;
14817c478bd9Sstevel@tonic-gate       }else{
14827c478bd9Sstevel@tonic-gate         h = pager_hash(pPg->pgno);
14837c478bd9Sstevel@tonic-gate         assert( pPager->aHash[h]==pPg );
14847c478bd9Sstevel@tonic-gate         pPager->aHash[h] = pPg->pNextHash;
14857c478bd9Sstevel@tonic-gate       }
14867c478bd9Sstevel@tonic-gate       pPg->pNextHash = pPg->pPrevHash = 0;
14877c478bd9Sstevel@tonic-gate       pPager->nOvfl++;
14887c478bd9Sstevel@tonic-gate     }
14897c478bd9Sstevel@tonic-gate     pPg->pgno = pgno;
14907c478bd9Sstevel@tonic-gate     if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
14917c478bd9Sstevel@tonic-gate       sqliteCheckMemory(pPager->aInJournal, pgno/8);
14927c478bd9Sstevel@tonic-gate       assert( pPager->journalOpen );
14937c478bd9Sstevel@tonic-gate       pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
14947c478bd9Sstevel@tonic-gate       pPg->needSync = 0;
14957c478bd9Sstevel@tonic-gate     }else{
14967c478bd9Sstevel@tonic-gate       pPg->inJournal = 0;
14977c478bd9Sstevel@tonic-gate       pPg->needSync = 0;
14987c478bd9Sstevel@tonic-gate     }
14997c478bd9Sstevel@tonic-gate     if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
15007c478bd9Sstevel@tonic-gate              && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
15017c478bd9Sstevel@tonic-gate       page_add_to_ckpt_list(pPg);
15027c478bd9Sstevel@tonic-gate     }else{
15037c478bd9Sstevel@tonic-gate       page_remove_from_ckpt_list(pPg);
15047c478bd9Sstevel@tonic-gate     }
15057c478bd9Sstevel@tonic-gate     pPg->dirty = 0;
15067c478bd9Sstevel@tonic-gate     pPg->nRef = 1;
15077c478bd9Sstevel@tonic-gate     REFINFO(pPg);
15087c478bd9Sstevel@tonic-gate     pPager->nRef++;
15097c478bd9Sstevel@tonic-gate     h = pager_hash(pgno);
15107c478bd9Sstevel@tonic-gate     pPg->pNextHash = pPager->aHash[h];
15117c478bd9Sstevel@tonic-gate     pPager->aHash[h] = pPg;
15127c478bd9Sstevel@tonic-gate     if( pPg->pNextHash ){
15137c478bd9Sstevel@tonic-gate       assert( pPg->pNextHash->pPrevHash==0 );
15147c478bd9Sstevel@tonic-gate       pPg->pNextHash->pPrevHash = pPg;
15157c478bd9Sstevel@tonic-gate     }
15167c478bd9Sstevel@tonic-gate     if( pPager->nExtra>0 ){
15177c478bd9Sstevel@tonic-gate       memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
15187c478bd9Sstevel@tonic-gate     }
15197c478bd9Sstevel@tonic-gate     if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
15207c478bd9Sstevel@tonic-gate     if( pPager->errMask!=0 ){
15217c478bd9Sstevel@tonic-gate       sqlitepager_unref(PGHDR_TO_DATA(pPg));
15227c478bd9Sstevel@tonic-gate       rc = pager_errcode(pPager);
15237c478bd9Sstevel@tonic-gate       return rc;
15247c478bd9Sstevel@tonic-gate     }
15257c478bd9Sstevel@tonic-gate     if( pPager->dbSize<(int)pgno ){
15267c478bd9Sstevel@tonic-gate       memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
15277c478bd9Sstevel@tonic-gate     }else{
15287c478bd9Sstevel@tonic-gate       int rc;
15297c478bd9Sstevel@tonic-gate       sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
15307c478bd9Sstevel@tonic-gate       rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
15317c478bd9Sstevel@tonic-gate       TRACE2("FETCH %d\n", pPg->pgno);
15327c478bd9Sstevel@tonic-gate       CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
15337c478bd9Sstevel@tonic-gate       if( rc!=SQLITE_OK ){
15347c478bd9Sstevel@tonic-gate         off_t fileSize;
15357c478bd9Sstevel@tonic-gate         if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
15367c478bd9Sstevel@tonic-gate                || fileSize>=pgno*SQLITE_PAGE_SIZE ){
15377c478bd9Sstevel@tonic-gate           sqlitepager_unref(PGHDR_TO_DATA(pPg));
15387c478bd9Sstevel@tonic-gate           return rc;
15397c478bd9Sstevel@tonic-gate         }else{
15407c478bd9Sstevel@tonic-gate           memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
15417c478bd9Sstevel@tonic-gate         }
15427c478bd9Sstevel@tonic-gate       }
15437c478bd9Sstevel@tonic-gate     }
15447c478bd9Sstevel@tonic-gate   }else{
15457c478bd9Sstevel@tonic-gate     /* The requested page is in the page cache. */
15467c478bd9Sstevel@tonic-gate     pPager->nHit++;
15477c478bd9Sstevel@tonic-gate     page_ref(pPg);
15487c478bd9Sstevel@tonic-gate   }
15497c478bd9Sstevel@tonic-gate   *ppPage = PGHDR_TO_DATA(pPg);
15507c478bd9Sstevel@tonic-gate   return SQLITE_OK;
15517c478bd9Sstevel@tonic-gate }
15527c478bd9Sstevel@tonic-gate 
15537c478bd9Sstevel@tonic-gate /*
15547c478bd9Sstevel@tonic-gate ** Acquire a page if it is already in the in-memory cache.  Do
15557c478bd9Sstevel@tonic-gate ** not read the page from disk.  Return a pointer to the page,
15567c478bd9Sstevel@tonic-gate ** or 0 if the page is not in cache.
15577c478bd9Sstevel@tonic-gate **
15587c478bd9Sstevel@tonic-gate ** See also sqlitepager_get().  The difference between this routine
15597c478bd9Sstevel@tonic-gate ** and sqlitepager_get() is that _get() will go to the disk and read
15607c478bd9Sstevel@tonic-gate ** in the page if the page is not already in cache.  This routine
1561*1da57d55SToomas Soome ** returns NULL if the page is not in cache or if a disk I/O error
15627c478bd9Sstevel@tonic-gate ** has ever happened.
15637c478bd9Sstevel@tonic-gate */
sqlitepager_lookup(Pager * pPager,Pgno pgno)15647c478bd9Sstevel@tonic-gate void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
15657c478bd9Sstevel@tonic-gate   PgHdr *pPg;
15667c478bd9Sstevel@tonic-gate 
15677c478bd9Sstevel@tonic-gate   assert( pPager!=0 );
15687c478bd9Sstevel@tonic-gate   assert( pgno!=0 );
15697c478bd9Sstevel@tonic-gate   if( pPager->errMask & ~(PAGER_ERR_FULL) ){
15707c478bd9Sstevel@tonic-gate     return 0;
15717c478bd9Sstevel@tonic-gate   }
15727c478bd9Sstevel@tonic-gate   /* if( pPager->nRef==0 ){
15737c478bd9Sstevel@tonic-gate   **  return 0;
15747c478bd9Sstevel@tonic-gate   ** }
15757c478bd9Sstevel@tonic-gate   */
15767c478bd9Sstevel@tonic-gate   pPg = pager_lookup(pPager, pgno);
15777c478bd9Sstevel@tonic-gate   if( pPg==0 ) return 0;
15787c478bd9Sstevel@tonic-gate   page_ref(pPg);
15797c478bd9Sstevel@tonic-gate   return PGHDR_TO_DATA(pPg);
15807c478bd9Sstevel@tonic-gate }
15817c478bd9Sstevel@tonic-gate 
15827c478bd9Sstevel@tonic-gate /*
15837c478bd9Sstevel@tonic-gate ** Release a page.
15847c478bd9Sstevel@tonic-gate **
15857c478bd9Sstevel@tonic-gate ** If the number of references to the page drop to zero, then the
15867c478bd9Sstevel@tonic-gate ** page is added to the LRU list.  When all references to all pages
15877c478bd9Sstevel@tonic-gate ** are released, a rollback occurs and the lock on the database is
15887c478bd9Sstevel@tonic-gate ** removed.
15897c478bd9Sstevel@tonic-gate */
sqlitepager_unref(void * pData)15907c478bd9Sstevel@tonic-gate int sqlitepager_unref(void *pData){
15917c478bd9Sstevel@tonic-gate   PgHdr *pPg;
15927c478bd9Sstevel@tonic-gate 
15937c478bd9Sstevel@tonic-gate   /* Decrement the reference count for this page
15947c478bd9Sstevel@tonic-gate   */
15957c478bd9Sstevel@tonic-gate   pPg = DATA_TO_PGHDR(pData);
15967c478bd9Sstevel@tonic-gate   assert( pPg->nRef>0 );
15977c478bd9Sstevel@tonic-gate   pPg->nRef--;
15987c478bd9Sstevel@tonic-gate   REFINFO(pPg);
15997c478bd9Sstevel@tonic-gate 
16007c478bd9Sstevel@tonic-gate   /* When the number of references to a page reach 0, call the
16017c478bd9Sstevel@tonic-gate   ** destructor and add the page to the freelist.
16027c478bd9Sstevel@tonic-gate   */
16037c478bd9Sstevel@tonic-gate   if( pPg->nRef==0 ){
16047c478bd9Sstevel@tonic-gate     Pager *pPager;
16057c478bd9Sstevel@tonic-gate     pPager = pPg->pPager;
16067c478bd9Sstevel@tonic-gate     pPg->pNextFree = 0;
16077c478bd9Sstevel@tonic-gate     pPg->pPrevFree = pPager->pLast;
16087c478bd9Sstevel@tonic-gate     pPager->pLast = pPg;
16097c478bd9Sstevel@tonic-gate     if( pPg->pPrevFree ){
16107c478bd9Sstevel@tonic-gate       pPg->pPrevFree->pNextFree = pPg;
16117c478bd9Sstevel@tonic-gate     }else{
16127c478bd9Sstevel@tonic-gate       pPager->pFirst = pPg;
16137c478bd9Sstevel@tonic-gate     }
16147c478bd9Sstevel@tonic-gate     if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
16157c478bd9Sstevel@tonic-gate       pPager->pFirstSynced = pPg;
16167c478bd9Sstevel@tonic-gate     }
16177c478bd9Sstevel@tonic-gate     if( pPager->xDestructor ){
16187c478bd9Sstevel@tonic-gate       pPager->xDestructor(pData);
16197c478bd9Sstevel@tonic-gate     }
1620*1da57d55SToomas Soome 
16217c478bd9Sstevel@tonic-gate     /* When all pages reach the freelist, drop the read lock from
16227c478bd9Sstevel@tonic-gate     ** the database file.
16237c478bd9Sstevel@tonic-gate     */
16247c478bd9Sstevel@tonic-gate     pPager->nRef--;
16257c478bd9Sstevel@tonic-gate     assert( pPager->nRef>=0 );
16267c478bd9Sstevel@tonic-gate     if( pPager->nRef==0 ){
16277c478bd9Sstevel@tonic-gate       pager_reset(pPager);
16287c478bd9Sstevel@tonic-gate     }
16297c478bd9Sstevel@tonic-gate   }
16307c478bd9Sstevel@tonic-gate   return SQLITE_OK;
16317c478bd9Sstevel@tonic-gate }
16327c478bd9Sstevel@tonic-gate 
16337c478bd9Sstevel@tonic-gate /*
16347c478bd9Sstevel@tonic-gate ** Create a journal file for pPager.  There should already be a write
16357c478bd9Sstevel@tonic-gate ** lock on the database file when this routine is called.
16367c478bd9Sstevel@tonic-gate **
16377c478bd9Sstevel@tonic-gate ** Return SQLITE_OK if everything.  Return an error code and release the
16387c478bd9Sstevel@tonic-gate ** write lock if anything goes wrong.
16397c478bd9Sstevel@tonic-gate */
pager_open_journal(Pager * pPager)16407c478bd9Sstevel@tonic-gate static int pager_open_journal(Pager *pPager){
16417c478bd9Sstevel@tonic-gate   int rc;
16427c478bd9Sstevel@tonic-gate   assert( pPager->state==SQLITE_WRITELOCK );
16437c478bd9Sstevel@tonic-gate   assert( pPager->journalOpen==0 );
16447c478bd9Sstevel@tonic-gate   assert( pPager->useJournal );
16457c478bd9Sstevel@tonic-gate   sqlitepager_pagecount(pPager);
16467c478bd9Sstevel@tonic-gate   pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
16477c478bd9Sstevel@tonic-gate   if( pPager->aInJournal==0 ){
16487c478bd9Sstevel@tonic-gate     sqliteOsReadLock(&pPager->fd);
16497c478bd9Sstevel@tonic-gate     pPager->state = SQLITE_READLOCK;
16507c478bd9Sstevel@tonic-gate     return SQLITE_NOMEM;
16517c478bd9Sstevel@tonic-gate   }
16527c478bd9Sstevel@tonic-gate   rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
16537c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
16547c478bd9Sstevel@tonic-gate     sqliteFree(pPager->aInJournal);
16557c478bd9Sstevel@tonic-gate     pPager->aInJournal = 0;
16567c478bd9Sstevel@tonic-gate     sqliteOsReadLock(&pPager->fd);
16577c478bd9Sstevel@tonic-gate     pPager->state = SQLITE_READLOCK;
16587c478bd9Sstevel@tonic-gate     return SQLITE_CANTOPEN;
16597c478bd9Sstevel@tonic-gate   }
16607c478bd9Sstevel@tonic-gate   sqliteOsOpenDirectory(pPager->zDirectory, &pPager->jfd);
16617c478bd9Sstevel@tonic-gate   pPager->journalOpen = 1;
16627c478bd9Sstevel@tonic-gate   pPager->journalStarted = 0;
16637c478bd9Sstevel@tonic-gate   pPager->needSync = 0;
16647c478bd9Sstevel@tonic-gate   pPager->alwaysRollback = 0;
16657c478bd9Sstevel@tonic-gate   pPager->nRec = 0;
16667c478bd9Sstevel@tonic-gate   if( pPager->errMask!=0 ){
16677c478bd9Sstevel@tonic-gate     rc = pager_errcode(pPager);
16687c478bd9Sstevel@tonic-gate     return rc;
16697c478bd9Sstevel@tonic-gate   }
16707c478bd9Sstevel@tonic-gate   pPager->origDbSize = pPager->dbSize;
16717c478bd9Sstevel@tonic-gate   if( journal_format==JOURNAL_FORMAT_3 ){
16727c478bd9Sstevel@tonic-gate     rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
16737c478bd9Sstevel@tonic-gate     if( rc==SQLITE_OK ){
16747c478bd9Sstevel@tonic-gate       rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
16757c478bd9Sstevel@tonic-gate     }
16767c478bd9Sstevel@tonic-gate     if( rc==SQLITE_OK ){
16777c478bd9Sstevel@tonic-gate       sqliteRandomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
16787c478bd9Sstevel@tonic-gate       rc = write32bits(&pPager->jfd, pPager->cksumInit);
16797c478bd9Sstevel@tonic-gate     }
16807c478bd9Sstevel@tonic-gate   }else if( journal_format==JOURNAL_FORMAT_2 ){
16817c478bd9Sstevel@tonic-gate     rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
16827c478bd9Sstevel@tonic-gate   }else{
16837c478bd9Sstevel@tonic-gate     assert( journal_format==JOURNAL_FORMAT_1 );
16847c478bd9Sstevel@tonic-gate     rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
16857c478bd9Sstevel@tonic-gate   }
16867c478bd9Sstevel@tonic-gate   if( rc==SQLITE_OK ){
16877c478bd9Sstevel@tonic-gate     rc = write32bits(&pPager->jfd, pPager->dbSize);
16887c478bd9Sstevel@tonic-gate   }
16897c478bd9Sstevel@tonic-gate   if( pPager->ckptAutoopen && rc==SQLITE_OK ){
16907c478bd9Sstevel@tonic-gate     rc = sqlitepager_ckpt_begin(pPager);
16917c478bd9Sstevel@tonic-gate   }
16927c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
16937c478bd9Sstevel@tonic-gate     rc = pager_unwritelock(pPager);
16947c478bd9Sstevel@tonic-gate     if( rc==SQLITE_OK ){
16957c478bd9Sstevel@tonic-gate       rc = SQLITE_FULL;
16967c478bd9Sstevel@tonic-gate     }
16977c478bd9Sstevel@tonic-gate   }
1698*1da57d55SToomas Soome   return rc;
16997c478bd9Sstevel@tonic-gate }
17007c478bd9Sstevel@tonic-gate 
17017c478bd9Sstevel@tonic-gate /*
17027c478bd9Sstevel@tonic-gate ** Acquire a write-lock on the database.  The lock is removed when
17037c478bd9Sstevel@tonic-gate ** the any of the following happen:
17047c478bd9Sstevel@tonic-gate **
17057c478bd9Sstevel@tonic-gate **   *  sqlitepager_commit() is called.
17067c478bd9Sstevel@tonic-gate **   *  sqlitepager_rollback() is called.
17077c478bd9Sstevel@tonic-gate **   *  sqlitepager_close() is called.
17087c478bd9Sstevel@tonic-gate **   *  sqlitepager_unref() is called to on every outstanding page.
17097c478bd9Sstevel@tonic-gate **
17107c478bd9Sstevel@tonic-gate ** The parameter to this routine is a pointer to any open page of the
17117c478bd9Sstevel@tonic-gate ** database file.  Nothing changes about the page - it is used merely
17127c478bd9Sstevel@tonic-gate ** to acquire a pointer to the Pager structure and as proof that there
17137c478bd9Sstevel@tonic-gate ** is already a read-lock on the database.
17147c478bd9Sstevel@tonic-gate **
17157c478bd9Sstevel@tonic-gate ** A journal file is opened if this is not a temporary file.  For
17167c478bd9Sstevel@tonic-gate ** temporary files, the opening of the journal file is deferred until
17177c478bd9Sstevel@tonic-gate ** there is an actual need to write to the journal.
17187c478bd9Sstevel@tonic-gate **
17197c478bd9Sstevel@tonic-gate ** If the database is already write-locked, this routine is a no-op.
17207c478bd9Sstevel@tonic-gate */
sqlitepager_begin(void * pData)17217c478bd9Sstevel@tonic-gate int sqlitepager_begin(void *pData){
17227c478bd9Sstevel@tonic-gate   PgHdr *pPg = DATA_TO_PGHDR(pData);
17237c478bd9Sstevel@tonic-gate   Pager *pPager = pPg->pPager;
17247c478bd9Sstevel@tonic-gate   int rc = SQLITE_OK;
17257c478bd9Sstevel@tonic-gate   assert( pPg->nRef>0 );
17267c478bd9Sstevel@tonic-gate   assert( pPager->state!=SQLITE_UNLOCK );
17277c478bd9Sstevel@tonic-gate   if( pPager->state==SQLITE_READLOCK ){
17287c478bd9Sstevel@tonic-gate     assert( pPager->aInJournal==0 );
17297c478bd9Sstevel@tonic-gate     rc = sqliteOsWriteLock(&pPager->fd);
17307c478bd9Sstevel@tonic-gate     if( rc!=SQLITE_OK ){
17317c478bd9Sstevel@tonic-gate       return rc;
17327c478bd9Sstevel@tonic-gate     }
17337c478bd9Sstevel@tonic-gate     pPager->state = SQLITE_WRITELOCK;
17347c478bd9Sstevel@tonic-gate     pPager->dirtyFile = 0;
17357c478bd9Sstevel@tonic-gate     TRACE1("TRANSACTION\n");
17367c478bd9Sstevel@tonic-gate     if( pPager->useJournal && !pPager->tempFile ){
17377c478bd9Sstevel@tonic-gate       rc = pager_open_journal(pPager);
17387c478bd9Sstevel@tonic-gate     }
17397c478bd9Sstevel@tonic-gate   }
17407c478bd9Sstevel@tonic-gate   return rc;
17417c478bd9Sstevel@tonic-gate }
17427c478bd9Sstevel@tonic-gate 
17437c478bd9Sstevel@tonic-gate /*
1744*1da57d55SToomas Soome ** Mark a data page as writeable.  The page is written into the journal
17457c478bd9Sstevel@tonic-gate ** if it is not there already.  This routine must be called before making
17467c478bd9Sstevel@tonic-gate ** changes to a page.
17477c478bd9Sstevel@tonic-gate **
17487c478bd9Sstevel@tonic-gate ** The first time this routine is called, the pager creates a new
17497c478bd9Sstevel@tonic-gate ** journal and acquires a write lock on the database.  If the write
17507c478bd9Sstevel@tonic-gate ** lock could not be acquired, this routine returns SQLITE_BUSY.  The
17517c478bd9Sstevel@tonic-gate ** calling routine must check for that return value and be careful not to
17527c478bd9Sstevel@tonic-gate ** change any page data until this routine returns SQLITE_OK.
17537c478bd9Sstevel@tonic-gate **
17547c478bd9Sstevel@tonic-gate ** If the journal file could not be written because the disk is full,
17557c478bd9Sstevel@tonic-gate ** then this routine returns SQLITE_FULL and does an immediate rollback.
17567c478bd9Sstevel@tonic-gate ** All subsequent write attempts also return SQLITE_FULL until there
17577c478bd9Sstevel@tonic-gate ** is a call to sqlitepager_commit() or sqlitepager_rollback() to
17587c478bd9Sstevel@tonic-gate ** reset.
17597c478bd9Sstevel@tonic-gate */
sqlitepager_write(void * pData)17607c478bd9Sstevel@tonic-gate int sqlitepager_write(void *pData){
17617c478bd9Sstevel@tonic-gate   PgHdr *pPg = DATA_TO_PGHDR(pData);
17627c478bd9Sstevel@tonic-gate   Pager *pPager = pPg->pPager;
17637c478bd9Sstevel@tonic-gate   int rc = SQLITE_OK;
17647c478bd9Sstevel@tonic-gate 
17657c478bd9Sstevel@tonic-gate   /* Check for errors
17667c478bd9Sstevel@tonic-gate   */
1767*1da57d55SToomas Soome   if( pPager->errMask ){
17687c478bd9Sstevel@tonic-gate     return pager_errcode(pPager);
17697c478bd9Sstevel@tonic-gate   }
17707c478bd9Sstevel@tonic-gate   if( pPager->readOnly ){
17717c478bd9Sstevel@tonic-gate     return SQLITE_PERM;
17727c478bd9Sstevel@tonic-gate   }
17737c478bd9Sstevel@tonic-gate 
17747c478bd9Sstevel@tonic-gate   /* Mark the page as dirty.  If the page has already been written
17757c478bd9Sstevel@tonic-gate   ** to the journal then we can return right away.
17767c478bd9Sstevel@tonic-gate   */
17777c478bd9Sstevel@tonic-gate   pPg->dirty = 1;
17787c478bd9Sstevel@tonic-gate   if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
17797c478bd9Sstevel@tonic-gate     pPager->dirtyFile = 1;
17807c478bd9Sstevel@tonic-gate     return SQLITE_OK;
17817c478bd9Sstevel@tonic-gate   }
17827c478bd9Sstevel@tonic-gate 
17837c478bd9Sstevel@tonic-gate   /* If we get this far, it means that the page needs to be
17847c478bd9Sstevel@tonic-gate   ** written to the transaction journal or the ckeckpoint journal
17857c478bd9Sstevel@tonic-gate   ** or both.
17867c478bd9Sstevel@tonic-gate   **
17877c478bd9Sstevel@tonic-gate   ** First check to see that the transaction journal exists and
17887c478bd9Sstevel@tonic-gate   ** create it if it does not.
17897c478bd9Sstevel@tonic-gate   */
17907c478bd9Sstevel@tonic-gate   assert( pPager->state!=SQLITE_UNLOCK );
17917c478bd9Sstevel@tonic-gate   rc = sqlitepager_begin(pData);
17927c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
17937c478bd9Sstevel@tonic-gate     return rc;
17947c478bd9Sstevel@tonic-gate   }
17957c478bd9Sstevel@tonic-gate   assert( pPager->state==SQLITE_WRITELOCK );
17967c478bd9Sstevel@tonic-gate   if( !pPager->journalOpen && pPager->useJournal ){
17977c478bd9Sstevel@tonic-gate     rc = pager_open_journal(pPager);
17987c478bd9Sstevel@tonic-gate     if( rc!=SQLITE_OK ) return rc;
17997c478bd9Sstevel@tonic-gate   }
18007c478bd9Sstevel@tonic-gate   assert( pPager->journalOpen || !pPager->useJournal );
18017c478bd9Sstevel@tonic-gate   pPager->dirtyFile = 1;
18027c478bd9Sstevel@tonic-gate 
18037c478bd9Sstevel@tonic-gate   /* The transaction journal now exists and we have a write lock on the
1804*1da57d55SToomas Soome   ** main database file.  Write the current page to the transaction
18057c478bd9Sstevel@tonic-gate   ** journal if it is not there already.
18067c478bd9Sstevel@tonic-gate   */
18077c478bd9Sstevel@tonic-gate   if( !pPg->inJournal && pPager->useJournal ){
18087c478bd9Sstevel@tonic-gate     if( (int)pPg->pgno <= pPager->origDbSize ){
18097c478bd9Sstevel@tonic-gate       int szPg;
18107c478bd9Sstevel@tonic-gate       u32 saved;
18117c478bd9Sstevel@tonic-gate       if( journal_format>=JOURNAL_FORMAT_3 ){
18127c478bd9Sstevel@tonic-gate         u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
18137c478bd9Sstevel@tonic-gate         saved = *(u32*)PGHDR_TO_EXTRA(pPg);
18147c478bd9Sstevel@tonic-gate         store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
18157c478bd9Sstevel@tonic-gate         szPg = SQLITE_PAGE_SIZE+8;
18167c478bd9Sstevel@tonic-gate       }else{
18177c478bd9Sstevel@tonic-gate         szPg = SQLITE_PAGE_SIZE+4;
18187c478bd9Sstevel@tonic-gate       }
18197c478bd9Sstevel@tonic-gate       store32bits(pPg->pgno, pPg, -4);
18207c478bd9Sstevel@tonic-gate       CODEC(pPager, pData, pPg->pgno, 7);
18217c478bd9Sstevel@tonic-gate       rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
18227c478bd9Sstevel@tonic-gate       TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
18237c478bd9Sstevel@tonic-gate       CODEC(pPager, pData, pPg->pgno, 0);
18247c478bd9Sstevel@tonic-gate       if( journal_format>=JOURNAL_FORMAT_3 ){
18257c478bd9Sstevel@tonic-gate         *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
18267c478bd9Sstevel@tonic-gate       }
18277c478bd9Sstevel@tonic-gate       if( rc!=SQLITE_OK ){
18287c478bd9Sstevel@tonic-gate         sqlitepager_rollback(pPager);
18297c478bd9Sstevel@tonic-gate         pPager->errMask |= PAGER_ERR_FULL;
18307c478bd9Sstevel@tonic-gate         return rc;
18317c478bd9Sstevel@tonic-gate       }
18327c478bd9Sstevel@tonic-gate       pPager->nRec++;
18337c478bd9Sstevel@tonic-gate       assert( pPager->aInJournal!=0 );
18347c478bd9Sstevel@tonic-gate       pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
18357c478bd9Sstevel@tonic-gate       pPg->needSync = !pPager->noSync;
18367c478bd9Sstevel@tonic-gate       pPg->inJournal = 1;
18377c478bd9Sstevel@tonic-gate       if( pPager->ckptInUse ){
18387c478bd9Sstevel@tonic-gate         pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
18397c478bd9Sstevel@tonic-gate         page_add_to_ckpt_list(pPg);
18407c478bd9Sstevel@tonic-gate       }
18417c478bd9Sstevel@tonic-gate     }else{
18427c478bd9Sstevel@tonic-gate       pPg->needSync = !pPager->journalStarted && !pPager->noSync;
18437c478bd9Sstevel@tonic-gate       TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
18447c478bd9Sstevel@tonic-gate     }
18457c478bd9Sstevel@tonic-gate     if( pPg->needSync ){
18467c478bd9Sstevel@tonic-gate       pPager->needSync = 1;
18477c478bd9Sstevel@tonic-gate     }
18487c478bd9Sstevel@tonic-gate   }
18497c478bd9Sstevel@tonic-gate 
18507c478bd9Sstevel@tonic-gate   /* If the checkpoint journal is open and the page is not in it,
18517c478bd9Sstevel@tonic-gate   ** then write the current page to the checkpoint journal.  Note that
18527c478bd9Sstevel@tonic-gate   ** the checkpoint journal always uses the simplier format 2 that lacks
18537c478bd9Sstevel@tonic-gate   ** checksums.  The header is also omitted from the checkpoint journal.
18547c478bd9Sstevel@tonic-gate   */
18557c478bd9Sstevel@tonic-gate   if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
18567c478bd9Sstevel@tonic-gate     assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
18577c478bd9Sstevel@tonic-gate     store32bits(pPg->pgno, pPg, -4);
18587c478bd9Sstevel@tonic-gate     CODEC(pPager, pData, pPg->pgno, 7);
18597c478bd9Sstevel@tonic-gate     rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
18607c478bd9Sstevel@tonic-gate     TRACE2("CKPT-JOURNAL %d\n", pPg->pgno);
18617c478bd9Sstevel@tonic-gate     CODEC(pPager, pData, pPg->pgno, 0);
18627c478bd9Sstevel@tonic-gate     if( rc!=SQLITE_OK ){
18637c478bd9Sstevel@tonic-gate       sqlitepager_rollback(pPager);
18647c478bd9Sstevel@tonic-gate       pPager->errMask |= PAGER_ERR_FULL;
18657c478bd9Sstevel@tonic-gate       return rc;
18667c478bd9Sstevel@tonic-gate     }
18677c478bd9Sstevel@tonic-gate     pPager->ckptNRec++;
18687c478bd9Sstevel@tonic-gate     assert( pPager->aInCkpt!=0 );
18697c478bd9Sstevel@tonic-gate     pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
18707c478bd9Sstevel@tonic-gate     page_add_to_ckpt_list(pPg);
18717c478bd9Sstevel@tonic-gate   }
18727c478bd9Sstevel@tonic-gate 
18737c478bd9Sstevel@tonic-gate   /* Update the database size and return.
18747c478bd9Sstevel@tonic-gate   */
18757c478bd9Sstevel@tonic-gate   if( pPager->dbSize<(int)pPg->pgno ){
18767c478bd9Sstevel@tonic-gate     pPager->dbSize = pPg->pgno;
18777c478bd9Sstevel@tonic-gate   }
18787c478bd9Sstevel@tonic-gate   return rc;
18797c478bd9Sstevel@tonic-gate }
18807c478bd9Sstevel@tonic-gate 
18817c478bd9Sstevel@tonic-gate /*
18827c478bd9Sstevel@tonic-gate ** Return TRUE if the page given in the argument was previously passed
18837c478bd9Sstevel@tonic-gate ** to sqlitepager_write().  In other words, return TRUE if it is ok
18847c478bd9Sstevel@tonic-gate ** to change the content of the page.
18857c478bd9Sstevel@tonic-gate */
sqlitepager_iswriteable(void * pData)18867c478bd9Sstevel@tonic-gate int sqlitepager_iswriteable(void *pData){
18877c478bd9Sstevel@tonic-gate   PgHdr *pPg = DATA_TO_PGHDR(pData);
18887c478bd9Sstevel@tonic-gate   return pPg->dirty;
18897c478bd9Sstevel@tonic-gate }
18907c478bd9Sstevel@tonic-gate 
18917c478bd9Sstevel@tonic-gate /*
18927c478bd9Sstevel@tonic-gate ** Replace the content of a single page with the information in the third
18937c478bd9Sstevel@tonic-gate ** argument.
18947c478bd9Sstevel@tonic-gate */
sqlitepager_overwrite(Pager * pPager,Pgno pgno,void * pData)18957c478bd9Sstevel@tonic-gate int sqlitepager_overwrite(Pager *pPager, Pgno pgno, void *pData){
18967c478bd9Sstevel@tonic-gate   void *pPage;
18977c478bd9Sstevel@tonic-gate   int rc;
18987c478bd9Sstevel@tonic-gate 
18997c478bd9Sstevel@tonic-gate   rc = sqlitepager_get(pPager, pgno, &pPage);
19007c478bd9Sstevel@tonic-gate   if( rc==SQLITE_OK ){
19017c478bd9Sstevel@tonic-gate     rc = sqlitepager_write(pPage);
19027c478bd9Sstevel@tonic-gate     if( rc==SQLITE_OK ){
19037c478bd9Sstevel@tonic-gate       memcpy(pPage, pData, SQLITE_PAGE_SIZE);
19047c478bd9Sstevel@tonic-gate     }
19057c478bd9Sstevel@tonic-gate     sqlitepager_unref(pPage);
19067c478bd9Sstevel@tonic-gate   }
19077c478bd9Sstevel@tonic-gate   return rc;
19087c478bd9Sstevel@tonic-gate }
19097c478bd9Sstevel@tonic-gate 
19107c478bd9Sstevel@tonic-gate /*
19117c478bd9Sstevel@tonic-gate ** A call to this routine tells the pager that it is not necessary to
19127c478bd9Sstevel@tonic-gate ** write the information on page "pgno" back to the disk, even though
19137c478bd9Sstevel@tonic-gate ** that page might be marked as dirty.
19147c478bd9Sstevel@tonic-gate **
19157c478bd9Sstevel@tonic-gate ** The overlying software layer calls this routine when all of the data
19167c478bd9Sstevel@tonic-gate ** on the given page is unused.  The pager marks the page as clean so
19177c478bd9Sstevel@tonic-gate ** that it does not get written to disk.
19187c478bd9Sstevel@tonic-gate **
19197c478bd9Sstevel@tonic-gate ** Tests show that this optimization, together with the
19207c478bd9Sstevel@tonic-gate ** sqlitepager_dont_rollback() below, more than double the speed
19217c478bd9Sstevel@tonic-gate ** of large INSERT operations and quadruple the speed of large DELETEs.
19227c478bd9Sstevel@tonic-gate **
19237c478bd9Sstevel@tonic-gate ** When this routine is called, set the alwaysRollback flag to true.
19247c478bd9Sstevel@tonic-gate ** Subsequent calls to sqlitepager_dont_rollback() for the same page
19257c478bd9Sstevel@tonic-gate ** will thereafter be ignored.  This is necessary to avoid a problem
19267c478bd9Sstevel@tonic-gate ** where a page with data is added to the freelist during one part of
19277c478bd9Sstevel@tonic-gate ** a transaction then removed from the freelist during a later part
19287c478bd9Sstevel@tonic-gate ** of the same transaction and reused for some other purpose.  When it
19297c478bd9Sstevel@tonic-gate ** is first added to the freelist, this routine is called.  When reused,
19307c478bd9Sstevel@tonic-gate ** the dont_rollback() routine is called.  But because the page contains
19317c478bd9Sstevel@tonic-gate ** critical data, we still need to be sure it gets rolled back in spite
19327c478bd9Sstevel@tonic-gate ** of the dont_rollback() call.
19337c478bd9Sstevel@tonic-gate */
sqlitepager_dont_write(Pager * pPager,Pgno pgno)19347c478bd9Sstevel@tonic-gate void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
19357c478bd9Sstevel@tonic-gate   PgHdr *pPg;
19367c478bd9Sstevel@tonic-gate 
19377c478bd9Sstevel@tonic-gate   pPg = pager_lookup(pPager, pgno);
19387c478bd9Sstevel@tonic-gate   pPg->alwaysRollback = 1;
19397c478bd9Sstevel@tonic-gate   if( pPg && pPg->dirty ){
19407c478bd9Sstevel@tonic-gate     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
19417c478bd9Sstevel@tonic-gate       /* If this pages is the last page in the file and the file has grown
19427c478bd9Sstevel@tonic-gate       ** during the current transaction, then do NOT mark the page as clean.
19437c478bd9Sstevel@tonic-gate       ** When the database file grows, we must make sure that the last page
19447c478bd9Sstevel@tonic-gate       ** gets written at least once so that the disk file will be the correct
19457c478bd9Sstevel@tonic-gate       ** size. If you do not write this page and the size of the file
19467c478bd9Sstevel@tonic-gate       ** on the disk ends up being too small, that can lead to database
19477c478bd9Sstevel@tonic-gate       ** corruption during the next transaction.
19487c478bd9Sstevel@tonic-gate       */
19497c478bd9Sstevel@tonic-gate     }else{
19507c478bd9Sstevel@tonic-gate       TRACE2("DONT_WRITE %d\n", pgno);
19517c478bd9Sstevel@tonic-gate       pPg->dirty = 0;
19527c478bd9Sstevel@tonic-gate     }
19537c478bd9Sstevel@tonic-gate   }
19547c478bd9Sstevel@tonic-gate }
19557c478bd9Sstevel@tonic-gate 
19567c478bd9Sstevel@tonic-gate /*
19577c478bd9Sstevel@tonic-gate ** A call to this routine tells the pager that if a rollback occurs,
19587c478bd9Sstevel@tonic-gate ** it is not necessary to restore the data on the given page.  This
19597c478bd9Sstevel@tonic-gate ** means that the pager does not have to record the given page in the
19607c478bd9Sstevel@tonic-gate ** rollback journal.
19617c478bd9Sstevel@tonic-gate */
sqlitepager_dont_rollback(void * pData)19627c478bd9Sstevel@tonic-gate void sqlitepager_dont_rollback(void *pData){
19637c478bd9Sstevel@tonic-gate   PgHdr *pPg = DATA_TO_PGHDR(pData);
19647c478bd9Sstevel@tonic-gate   Pager *pPager = pPg->pPager;
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate   if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
19677c478bd9Sstevel@tonic-gate   if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
19687c478bd9Sstevel@tonic-gate   if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
19697c478bd9Sstevel@tonic-gate     assert( pPager->aInJournal!=0 );
19707c478bd9Sstevel@tonic-gate     pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
19717c478bd9Sstevel@tonic-gate     pPg->inJournal = 1;
19727c478bd9Sstevel@tonic-gate     if( pPager->ckptInUse ){
19737c478bd9Sstevel@tonic-gate       pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
19747c478bd9Sstevel@tonic-gate       page_add_to_ckpt_list(pPg);
19757c478bd9Sstevel@tonic-gate     }
19767c478bd9Sstevel@tonic-gate     TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
19777c478bd9Sstevel@tonic-gate   }
19787c478bd9Sstevel@tonic-gate   if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
19797c478bd9Sstevel@tonic-gate     assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
19807c478bd9Sstevel@tonic-gate     assert( pPager->aInCkpt!=0 );
19817c478bd9Sstevel@tonic-gate     pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
19827c478bd9Sstevel@tonic-gate     page_add_to_ckpt_list(pPg);
19837c478bd9Sstevel@tonic-gate   }
19847c478bd9Sstevel@tonic-gate }
19857c478bd9Sstevel@tonic-gate 
19867c478bd9Sstevel@tonic-gate /*
19877c478bd9Sstevel@tonic-gate ** Commit all changes to the database and release the write lock.
19887c478bd9Sstevel@tonic-gate **
19897c478bd9Sstevel@tonic-gate ** If the commit fails for any reason, a rollback attempt is made
19907c478bd9Sstevel@tonic-gate ** and an error code is returned.  If the commit worked, SQLITE_OK
19917c478bd9Sstevel@tonic-gate ** is returned.
19927c478bd9Sstevel@tonic-gate */
sqlitepager_commit(Pager * pPager)19937c478bd9Sstevel@tonic-gate int sqlitepager_commit(Pager *pPager){
19947c478bd9Sstevel@tonic-gate   int rc;
19957c478bd9Sstevel@tonic-gate   PgHdr *pPg;
19967c478bd9Sstevel@tonic-gate 
19977c478bd9Sstevel@tonic-gate   if( pPager->errMask==PAGER_ERR_FULL ){
19987c478bd9Sstevel@tonic-gate     rc = sqlitepager_rollback(pPager);
19997c478bd9Sstevel@tonic-gate     if( rc==SQLITE_OK ){
20007c478bd9Sstevel@tonic-gate       rc = SQLITE_FULL;
20017c478bd9Sstevel@tonic-gate     }
20027c478bd9Sstevel@tonic-gate     return rc;
20037c478bd9Sstevel@tonic-gate   }
20047c478bd9Sstevel@tonic-gate   if( pPager->errMask!=0 ){
20057c478bd9Sstevel@tonic-gate     rc = pager_errcode(pPager);
20067c478bd9Sstevel@tonic-gate     return rc;
20077c478bd9Sstevel@tonic-gate   }
20087c478bd9Sstevel@tonic-gate   if( pPager->state!=SQLITE_WRITELOCK ){
20097c478bd9Sstevel@tonic-gate     return SQLITE_ERROR;
20107c478bd9Sstevel@tonic-gate   }
20117c478bd9Sstevel@tonic-gate   TRACE1("COMMIT\n");
20127c478bd9Sstevel@tonic-gate   if( pPager->dirtyFile==0 ){
20137c478bd9Sstevel@tonic-gate     /* Exit early (without doing the time-consuming sqliteOsSync() calls)
20147c478bd9Sstevel@tonic-gate     ** if there have been no changes to the database file. */
20157c478bd9Sstevel@tonic-gate     assert( pPager->needSync==0 );
20167c478bd9Sstevel@tonic-gate     rc = pager_unwritelock(pPager);
20177c478bd9Sstevel@tonic-gate     pPager->dbSize = -1;
20187c478bd9Sstevel@tonic-gate     return rc;
20197c478bd9Sstevel@tonic-gate   }
20207c478bd9Sstevel@tonic-gate   assert( pPager->journalOpen );
20217c478bd9Sstevel@tonic-gate   rc = syncJournal(pPager);
20227c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
20237c478bd9Sstevel@tonic-gate     goto commit_abort;
20247c478bd9Sstevel@tonic-gate   }
20257c478bd9Sstevel@tonic-gate   pPg = pager_get_all_dirty_pages(pPager);
20267c478bd9Sstevel@tonic-gate   if( pPg ){
20277c478bd9Sstevel@tonic-gate     rc = pager_write_pagelist(pPg);
20287c478bd9Sstevel@tonic-gate     if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
20297c478bd9Sstevel@tonic-gate       goto commit_abort;
20307c478bd9Sstevel@tonic-gate     }
20317c478bd9Sstevel@tonic-gate   }
20327c478bd9Sstevel@tonic-gate   rc = pager_unwritelock(pPager);
20337c478bd9Sstevel@tonic-gate   pPager->dbSize = -1;
20347c478bd9Sstevel@tonic-gate   return rc;
20357c478bd9Sstevel@tonic-gate 
20367c478bd9Sstevel@tonic-gate   /* Jump here if anything goes wrong during the commit process.
20377c478bd9Sstevel@tonic-gate   */
20387c478bd9Sstevel@tonic-gate commit_abort:
20397c478bd9Sstevel@tonic-gate   rc = sqlitepager_rollback(pPager);
20407c478bd9Sstevel@tonic-gate   if( rc==SQLITE_OK ){
20417c478bd9Sstevel@tonic-gate     rc = SQLITE_FULL;
20427c478bd9Sstevel@tonic-gate   }
20437c478bd9Sstevel@tonic-gate   return rc;
20447c478bd9Sstevel@tonic-gate }
20457c478bd9Sstevel@tonic-gate 
20467c478bd9Sstevel@tonic-gate /*
20477c478bd9Sstevel@tonic-gate ** Rollback all changes.  The database falls back to read-only mode.
20487c478bd9Sstevel@tonic-gate ** All in-memory cache pages revert to their original data contents.
20497c478bd9Sstevel@tonic-gate ** The journal is deleted.
20507c478bd9Sstevel@tonic-gate **
20517c478bd9Sstevel@tonic-gate ** This routine cannot fail unless some other process is not following
20527c478bd9Sstevel@tonic-gate ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
20537c478bd9Sstevel@tonic-gate ** process is writing trash into the journal file (SQLITE_CORRUPT) or
20547c478bd9Sstevel@tonic-gate ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
20557c478bd9Sstevel@tonic-gate ** codes are returned for all these occasions.  Otherwise,
20567c478bd9Sstevel@tonic-gate ** SQLITE_OK is returned.
20577c478bd9Sstevel@tonic-gate */
sqlitepager_rollback(Pager * pPager)20587c478bd9Sstevel@tonic-gate int sqlitepager_rollback(Pager *pPager){
20597c478bd9Sstevel@tonic-gate   int rc;
20607c478bd9Sstevel@tonic-gate   TRACE1("ROLLBACK\n");
20617c478bd9Sstevel@tonic-gate   if( !pPager->dirtyFile || !pPager->journalOpen ){
20627c478bd9Sstevel@tonic-gate     rc = pager_unwritelock(pPager);
20637c478bd9Sstevel@tonic-gate     pPager->dbSize = -1;
20647c478bd9Sstevel@tonic-gate     return rc;
20657c478bd9Sstevel@tonic-gate   }
20667c478bd9Sstevel@tonic-gate 
20677c478bd9Sstevel@tonic-gate   if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
20687c478bd9Sstevel@tonic-gate     if( pPager->state>=SQLITE_WRITELOCK ){
20697c478bd9Sstevel@tonic-gate       pager_playback(pPager, 1);
20707c478bd9Sstevel@tonic-gate     }
20717c478bd9Sstevel@tonic-gate     return pager_errcode(pPager);
20727c478bd9Sstevel@tonic-gate   }
20737c478bd9Sstevel@tonic-gate   if( pPager->state!=SQLITE_WRITELOCK ){
20747c478bd9Sstevel@tonic-gate     return SQLITE_OK;
20757c478bd9Sstevel@tonic-gate   }
20767c478bd9Sstevel@tonic-gate   rc = pager_playback(pPager, 1);
20777c478bd9Sstevel@tonic-gate   if( rc!=SQLITE_OK ){
20787c478bd9Sstevel@tonic-gate     rc = SQLITE_CORRUPT;
20797c478bd9Sstevel@tonic-gate     pPager->errMask |= PAGER_ERR_CORRUPT;
20807c478bd9Sstevel@tonic-gate   }
20817c478bd9Sstevel@tonic-gate   pPager->dbSize = -1;
20827c478bd9Sstevel@tonic-gate   return rc;
20837c478bd9Sstevel@tonic-gate }
20847c478bd9Sstevel@tonic-gate 
20857c478bd9Sstevel@tonic-gate /*
20867c478bd9Sstevel@tonic-gate ** Return TRUE if the database file is opened read-only.  Return FALSE
20877c478bd9Sstevel@tonic-gate ** if the database is (in theory) writable.
20887c478bd9Sstevel@tonic-gate */
sqlitepager_isreadonly(Pager * pPager)20897c478bd9Sstevel@tonic-gate int sqlitepager_isreadonly(Pager *pPager){
20907c478bd9Sstevel@tonic-gate   return pPager->readOnly;
20917c478bd9Sstevel@tonic-gate }
20927c478bd9Sstevel@tonic-gate 
20937c478bd9Sstevel@tonic-gate /*
20947c478bd9Sstevel@tonic-gate ** This routine is used for testing and analysis only.
20957c478bd9Sstevel@tonic-gate */
sqlitepager_stats(Pager * pPager)20967c478bd9Sstevel@tonic-gate int *sqlitepager_stats(Pager *pPager){
20977c478bd9Sstevel@tonic-gate   static int a[9];
20987c478bd9Sstevel@tonic-gate   a[0] = pPager->nRef;
20997c478bd9Sstevel@tonic-gate   a[1] = pPager->nPage;
21007c478bd9Sstevel@tonic-gate   a[2] = pPager->mxPage;
21017c478bd9Sstevel@tonic-gate   a[3] = pPager->dbSize;
21027c478bd9Sstevel@tonic-gate   a[4] = pPager->state;
21037c478bd9Sstevel@tonic-gate   a[5] = pPager->errMask;
21047c478bd9Sstevel@tonic-gate   a[6] = pPager->nHit;
21057c478bd9Sstevel@tonic-gate   a[7] = pPager->nMiss;
21067c478bd9Sstevel@tonic-gate   a[8] = pPager->nOvfl;
21077c478bd9Sstevel@tonic-gate   return a;
21087c478bd9Sstevel@tonic-gate }
21097c478bd9Sstevel@tonic-gate 
21107c478bd9Sstevel@tonic-gate /*
21117c478bd9Sstevel@tonic-gate ** Set the checkpoint.
21127c478bd9Sstevel@tonic-gate **
21137c478bd9Sstevel@tonic-gate ** This routine should be called with the transaction journal already
21147c478bd9Sstevel@tonic-gate ** open.  A new checkpoint journal is created that can be used to rollback
21157c478bd9Sstevel@tonic-gate ** changes of a single SQL command within a larger transaction.
21167c478bd9Sstevel@tonic-gate */
sqlitepager_ckpt_begin(Pager * pPager)21177c478bd9Sstevel@tonic-gate int sqlitepager_ckpt_begin(Pager *pPager){
21187c478bd9Sstevel@tonic-gate   int rc;
21197c478bd9Sstevel@tonic-gate   char zTemp[SQLITE_TEMPNAME_SIZE];
21207c478bd9Sstevel@tonic-gate   if( !pPager->journalOpen ){
21217c478bd9Sstevel@tonic-gate     pPager->ckptAutoopen = 1;
21227c478bd9Sstevel@tonic-gate     return SQLITE_OK;
21237c478bd9Sstevel@tonic-gate   }
21247c478bd9Sstevel@tonic-gate   assert( pPager->journalOpen );
21257c478bd9Sstevel@tonic-gate   assert( !pPager->ckptInUse );
21267c478bd9Sstevel@tonic-gate   pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
21277c478bd9Sstevel@tonic-gate   if( pPager->aInCkpt==0 ){
21287c478bd9Sstevel@tonic-gate     sqliteOsReadLock(&pPager->fd);
21297c478bd9Sstevel@tonic-gate     return SQLITE_NOMEM;
21307c478bd9Sstevel@tonic-gate   }
21317c478bd9Sstevel@tonic-gate #ifndef NDEBUG
21327c478bd9Sstevel@tonic-gate   rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
21337c478bd9Sstevel@tonic-gate   if( rc ) goto ckpt_begin_failed;
2134*1da57d55SToomas Soome   assert( pPager->ckptJSize ==
21357c478bd9Sstevel@tonic-gate     pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
21367c478bd9Sstevel@tonic-gate #endif
21377c478bd9Sstevel@tonic-gate   pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
21387c478bd9Sstevel@tonic-gate                          + JOURNAL_HDR_SZ(journal_format);
21397c478bd9Sstevel@tonic-gate   pPager->ckptSize = pPager->dbSize;
21407c478bd9Sstevel@tonic-gate   if( !pPager->ckptOpen ){
21417c478bd9Sstevel@tonic-gate     rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
21427c478bd9Sstevel@tonic-gate     if( rc ) goto ckpt_begin_failed;
21437c478bd9Sstevel@tonic-gate     pPager->ckptOpen = 1;
21447c478bd9Sstevel@tonic-gate     pPager->ckptNRec = 0;
21457c478bd9Sstevel@tonic-gate   }
21467c478bd9Sstevel@tonic-gate   pPager->ckptInUse = 1;
21477c478bd9Sstevel@tonic-gate   return SQLITE_OK;
2148*1da57d55SToomas Soome 
21497c478bd9Sstevel@tonic-gate ckpt_begin_failed:
21507c478bd9Sstevel@tonic-gate   if( pPager->aInCkpt ){
21517c478bd9Sstevel@tonic-gate     sqliteFree(pPager->aInCkpt);
21527c478bd9Sstevel@tonic-gate     pPager->aInCkpt = 0;
21537c478bd9Sstevel@tonic-gate   }
21547c478bd9Sstevel@tonic-gate   return rc;
21557c478bd9Sstevel@tonic-gate }
21567c478bd9Sstevel@tonic-gate 
21577c478bd9Sstevel@tonic-gate /*
21587c478bd9Sstevel@tonic-gate ** Commit a checkpoint.
21597c478bd9Sstevel@tonic-gate */
sqlitepager_ckpt_commit(Pager * pPager)21607c478bd9Sstevel@tonic-gate int sqlitepager_ckpt_commit(Pager *pPager){
21617c478bd9Sstevel@tonic-gate   if( pPager->ckptInUse ){
21627c478bd9Sstevel@tonic-gate     PgHdr *pPg, *pNext;
21637c478bd9Sstevel@tonic-gate     sqliteOsSeek(&pPager->cpfd, 0);
21647c478bd9Sstevel@tonic-gate     /* sqliteOsTruncate(&pPager->cpfd, 0); */
21657c478bd9Sstevel@tonic-gate     pPager->ckptNRec = 0;
21667c478bd9Sstevel@tonic-gate     pPager->ckptInUse = 0;
21677c478bd9Sstevel@tonic-gate     sqliteFree( pPager->aInCkpt );
21687c478bd9Sstevel@tonic-gate     pPager->aInCkpt = 0;
21697c478bd9Sstevel@tonic-gate     for(pPg=pPager->pCkpt; pPg; pPg=pNext){
21707c478bd9Sstevel@tonic-gate       pNext = pPg->pNextCkpt;
21717c478bd9Sstevel@tonic-gate       assert( pPg->inCkpt );
21727c478bd9Sstevel@tonic-gate       pPg->inCkpt = 0;
21737c478bd9Sstevel@tonic-gate       pPg->pPrevCkpt = pPg->pNextCkpt = 0;
21747c478bd9Sstevel@tonic-gate     }
21757c478bd9Sstevel@tonic-gate     pPager->pCkpt = 0;
21767c478bd9Sstevel@tonic-gate   }
21777c478bd9Sstevel@tonic-gate   pPager->ckptAutoopen = 0;
21787c478bd9Sstevel@tonic-gate   return SQLITE_OK;
21797c478bd9Sstevel@tonic-gate }
21807c478bd9Sstevel@tonic-gate 
21817c478bd9Sstevel@tonic-gate /*
21827c478bd9Sstevel@tonic-gate ** Rollback a checkpoint.
21837c478bd9Sstevel@tonic-gate */
sqlitepager_ckpt_rollback(Pager * pPager)21847c478bd9Sstevel@tonic-gate int sqlitepager_ckpt_rollback(Pager *pPager){
21857c478bd9Sstevel@tonic-gate   int rc;
21867c478bd9Sstevel@tonic-gate   if( pPager->ckptInUse ){
21877c478bd9Sstevel@tonic-gate     rc = pager_ckpt_playback(pPager);
21887c478bd9Sstevel@tonic-gate     sqlitepager_ckpt_commit(pPager);
21897c478bd9Sstevel@tonic-gate   }else{
21907c478bd9Sstevel@tonic-gate     rc = SQLITE_OK;
21917c478bd9Sstevel@tonic-gate   }
21927c478bd9Sstevel@tonic-gate   pPager->ckptAutoopen = 0;
21937c478bd9Sstevel@tonic-gate   return rc;
21947c478bd9Sstevel@tonic-gate }
21957c478bd9Sstevel@tonic-gate 
21967c478bd9Sstevel@tonic-gate /*
21977c478bd9Sstevel@tonic-gate ** Return the full pathname of the database file.
21987c478bd9Sstevel@tonic-gate */
sqlitepager_filename(Pager * pPager)21997c478bd9Sstevel@tonic-gate const char *sqlitepager_filename(Pager *pPager){
22007c478bd9Sstevel@tonic-gate   return pPager->zFilename;
22017c478bd9Sstevel@tonic-gate }
22027c478bd9Sstevel@tonic-gate 
22037c478bd9Sstevel@tonic-gate /*
22047c478bd9Sstevel@tonic-gate ** Set the codec for this pager
22057c478bd9Sstevel@tonic-gate */
sqlitepager_set_codec(Pager * pPager,void (* xCodec)(void *,void *,Pgno,int),void * pCodecArg)22067c478bd9Sstevel@tonic-gate void sqlitepager_set_codec(
22077c478bd9Sstevel@tonic-gate   Pager *pPager,
22087c478bd9Sstevel@tonic-gate   void (*xCodec)(void*,void*,Pgno,int),
22097c478bd9Sstevel@tonic-gate   void *pCodecArg
22107c478bd9Sstevel@tonic-gate ){
22117c478bd9Sstevel@tonic-gate   pPager->xCodec = xCodec;
22127c478bd9Sstevel@tonic-gate   pPager->pCodecArg = pCodecArg;
22137c478bd9Sstevel@tonic-gate }
22147c478bd9Sstevel@tonic-gate 
22157c478bd9Sstevel@tonic-gate #ifdef SQLITE_TEST
22167c478bd9Sstevel@tonic-gate /*
22177c478bd9Sstevel@tonic-gate ** Print a listing of all referenced pages and their ref count.
22187c478bd9Sstevel@tonic-gate */
sqlitepager_refdump(Pager * pPager)22197c478bd9Sstevel@tonic-gate void sqlitepager_refdump(Pager *pPager){
22207c478bd9Sstevel@tonic-gate   PgHdr *pPg;
22217c478bd9Sstevel@tonic-gate   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
22227c478bd9Sstevel@tonic-gate     if( pPg->nRef<=0 ) continue;
2223*1da57d55SToomas Soome     printf("PAGE %3d addr=0x%08x nRef=%d\n",
22247c478bd9Sstevel@tonic-gate        pPg->pgno, (int)PGHDR_TO_DATA(pPg), pPg->nRef);
22257c478bd9Sstevel@tonic-gate   }
22267c478bd9Sstevel@tonic-gate }
22277c478bd9Sstevel@tonic-gate #endif
2228