File Coverage

pager.c
Criterion Covered Total %
statement 472 774 60.9
branch 220 478 46.0
condition n/a
subroutine n/a
pod n/a
total 692 1252 55.2


line stmt bran cond sub pod time code
1             /*
2             ** 2001 September 15
3             **
4             ** The author disclaims copyright to this source code. In place of
5             ** a legal notice, here is a blessing:
6             **
7             ** May you do good and not evil.
8             ** May you find forgiveness for yourself and forgive others.
9             ** May you share freely, never taking more than you give.
10             **
11             *************************************************************************
12             ** This is the implementation of the page cache subsystem or "pager".
13             **
14             ** The pager is used to access a database disk file. It implements
15             ** atomic commit and rollback through the use of a journal file that
16             ** is separate from the database file. The pager also implements file
17             ** locking to prevent two processes from writing the same database
18             ** file simultaneously, or one process from reading the database while
19             ** another is writing.
20             **
21             ** @(#) $Id: pager.c,v 1.1.1.1 2004/08/08 15:03:57 matt Exp $
22             */
23             #include "os.h" /* Must be first to enable large file support */
24             #include "sqliteInt.h"
25             #include "pager.h"
26             #include <assert.h>
27             #include <string.h>
28              
29             /*
30             ** Macros for troubleshooting. Normally turned off
31             */
32             #if 0
33             static Pager *mainPager = 0;
34             #define SET_PAGER(X) if( mainPager==0 ) mainPager = (X)
35             #define CLR_PAGER(X) if( mainPager==(X) ) mainPager = 0
36             #define TRACE1(X) if( pPager==mainPager ) fprintf(stderr,X)
37             #define TRACE2(X,Y) if( pPager==mainPager ) fprintf(stderr,X,Y)
38             #define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
39             #else
40             #define SET_PAGER(X)
41             #define CLR_PAGER(X)
42             #define TRACE1(X)
43             #define TRACE2(X,Y)
44             #define TRACE3(X,Y,Z)
45             #endif
46              
47              
48             /*
49             ** The page cache as a whole is always in one of the following
50             ** states:
51             **
52             ** SQLITE_UNLOCK The page cache is not currently reading or
53             ** writing the database file. There is no
54             ** data held in memory. This is the initial
55             ** state.
56             **
57             ** SQLITE_READLOCK The page cache is reading the database.
58             ** Writing is not permitted. There can be
59             ** multiple readers accessing the same database
60             ** file at the same time.
61             **
62             ** SQLITE_WRITELOCK The page cache is writing the database.
63             ** Access is exclusive. No other processes or
64             ** threads can be reading or writing while one
65             ** process is writing.
66             **
67             ** The page cache comes up in SQLITE_UNLOCK. The first time a
68             ** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
69             ** After all pages have been released using sqlite_page_unref(),
70             ** the state transitions back to SQLITE_UNLOCK. The first time
71             ** that sqlite_page_write() is called, the state transitions to
72             ** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be
73             ** called on an outstanding page which means that the pager must
74             ** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
75             ** The sqlite_page_rollback() and sqlite_page_commit() functions
76             ** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
77             */
78             #define SQLITE_UNLOCK 0
79             #define SQLITE_READLOCK 1
80             #define SQLITE_WRITELOCK 2
81              
82              
83             /*
84             ** Each in-memory image of a page begins with the following header.
85             ** This header is only visible to this pager module. The client
86             ** code that calls pager sees only the data that follows the header.
87             **
88             ** Client code should call sqlitepager_write() on a page prior to making
89             ** any modifications to that page. The first time sqlitepager_write()
90             ** is called, the original page contents are written into the rollback
91             ** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
92             ** the journal page has made it onto the disk surface, PgHdr.needSync
93             ** is cleared. The modified page cannot be written back into the original
94             ** database file until the journal pages has been synced to disk and the
95             ** PgHdr.needSync has been cleared.
96             **
97             ** The PgHdr.dirty flag is set when sqlitepager_write() is called and
98             ** is cleared again when the page content is written back to the original
99             ** database file.
100             */
101             typedef struct PgHdr PgHdr;
102             struct PgHdr {
103             Pager *pPager; /* The pager to which this page belongs */
104             Pgno pgno; /* The page number for this page */
105             PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
106             int nRef; /* Number of users of this page */
107             PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
108             PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
109             PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */
110             u8 inJournal; /* TRUE if has been written to journal */
111             u8 inCkpt; /* TRUE if written to the checkpoint journal */
112             u8 dirty; /* TRUE if we need to write back changes */
113             u8 needSync; /* Sync journal before writing this page */
114             u8 alwaysRollback; /* Disable dont_rollback() for this page */
115             PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */
116             /* SQLITE_PAGE_SIZE bytes of page data follow this header */
117             /* Pager.nExtra bytes of local data follow the page data */
118             };
119              
120              
121             /*
122             ** A macro used for invoking the codec if there is one
123             */
124             #ifdef SQLITE_HAS_CODEC
125             # define CODEC(P,D,N,X) if( P->xCodec ){ P->xCodec(P->pCodecArg,D,N,X); }
126             #else
127             # define CODEC(P,D,N,X)
128             #endif
129              
130             /*
131             ** Convert a pointer to a PgHdr into a pointer to its data
132             ** and back again.
133             */
134             #define PGHDR_TO_DATA(P) ((void*)(&(P)[1]))
135             #define DATA_TO_PGHDR(D) (&((PgHdr*)(D))[-1])
136             #define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])
137              
138             /*
139             ** How big to make the hash table used for locating in-memory pages
140             ** by page number.
141             */
142             #define N_PG_HASH 2048
143              
144             /*
145             ** Hash a page number
146             */
147             #define pager_hash(PN) ((PN)&(N_PG_HASH-1))
148              
149             /*
150             ** A open page cache is an instance of the following structure.
151             */
152             struct Pager {
153             char *zFilename; /* Name of the database file */
154             char *zJournal; /* Name of the journal file */
155             char *zDirectory; /* Directory hold database and journal files */
156             OsFile fd, jfd; /* File descriptors for database and journal */
157             OsFile cpfd; /* File descriptor for the checkpoint journal */
158             int dbSize; /* Number of pages in the file */
159             int origDbSize; /* dbSize before the current change */
160             int ckptSize; /* Size of database (in pages) at ckpt_begin() */
161             sql_off_t ckptJSize; /* Size of journal at ckpt_begin() */
162             int nRec; /* Number of pages written to the journal */
163             u32 cksumInit; /* Quasi-random value added to every checksum */
164             int ckptNRec; /* Number of records in the checkpoint journal */
165             int nExtra; /* Add this many bytes to each in-memory page */
166             void (*xDestructor)(void*); /* Call this routine when freeing pages */
167             int nPage; /* Total number of in-memory pages */
168             int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
169             int mxPage; /* Maximum number of pages to hold in cache */
170             int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
171             void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
172             void *pCodecArg; /* First argument to xCodec() */
173             u8 journalOpen; /* True if journal file descriptors is valid */
174             u8 journalStarted; /* True if header of journal is synced */
175             u8 useJournal; /* Use a rollback journal on this file */
176             u8 ckptOpen; /* True if the checkpoint journal is open */
177             u8 ckptInUse; /* True we are in a checkpoint */
178             u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
179             u8 noSync; /* Do not sync the journal if true */
180             u8 fullSync; /* Do extra syncs of the journal for robustness */
181             u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
182             u8 errMask; /* One of several kinds of errors */
183             u8 tempFile; /* zFilename is a temporary file */
184             u8 readOnly; /* True for a read-only database */
185             u8 needSync; /* True if an fsync() is needed on the journal */
186             u8 dirtyFile; /* True if database file has changed in any way */
187             u8 alwaysRollback; /* Disable dont_rollback() for all pages */
188             u8 *aInJournal; /* One bit for each page in the database file */
189             u8 *aInCkpt; /* One bit for each page in the database */
190             PgHdr *pFirst, *pLast; /* List of free pages */
191             PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
192             PgHdr *pAll; /* List of all pages */
193             PgHdr *pCkpt; /* List of pages in the checkpoint journal */
194             PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
195             };
196              
197             /*
198             ** These are bits that can be set in Pager.errMask.
199             */
200             #define PAGER_ERR_FULL 0x01 /* a write() failed */
201             #define PAGER_ERR_MEM 0x02 /* malloc() failed */
202             #define PAGER_ERR_LOCK 0x04 /* error in the locking protocol */
203             #define PAGER_ERR_CORRUPT 0x08 /* database or journal corruption */
204             #define PAGER_ERR_DISK 0x10 /* general disk I/O error - bad hard drive? */
205              
206             /*
207             ** The journal file contains page records in the following
208             ** format.
209             **
210             ** Actually, this structure is the complete page record for pager
211             ** formats less than 3. Beginning with format 3, this record is surrounded
212             ** by two checksums.
213             */
214             typedef struct PageRecord PageRecord;
215             struct PageRecord {
216             Pgno pgno; /* The page number */
217             char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
218             };
219              
220             /*
221             ** Journal files begin with the following magic string. The data
222             ** was obtained from /dev/random. It is used only as a sanity check.
223             **
224             ** There are three journal formats (so far). The 1st journal format writes
225             ** 32-bit integers in the byte-order of the host machine. New
226             ** formats writes integers as big-endian. All new journals use the
227             ** new format, but we have to be able to read an older journal in order
228             ** to rollback journals created by older versions of the library.
229             **
230             ** The 3rd journal format (added for 2.8.0) adds additional sanity
231             ** checking information to the journal. If the power fails while the
232             ** journal is being written, semi-random garbage data might appear in
233             ** the journal file after power is restored. If an attempt is then made
234             ** to roll the journal back, the database could be corrupted. The additional
235             ** sanity checking data is an attempt to discover the garbage in the
236             ** journal and ignore it.
237             **
238             ** The sanity checking information for the 3rd journal format consists
239             ** of a 32-bit checksum on each page of data. The checksum covers both
240             ** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
241             ** This cksum is initialized to a 32-bit random value that appears in the
242             ** journal file right after the header. The random initializer is important,
243             ** because garbage data that appears at the end of a journal is likely
244             ** data that was once in other files that have now been deleted. If the
245             ** garbage data came from an obsolete journal file, the checksums might
246             ** be correct. But by initializing the checksum to random value which
247             ** is different for every journal, we minimize that risk.
248             */
249             static const unsigned char aJournalMagic1[] = {
250             0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
251             };
252             static const unsigned char aJournalMagic2[] = {
253             0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
254             };
255             static const unsigned char aJournalMagic3[] = {
256             0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,
257             };
258             #define JOURNAL_FORMAT_1 1
259             #define JOURNAL_FORMAT_2 2
260             #define JOURNAL_FORMAT_3 3
261              
262             /*
263             ** The following integer determines what format to use when creating
264             ** new primary journal files. By default we always use format 3.
265             ** When testing, we can set this value to older journal formats in order to
266             ** make sure that newer versions of the library are able to rollback older
267             ** journal files.
268             **
269             ** Note that checkpoint journals always use format 2 and omit the header.
270             */
271             #ifdef SQLITE_TEST
272             int journal_format = 3;
273             #else
274             # define journal_format 3
275             #endif
276              
277             /*
278             ** The size of the header and of each page in the journal varies according
279             ** to which journal format is being used. The following macros figure out
280             ** the sizes based on format numbers.
281             */
282             #define JOURNAL_HDR_SZ(X) \
283             (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))
284             #define JOURNAL_PG_SZ(X) \
285             (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))
286              
287             /*
288             ** Enable reference count tracking here:
289             */
290             #ifdef SQLITE_TEST
291             int pager_refinfo_enable = 0;
292             static void pager_refinfo(PgHdr *p){
293             static int cnt = 0;
294             if( !pager_refinfo_enable ) return;
295             printf(
296             "REFCNT: %4d addr=0x%08x nRef=%d\n",
297             p->pgno, (int)PGHDR_TO_DATA(p), p->nRef
298             );
299             cnt++; /* Something to set a breakpoint on */
300             }
301             # define REFINFO(X) pager_refinfo(X)
302             #else
303             # define REFINFO(X)
304             #endif
305              
306             /*
307             ** Read a 32-bit integer from the given file descriptor. Store the integer
308             ** that is read in *pRes. Return SQLITE_OK if everything worked, or an
309             ** error code is something goes wrong.
310             **
311             ** If the journal format is 2 or 3, read a big-endian integer. If the
312             ** journal format is 1, read an integer in the native byte-order of the
313             ** host machine.
314             */
315 17           static int read32bits(int format, OsFile *fd, u32 *pRes){
316             u32 res;
317             int rc;
318 17           rc = sqliteOsRead(fd, &res, sizeof(res));
319 17 50         if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
    50          
320             unsigned char ac[4];
321 17           memcpy(ac, &res, 4);
322 17           res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
323             }
324 17           *pRes = res;
325 17           return rc;
326             }
327              
328             /*
329             ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
330             ** on success or an error code is something goes wrong.
331             **
332             ** If the journal format is 2 or 3, write the integer as 4 big-endian
333             ** bytes. If the journal format is 1, write the integer in the native
334             ** byte order. In normal operation, only formats 2 and 3 are used.
335             ** Journal format 1 is only used for testing.
336             */
337 724           static int write32bits(OsFile *fd, u32 val){
338             unsigned char ac[4];
339             if( journal_format<=1 ){
340             return sqliteOsWrite(fd, &val, 4);
341             }
342 362           ac[0] = (val>>24) & 0xff;
343 362           ac[1] = (val>>16) & 0xff;
344 362           ac[2] = (val>>8) & 0xff;
345 362           ac[3] = val & 0xff;
346 362           return sqliteOsWrite(fd, ac, 4);
347             }
348              
349             /*
350             ** Write a 32-bit integer into a page header right before the
351             ** page data. This will overwrite the PgHdr.pDirty pointer.
352             **
353             ** The integer is big-endian for formats 2 and 3 and native byte order
354             ** for journal format 1.
355             */
356 260           static void store32bits(u32 val, PgHdr *p, int offset){
357             unsigned char *ac;
358 260           ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
359             if( journal_format<=1 ){
360             memcpy(ac, &val, 4);
361             }else{
362 260           ac[0] = (val>>24) & 0xff;
363 260           ac[1] = (val>>16) & 0xff;
364 260           ac[2] = (val>>8) & 0xff;
365 260           ac[3] = val & 0xff;
366             }
367 260           }
368              
369              
370             /*
371             ** Convert the bits in the pPager->errMask into an approprate
372             ** return code.
373             */
374 0           static int pager_errcode(Pager *pPager){
375 0           int rc = SQLITE_OK;
376 0 0         if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
377 0 0         if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
378 0 0         if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
379 0 0         if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
380 0 0         if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
381 0           return rc;
382             }
383              
384             /*
385             ** Add or remove a page from the list of all pages that are in the
386             ** checkpoint journal.
387             **
388             ** The Pager keeps a separate list of pages that are currently in
389             ** the checkpoint journal. This helps the sqlitepager_ckpt_commit()
390             ** routine run MUCH faster for the common case where there are many
391             ** pages in memory but only a few are in the checkpoint journal.
392             */
393 0           static void page_add_to_ckpt_list(PgHdr *pPg){
394 0           Pager *pPager = pPg->pPager;
395 0 0         if( pPg->inCkpt ) return;
396             assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
397 0           pPg->pPrevCkpt = 0;
398 0 0         if( pPager->pCkpt ){
399 0           pPager->pCkpt->pPrevCkpt = pPg;
400             }
401 0           pPg->pNextCkpt = pPager->pCkpt;
402 0           pPager->pCkpt = pPg;
403 0           pPg->inCkpt = 1;
404             }
405 634           static void page_remove_from_ckpt_list(PgHdr *pPg){
406 634 50         if( !pPg->inCkpt ) return;
407 0 0         if( pPg->pPrevCkpt ){
408             assert( pPg->pPrevCkpt->pNextCkpt==pPg );
409 0           pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
410             }else{
411             assert( pPg->pPager->pCkpt==pPg );
412 0           pPg->pPager->pCkpt = pPg->pNextCkpt;
413             }
414 0 0         if( pPg->pNextCkpt ){
415             assert( pPg->pNextCkpt->pPrevCkpt==pPg );
416 0           pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
417             }
418 0           pPg->pNextCkpt = 0;
419 0           pPg->pPrevCkpt = 0;
420 0           pPg->inCkpt = 0;
421             }
422              
423             /*
424             ** Find a page in the hash table given its page number. Return
425             ** a pointer to the page or NULL if not found.
426             */
427 1143           static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
428 1143           PgHdr *p = pPager->aHash[pager_hash(pgno)];
429 1143 100         while( p && p->pgno!=pgno ){
    50          
430 0           p = p->pNextHash;
431             }
432 1143           return p;
433             }
434              
435             /*
436             ** Unlock the database and clear the in-memory cache. This routine
437             ** sets the state of the pager back to what it was when it was first
438             ** opened. Any outstanding pages are invalidated and subsequent attempts
439             ** to access those pages will likely result in a coredump.
440             */
441 256           static void pager_reset(Pager *pPager){
442             PgHdr *pPg, *pNext;
443 884 100         for(pPg=pPager->pAll; pPg; pPg=pNext){
444 628           pNext = pPg->pNextAll;
445 628           sqliteFree(pPg);
446             }
447 256           pPager->pFirst = 0;
448 256           pPager->pFirstSynced = 0;
449 256           pPager->pLast = 0;
450 256           pPager->pAll = 0;
451 256           memset(pPager->aHash, 0, sizeof(pPager->aHash));
452 256           pPager->nPage = 0;
453 256 50         if( pPager->state>=SQLITE_WRITELOCK ){
454 0           sqlitepager_rollback(pPager);
455             }
456 256           sqliteOsUnlock(&pPager->fd);
457 256           pPager->state = SQLITE_UNLOCK;
458 256           pPager->dbSize = -1;
459 256           pPager->nRef = 0;
460             assert( pPager->journalOpen==0 );
461 256           }
462              
463             /*
464             ** When this routine is called, the pager has the journal file open and
465             ** a write lock on the database. This routine releases the database
466             ** write lock and acquires a read lock in its place. The journal file
467             ** is deleted and closed.
468             **
469             ** TODO: Consider keeping the journal file open for temporary databases.
470             ** This might give a performance improvement on windows where opening
471             ** a file is an expensive operation.
472             */
473 156           static int pager_unwritelock(Pager *pPager){
474             int rc;
475             PgHdr *pPg;
476 156 50         if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
477 156           sqlitepager_ckpt_commit(pPager);
478 156 50         if( pPager->ckptOpen ){
479 0           sqliteOsClose(&pPager->cpfd);
480 0           pPager->ckptOpen = 0;
481             }
482 156 100         if( pPager->journalOpen ){
483 98           sqliteOsClose(&pPager->jfd);
484 98           pPager->journalOpen = 0;
485 98           sqliteOsDelete(pPager->zJournal);
486 98           sqliteFree( pPager->aInJournal );
487 98           pPager->aInJournal = 0;
488 419 100         for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
489 321           pPg->inJournal = 0;
490 321           pPg->dirty = 0;
491 321           pPg->needSync = 0;
492             }
493             }else{
494             assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
495             }
496 156           rc = sqliteOsReadLock(&pPager->fd);
497 156 50         if( rc==SQLITE_OK ){
498 156           pPager->state = SQLITE_READLOCK;
499             }else{
500             /* This can only happen if a process does a BEGIN, then forks and the
501             ** child process does the COMMIT. Because of the semantics of unix
502             ** file locking, the unlock will fail.
503             */
504 0           pPager->state = SQLITE_UNLOCK;
505             }
506 156           return rc;
507             }
508              
509             /*
510             ** Compute and return a checksum for the page of data.
511             **
512             ** This is not a real checksum. It is really just the sum of the
513             ** random initial value and the page number. We considered do a checksum
514             ** of the database, but that was found to be too slow.
515             */
516 134           static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
517 134           u32 cksum = pPager->cksumInit + pgno;
518 134           return cksum;
519             }
520              
521             /*
522             ** Read a single page from the journal file opened on file descriptor
523             ** jfd. Playback this one page.
524             **
525             ** There are three different journal formats. The format parameter determines
526             ** which format is used by the journal that is played back.
527             */
528 4           static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
529             int rc;
530             PgHdr *pPg; /* An existing page in the cache */
531             PageRecord pgRec;
532             u32 cksum;
533              
534 4           rc = read32bits(format, jfd, &pgRec.pgno);
535 4 50         if( rc!=SQLITE_OK ) return rc;
536 4           rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
537 4 50         if( rc!=SQLITE_OK ) return rc;
538              
539             /* Sanity checking on the page. This is more important that I originally
540             ** thought. If a power failure occurs while the journal is being written,
541             ** it could cause invalid data to be written into the journal. We need to
542             ** detect this invalid data (with high probability) and ignore it.
543             */
544 4 50         if( pgRec.pgno==0 ){
545 0           return SQLITE_DONE;
546             }
547 4 50         if( pgRec.pgno>(unsigned)pPager->dbSize ){
548 0           return SQLITE_OK;
549             }
550 4 50         if( format>=JOURNAL_FORMAT_3 ){
551 4           rc = read32bits(format, jfd, &cksum);
552 4 50         if( rc ) return rc;
553 4 50         if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
554 0           return SQLITE_DONE;
555             }
556             }
557              
558             /* Playback the page. Update the in-memory copy of the page
559             ** at the same time, if there is one.
560             */
561 4           pPg = pager_lookup(pPager, pgRec.pgno);
562             TRACE2("PLAYBACK %d\n", pgRec.pgno);
563 4           sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(sql_off_t)SQLITE_PAGE_SIZE);
564 4           rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
565 4 50         if( pPg ){
566             /* No page should ever be rolled back that is in use, except for page
567             ** 1 which is held in use in order to keep the lock on the database
568             ** active.
569             */
570             assert( pPg->nRef==0 || pPg->pgno==1 );
571 4           memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
572 4           memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
573 4           pPg->dirty = 0;
574 4           pPg->needSync = 0;
575             CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
576             }
577 4           return rc;
578             }
579              
580             /*
581             ** Playback the journal and thus restore the database file to
582             ** the state it was in before we started making changes.
583             **
584             ** The journal file format is as follows:
585             **
586             ** * 8 byte prefix. One of the aJournalMagic123 vectors defined
587             ** above. The format of the journal file is determined by which
588             ** of the three prefix vectors is seen.
589             ** * 4 byte big-endian integer which is the number of valid page records
590             ** in the journal. If this value is 0xffffffff, then compute the
591             ** number of page records from the journal size. This field appears
592             ** in format 3 only.
593             ** * 4 byte big-endian integer which is the initial value for the
594             ** sanity checksum. This field appears in format 3 only.
595             ** * 4 byte integer which is the number of pages to truncate the
596             ** database to during a rollback.
597             ** * Zero or more pages instances, each as follows:
598             ** + 4 byte page number.
599             ** + SQLITE_PAGE_SIZE bytes of data.
600             ** + 4 byte checksum (format 3 only)
601             **
602             ** When we speak of the journal header, we mean the first 4 bullets above.
603             ** Each entry in the journal is an instance of the 5th bullet. Note that
604             ** bullets 2 and 3 only appear in format-3 journals.
605             **
606             ** Call the value from the second bullet "nRec". nRec is the number of
607             ** valid page entries in the journal. In most cases, you can compute the
608             ** value of nRec from the size of the journal file. But if a power
609             ** failure occurred while the journal was being written, it could be the
610             ** case that the size of the journal file had already been increased but
611             ** the extra entries had not yet made it safely to disk. In such a case,
612             ** the value of nRec computed from the file size would be too large. For
613             ** that reason, we always use the nRec value in the header.
614             **
615             ** If the nRec value is 0xffffffff it means that nRec should be computed
616             ** from the file size. This value is used when the user selects the
617             ** no-sync option for the journal. A power failure could lead to corruption
618             ** in this case. But for things like temporary tables (which will be
619             ** deleted when the power is restored) we don't care.
620             **
621             ** Journal formats 1 and 2 do not have an nRec value in the header so we
622             ** have to compute nRec from the file size. This has risks (as described
623             ** above) which is why all persistent tables have been changed to use
624             ** format 3.
625             **
626             ** If the file opened as the journal file is not a well-formed
627             ** journal file then the database will likely already be
628             ** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
629             ** and SQLITE_CORRUPT is returned. If it all works, then this routine
630             ** returns SQLITE_OK.
631             */
632 3           static int pager_playback(Pager *pPager, int useJournalSize){
633             sql_off_t szJ; /* Size of the journal file in bytes */
634             int nRec; /* Number of Records in the journal */
635             int i; /* Loop counter */
636 3           Pgno mxPg = 0; /* Size of the original file in pages */
637             int format; /* Format of the journal file. */
638             unsigned char aMagic[sizeof(aJournalMagic1)];
639             int rc;
640              
641             /* Figure out how many records are in the journal. Abort early if
642             ** the journal is empty.
643             */
644             assert( pPager->journalOpen );
645 3           sqliteOsSeek(&pPager->jfd, 0);
646 3           rc = sqliteOsFileSize(&pPager->jfd, &szJ);
647 3 50         if( rc!=SQLITE_OK ){
648 0           goto end_playback;
649             }
650              
651             /* If the journal file is too small to contain a complete header,
652             ** it must mean that the process that created the journal was just
653             ** beginning to write the journal file when it died. In that case,
654             ** the database file should have still been completely unchanged.
655             ** Nothing needs to be rolled back. We can safely ignore this journal.
656             */
657 3 50         if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
658 0           goto end_playback;
659             }
660              
661             /* Read the beginning of the journal and truncate the
662             ** database file back to its original size.
663             */
664 3           rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
665 3 50         if( rc!=SQLITE_OK ){
666 0           rc = SQLITE_PROTOCOL;
667 0           goto end_playback;
668             }
669 3 50         if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
670 3           format = JOURNAL_FORMAT_3;
671 0 0         }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
672 0           format = JOURNAL_FORMAT_2;
673 0 0         }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
674 0           format = JOURNAL_FORMAT_1;
675             }else{
676 0           rc = SQLITE_PROTOCOL;
677 0           goto end_playback;
678             }
679 3 50         if( format>=JOURNAL_FORMAT_3 ){
680 3 50         if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
681             /* Ignore the journal if it is too small to contain a complete
682             ** header. We already did this test once above, but at the prior
683             ** test, we did not know the journal format and so we had to assume
684             ** the smallest possible header. Now we know the header is bigger
685             ** than the minimum so we test again.
686             */
687 0           goto end_playback;
688             }
689 3           rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
690 3 50         if( rc ) goto end_playback;
691 3           rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
692 3 50         if( rc ) goto end_playback;
693 3 50         if( nRec==0xffffffff || useJournalSize ){
    50          
694 3           nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
695             }
696             }else{
697 0           nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
698             assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
699             }
700 3           rc = read32bits(format, &pPager->jfd, &mxPg);
701 3 50         if( rc!=SQLITE_OK ){
702 0           goto end_playback;
703             }
704             assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
705 3           rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(sql_off_t)mxPg);
706 3 50         if( rc!=SQLITE_OK ){
707 0           goto end_playback;
708             }
709 3           pPager->dbSize = mxPg;
710            
711             /* Copy original pages out of the journal and back into the database file.
712             */
713 7 100         for(i=0; i
714 4           rc = pager_playback_one_page(pPager, &pPager->jfd, format);
715 4 50         if( rc!=SQLITE_OK ){
716 0 0         if( rc==SQLITE_DONE ){
717 0           rc = SQLITE_OK;
718             }
719 0           break;
720             }
721             }
722              
723             /* Pages that have been written to the journal but never synced
724             ** where not restored by the loop above. We have to restore those
725             ** pages by reading them back from the original database.
726             */
727 3 50         if( rc==SQLITE_OK ){
728             PgHdr *pPg;
729 10 100         for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
730             char zBuf[SQLITE_PAGE_SIZE];
731 7 50         if( !pPg->dirty ) continue;
732 0 0         if( (int)pPg->pgno <= pPager->origDbSize ){
733 0           sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(sql_off_t)(pPg->pgno-1));
734 0           rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
735             TRACE2("REFETCH %d\n", pPg->pgno);
736             CODEC(pPager, zBuf, pPg->pgno, 2);
737 0 0         if( rc ) break;
738             }else{
739 0           memset(zBuf, 0, SQLITE_PAGE_SIZE);
740             }
741 0 0         if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
    0          
742 0           memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
743 0           memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
744             }
745 0           pPg->needSync = 0;
746 0           pPg->dirty = 0;
747             }
748             }
749              
750             end_playback:
751 3 50         if( rc!=SQLITE_OK ){
752 0           pager_unwritelock(pPager);
753 0           pPager->errMask |= PAGER_ERR_CORRUPT;
754 0           rc = SQLITE_CORRUPT;
755             }else{
756 3           rc = pager_unwritelock(pPager);
757             }
758 3           return rc;
759             }
760              
761             /*
762             ** Playback the checkpoint journal.
763             **
764             ** This is similar to playing back the transaction journal but with
765             ** a few extra twists.
766             **
767             ** (1) The number of pages in the database file at the start of
768             ** the checkpoint is stored in pPager->ckptSize, not in the
769             ** journal file itself.
770             **
771             ** (2) In addition to playing back the checkpoint journal, also
772             ** playback all pages of the transaction journal beginning
773             ** at offset pPager->ckptJSize.
774             */
775 0           static int pager_ckpt_playback(Pager *pPager){
776             sql_off_t szJ; /* Size of the full journal */
777             int nRec; /* Number of Records */
778             int i; /* Loop counter */
779             int rc;
780              
781             /* Truncate the database back to its original size.
782             */
783 0           rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(sql_off_t)pPager->ckptSize);
784 0           pPager->dbSize = pPager->ckptSize;
785              
786             /* Figure out how many records are in the checkpoint journal.
787             */
788             assert( pPager->ckptInUse && pPager->journalOpen );
789 0           sqliteOsSeek(&pPager->cpfd, 0);
790 0           nRec = pPager->ckptNRec;
791            
792             /* Copy original pages out of the checkpoint journal and back into the
793             ** database file. Note that the checkpoint journal always uses format
794             ** 2 instead of format 3 since it does not need to be concerned with
795             ** power failures corrupting the journal and can thus omit the checksums.
796             */
797 0 0         for(i=nRec-1; i>=0; i--){
798 0           rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
799             assert( rc!=SQLITE_DONE );
800 0 0         if( rc!=SQLITE_OK ) goto end_ckpt_playback;
801             }
802              
803             /* Figure out how many pages need to be copied out of the transaction
804             ** journal.
805             */
806 0           rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
807 0 0         if( rc!=SQLITE_OK ){
808 0           goto end_ckpt_playback;
809             }
810 0           rc = sqliteOsFileSize(&pPager->jfd, &szJ);
811 0 0         if( rc!=SQLITE_OK ){
812 0           goto end_ckpt_playback;
813             }
814 0           nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
815 0 0         for(i=nRec-1; i>=0; i--){
816 0           rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
817 0 0         if( rc!=SQLITE_OK ){
818             assert( rc!=SQLITE_DONE );
819 0           goto end_ckpt_playback;
820             }
821             }
822            
823             end_ckpt_playback:
824 0 0         if( rc!=SQLITE_OK ){
825 0           pPager->errMask |= PAGER_ERR_CORRUPT;
826 0           rc = SQLITE_CORRUPT;
827             }
828 0           return rc;
829             }
830              
831             /*
832             ** Change the maximum number of in-memory pages that are allowed.
833             **
834             ** The maximum number is the absolute value of the mxPage parameter.
835             ** If mxPage is negative, the noSync flag is also set. noSync bypasses
836             ** calls to sqliteOsSync(). The pager runs much faster with noSync on,
837             ** but if the operating system crashes or there is an abrupt power
838             ** failure, the database file might be left in an inconsistent and
839             ** unrepairable state.
840             */
841 54           void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
842 54 50         if( mxPage>=0 ){
843 54           pPager->noSync = pPager->tempFile;
844 54 100         if( pPager->noSync==0 ) pPager->needSync = 0;
845             }else{
846 0           pPager->noSync = 1;
847 0           mxPage = -mxPage;
848             }
849 54 50         if( mxPage>10 ){
850 54           pPager->mxPage = mxPage;
851             }
852 54           }
853              
854             /*
855             ** Adjust the robustness of the database to damage due to OS crashes
856             ** or power failures by changing the number of syncs()s when writing
857             ** the rollback journal. There are three levels:
858             **
859             ** OFF sqliteOsSync() is never called. This is the default
860             ** for temporary and transient files.
861             **
862             ** NORMAL The journal is synced once before writes begin on the
863             ** database. This is normally adequate protection, but
864             ** it is theoretically possible, though very unlikely,
865             ** that an inopertune power failure could leave the journal
866             ** in a state which would cause damage to the database
867             ** when it is rolled back.
868             **
869             ** FULL The journal is synced twice before writes begin on the
870             ** database (with some additional information - the nRec field
871             ** of the journal header - being written in between the two
872             ** syncs). If we assume that writing a
873             ** single disk sector is atomic, then this mode provides
874             ** assurance that the journal will not be corrupted to the
875             ** point of causing damage to the database during rollback.
876             **
877             ** Numeric values associated with these states are OFF==1, NORMAL=2,
878             ** and FULL=3.
879             */
880 54           void sqlitepager_set_safety_level(Pager *pPager, int level){
881 54 50         pPager->noSync = level==1 || pPager->tempFile;
    100          
882 54 50         pPager->fullSync = level==3 && !pPager->tempFile;
    0          
883 54 100         if( pPager->noSync==0 ) pPager->needSync = 0;
884 54           }
885              
886             /*
887             ** Open a temporary file. Write the name of the file into zName
888             ** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
889             ** the file descriptor into *fd. Return SQLITE_OK on success or some
890             ** other error code if we fail.
891             **
892             ** The OS will automatically delete the temporary file when it is
893             ** closed.
894             */
895 28           static int sqlitepager_opentemp(char *zFile, OsFile *fd){
896 28           int cnt = 8;
897             int rc;
898             do{
899 28           cnt--;
900 28           sqliteOsTempFileName(zFile);
901 28           rc = sqliteOsOpenExclusive(zFile, fd, 1);
902 28 50         }while( cnt>0 && rc!=SQLITE_OK );
    50          
903 28           return rc;
904             }
905              
906             /*
907             ** Create a new page cache and put a pointer to the page cache in *ppPager.
908             ** The file to be cached need not exist. The file is not locked until
909             ** the first call to sqlitepager_get() and is only held open until the
910             ** last page is released using sqlitepager_unref().
911             **
912             ** If zFilename is NULL then a randomly-named temporary file is created
913             ** and used as the file to be cached. The file will be deleted
914             ** automatically when it is closed.
915             */
916 53           int sqlitepager_open(
917             Pager **ppPager, /* Return the Pager structure here */
918             const char *zFilename, /* Name of the database file to open */
919             int mxPage, /* Max number of in-memory cache pages */
920             int nExtra, /* Extra bytes append to each in-memory page */
921             int useJournal /* TRUE to use a rollback journal on this file */
922             ){
923             Pager *pPager;
924             char *zFullPathname;
925             int nameLen;
926             OsFile fd;
927             int rc, i;
928             int tempFile;
929 53           int readOnly = 0;
930             char zTemp[SQLITE_TEMPNAME_SIZE];
931              
932 53           *ppPager = 0;
933 53 50         if( sqlite_malloc_failed ){
934 0           return SQLITE_NOMEM;
935             }
936 53 100         if( zFilename && zFilename[0] ){
    50          
937 25           zFullPathname = sqliteOsFullPathname(zFilename);
938 25           rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
939 25           tempFile = 0;
940             }else{
941 28           rc = sqlitepager_opentemp(zTemp, &fd);
942 28           zFilename = zTemp;
943 28           zFullPathname = sqliteOsFullPathname(zFilename);
944 28           tempFile = 1;
945             }
946 53 50         if( sqlite_malloc_failed ){
947 0           return SQLITE_NOMEM;
948             }
949 53 50         if( rc!=SQLITE_OK ){
950 0           sqliteFree(zFullPathname);
951 0           return SQLITE_CANTOPEN;
952             }
953 53           nameLen = strlen(zFullPathname);
954 53           pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
955 53 50         if( pPager==0 ){
956 0           sqliteOsClose(&fd);
957 0           sqliteFree(zFullPathname);
958 0           return SQLITE_NOMEM;
959             }
960             SET_PAGER(pPager);
961 53           pPager->zFilename = (char*)&pPager[1];
962 53           pPager->zDirectory = &pPager->zFilename[nameLen+1];
963 53           pPager->zJournal = &pPager->zDirectory[nameLen+1];
964 53           strcpy(pPager->zFilename, zFullPathname);
965 53           strcpy(pPager->zDirectory, zFullPathname);
966 744 50         for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
    100          
967 53 50         if( i>0 ) pPager->zDirectory[i-1] = 0;
968 53           strcpy(pPager->zJournal, zFullPathname);
969 53           sqliteFree(zFullPathname);
970 53           strcpy(&pPager->zJournal[nameLen], "-journal");
971 53           pPager->fd = fd;
972 53           pPager->journalOpen = 0;
973 53           pPager->useJournal = useJournal;
974 53           pPager->ckptOpen = 0;
975 53           pPager->ckptInUse = 0;
976 53           pPager->nRef = 0;
977 53           pPager->dbSize = -1;
978 53           pPager->ckptSize = 0;
979 53           pPager->ckptJSize = 0;
980 53           pPager->nPage = 0;
981 53 50         pPager->mxPage = mxPage>5 ? mxPage : 10;
982 53           pPager->state = SQLITE_UNLOCK;
983 53           pPager->errMask = 0;
984 53           pPager->tempFile = tempFile;
985 53           pPager->readOnly = readOnly;
986 53           pPager->needSync = 0;
987 53 100         pPager->noSync = pPager->tempFile || !useJournal;
    50          
988 53           pPager->pFirst = 0;
989 53           pPager->pFirstSynced = 0;
990 53           pPager->pLast = 0;
991 53           pPager->nExtra = nExtra;
992 53           memset(pPager->aHash, 0, sizeof(pPager->aHash));
993 53           *ppPager = pPager;
994 53           return SQLITE_OK;
995             }
996              
997             /*
998             ** Set the destructor for this pager. If not NULL, the destructor is called
999             ** when the reference count on each page reaches zero. The destructor can
1000             ** be used to clean up information in the extra segment appended to each page.
1001             **
1002             ** The destructor is not called as a result sqlitepager_close().
1003             ** Destructors are only called by sqlitepager_unref().
1004             */
1005 53           void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
1006 53           pPager->xDestructor = xDesc;
1007 53           }
1008              
1009             /*
1010             ** Return the total number of pages in the disk file associated with
1011             ** pPager.
1012             */
1013 824           int sqlitepager_pagecount(Pager *pPager){
1014             sql_off_t n;
1015             assert( pPager!=0 );
1016 824 100         if( pPager->dbSize>=0 ){
1017 565           return pPager->dbSize;
1018             }
1019 259 50         if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
1020 0           pPager->errMask |= PAGER_ERR_DISK;
1021 0           return 0;
1022             }
1023 259           n /= SQLITE_PAGE_SIZE;
1024 259 50         if( pPager->state!=SQLITE_UNLOCK ){
1025 259           pPager->dbSize = n;
1026             }
1027 824           return n;
1028             }
1029              
1030             /*
1031             ** Forward declaration
1032             */
1033             static int syncJournal(Pager*);
1034              
1035             /*
1036             ** Truncate the file to the number of pages specified.
1037             */
1038 0           int sqlitepager_truncate(Pager *pPager, Pgno nPage){
1039             int rc;
1040 0 0         if( pPager->dbSize<0 ){
1041 0           sqlitepager_pagecount(pPager);
1042             }
1043 0 0         if( pPager->errMask!=0 ){
1044 0           rc = pager_errcode(pPager);
1045 0           return rc;
1046             }
1047 0 0         if( nPage>=(unsigned)pPager->dbSize ){
1048 0           return SQLITE_OK;
1049             }
1050 0           syncJournal(pPager);
1051 0           rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(sql_off_t)nPage);
1052 0 0         if( rc==SQLITE_OK ){
1053 0           pPager->dbSize = nPage;
1054             }
1055 0           return rc;
1056             }
1057              
1058             /*
1059             ** Shutdown the page cache. Free all memory and close all files.
1060             **
1061             ** If a transaction was in progress when this routine is called, that
1062             ** transaction is rolled back. All outstanding pages are invalidated
1063             ** and their memory is freed. Any attempt to use a page associated
1064             ** with this page cache after this function returns will likely
1065             ** result in a coredump.
1066             */
1067 53           int sqlitepager_close(Pager *pPager){
1068             PgHdr *pPg, *pNext;
1069 53           switch( pPager->state ){
1070             case SQLITE_WRITELOCK: {
1071 3           sqlitepager_rollback(pPager);
1072 3           sqliteOsUnlock(&pPager->fd);
1073             assert( pPager->journalOpen==0 );
1074 3           break;
1075             }
1076             case SQLITE_READLOCK: {
1077 0           sqliteOsUnlock(&pPager->fd);
1078 0           break;
1079             }
1080             default: {
1081             /* Do nothing */
1082 50           break;
1083             }
1084             }
1085 59 100         for(pPg=pPager->pAll; pPg; pPg=pNext){
1086 6           pNext = pPg->pNextAll;
1087 6           sqliteFree(pPg);
1088             }
1089 53           sqliteOsClose(&pPager->fd);
1090             assert( pPager->journalOpen==0 );
1091             /* Temp files are automatically deleted by the OS
1092             ** if( pPager->tempFile ){
1093             ** sqliteOsDelete(pPager->zFilename);
1094             ** }
1095             */
1096             CLR_PAGER(pPager);
1097 53 50         if( pPager->zFilename!=(char*)&pPager[1] ){
1098             assert( 0 ); /* Cannot happen */
1099 0           sqliteFree(pPager->zFilename);
1100 0           sqliteFree(pPager->zJournal);
1101 0           sqliteFree(pPager->zDirectory);
1102             }
1103 53           sqliteFree(pPager);
1104 53           return SQLITE_OK;
1105             }
1106              
1107             /*
1108             ** Return the page number for the given page data.
1109             */
1110 2           Pgno sqlitepager_pagenumber(void *pData){
1111 2           PgHdr *p = DATA_TO_PGHDR(pData);
1112 2           return p->pgno;
1113             }
1114              
1115             /*
1116             ** Increment the reference count for a page. If the page is
1117             ** currently on the freelist (the reference count is zero) then
1118             ** remove it from the freelist.
1119             */
1120             #define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1121 60           static void _page_ref(PgHdr *pPg){
1122 60 50         if( pPg->nRef==0 ){
1123             /* The page is currently on the freelist. Remove it. */
1124 60 100         if( pPg==pPg->pPager->pFirstSynced ){
1125 15           PgHdr *p = pPg->pNextFree;
1126 15 50         while( p && p->needSync ){ p = p->pNextFree; }
    0          
1127 15           pPg->pPager->pFirstSynced = p;
1128             }
1129 60 100         if( pPg->pPrevFree ){
1130 37           pPg->pPrevFree->pNextFree = pPg->pNextFree;
1131             }else{
1132 23           pPg->pPager->pFirst = pPg->pNextFree;
1133             }
1134 60 100         if( pPg->pNextFree ){
1135 32           pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1136             }else{
1137 28           pPg->pPager->pLast = pPg->pPrevFree;
1138             }
1139 60           pPg->pPager->nRef++;
1140             }
1141 60           pPg->nRef++;
1142             REFINFO(pPg);
1143 60           }
1144              
1145             /*
1146             ** Increment the reference count for a page. The input pointer is
1147             ** a reference to the page data.
1148             */
1149 12           int sqlitepager_ref(void *pData){
1150 12           PgHdr *pPg = DATA_TO_PGHDR(pData);
1151 12 50         page_ref(pPg);
1152 12           return SQLITE_OK;
1153             }
1154              
1155             /*
1156             ** Sync the journal. In other words, make sure all the pages that have
1157             ** been written to the journal have actually reached the surface of the
1158             ** disk. It is not safe to modify the original database file until after
1159             ** the journal has been synced. If the original database is modified before
1160             ** the journal is synced and a power failure occurs, the unsynced journal
1161             ** data would be lost and we would be unable to completely rollback the
1162             ** database changes. Database corruption would occur.
1163             **
1164             ** This routine also updates the nRec field in the header of the journal.
1165             ** (See comments on the pager_playback() routine for additional information.)
1166             ** If the sync mode is FULL, two syncs will occur. First the whole journal
1167             ** is synced, then the nRec field is updated, then a second sync occurs.
1168             **
1169             ** For temporary databases, we do not care if we are able to rollback
1170             ** after a power failure, so sync occurs.
1171             **
1172             ** This routine clears the needSync field of every page current held in
1173             ** memory.
1174             */
1175 91           static int syncJournal(Pager *pPager){
1176             PgHdr *pPg;
1177 91           int rc = SQLITE_OK;
1178              
1179             /* Sync the journal before modifying the main database
1180             ** (assuming there is a journal and it needs to be synced.)
1181             */
1182 91 100         if( pPager->needSync ){
1183 68 50         if( !pPager->tempFile ){
1184             assert( pPager->journalOpen );
1185             /* assert( !pPager->noSync ); // noSync might be set if synchronous
1186             ** was turned off after the transaction was started. Ticket #615 */
1187             #ifndef NDEBUG
1188             {
1189             /* Make sure the pPager->nRec counter we are keeping agrees
1190             ** with the nRec computed from the size of the journal file.
1191             */
1192             sql_off_t hdrSz, pgSz, jSz;
1193             hdrSz = JOURNAL_HDR_SZ(journal_format);
1194             pgSz = JOURNAL_PG_SZ(journal_format);
1195             rc = sqliteOsFileSize(&pPager->jfd, &jSz);
1196             if( rc!=0 ) return rc;
1197             assert( pPager->nRec*pgSz+hdrSz==jSz );
1198             }
1199             #endif
1200             if( journal_format>=3 ){
1201             /* Write the nRec value into the journal file header */
1202             sql_off_t szJ;
1203 68 50         if( pPager->fullSync ){
1204             TRACE1("SYNC\n");
1205 0           rc = sqliteOsSync(&pPager->jfd);
1206 0 0         if( rc!=0 ) return rc;
1207             }
1208 68           sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
1209 68           rc = write32bits(&pPager->jfd, pPager->nRec);
1210 68 50         if( rc ) return rc;
1211 68           szJ = JOURNAL_HDR_SZ(journal_format) +
1212 68           pPager->nRec*JOURNAL_PG_SZ(journal_format);
1213 68           sqliteOsSeek(&pPager->jfd, szJ);
1214             }
1215             TRACE1("SYNC\n");
1216 68           rc = sqliteOsSync(&pPager->jfd);
1217 68 50         if( rc!=0 ) return rc;
1218 68           pPager->journalStarted = 1;
1219             }
1220 68           pPager->needSync = 0;
1221              
1222             /* Erase the needSync flag from every page.
1223             */
1224 325 100         for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1225 257           pPg->needSync = 0;
1226             }
1227 68           pPager->pFirstSynced = pPager->pFirst;
1228             }
1229              
1230             #ifndef NDEBUG
1231             /* If the Pager.needSync flag is clear then the PgHdr.needSync
1232             ** flag must also be clear for all pages. Verify that this
1233             ** invariant is true.
1234             */
1235             else{
1236             for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1237             assert( pPg->needSync==0 );
1238             }
1239             assert( pPager->pFirstSynced==pPager->pFirst );
1240             }
1241             #endif
1242              
1243 91           return rc;
1244             }
1245              
1246             /*
1247             ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1248             ** every one of those pages out to the database file and mark them all
1249             ** as clean.
1250             */
1251 91           static int pager_write_pagelist(PgHdr *pList){
1252             Pager *pPager;
1253             int rc;
1254              
1255 91 50         if( pList==0 ) return SQLITE_OK;
1256 91           pPager = pList->pPager;
1257 316 100         while( pList ){
1258             assert( pList->dirty );
1259 225           sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(sql_off_t)SQLITE_PAGE_SIZE);
1260             CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
1261             TRACE2("STORE %d\n", pList->pgno);
1262 225           rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
1263             CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
1264 225 50         if( rc ) return rc;
1265 225           pList->dirty = 0;
1266 225           pList = pList->pDirty;
1267             }
1268 91           return SQLITE_OK;
1269             }
1270              
1271             /*
1272             ** Collect every dirty page into a dirty list and
1273             ** return a pointer to the head of that list. All pages are
1274             ** collected even if they are still in use.
1275             */
1276 91           static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1277             PgHdr *p, *pList;
1278 91           pList = 0;
1279 395 100         for(p=pPager->pAll; p; p=p->pNextAll){
1280 304 100         if( p->dirty ){
1281 225           p->pDirty = pList;
1282 225           pList = p;
1283             }
1284             }
1285 91           return pList;
1286             }
1287              
1288             /*
1289             ** Acquire a page.
1290             **
1291             ** A read lock on the disk file is obtained when the first page is acquired.
1292             ** This read lock is dropped when the last page is released.
1293             **
1294             ** A _get works for any page number greater than 0. If the database
1295             ** file is smaller than the requested page, then no actual disk
1296             ** read occurs and the memory image of the page is initialized to
1297             ** all zeros. The extra data appended to a page is always initialized
1298             ** to zeros the first time a page is loaded into memory.
1299             **
1300             ** The acquisition might fail for several reasons. In all cases,
1301             ** an appropriate error code is returned and *ppPage is set to NULL.
1302             **
1303             ** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
1304             ** to find a page in the in-memory cache first. If the page is not already
1305             ** in memory, this routine goes to disk to read it in whereas _lookup()
1306             ** just returns 0. This routine acquires a read-lock the first time it
1307             ** has to go to disk, and could also playback an old journal if necessary.
1308             ** Since _lookup() never goes to disk, it never has to deal with locks
1309             ** or journal files.
1310             */
1311 1356           int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
1312             PgHdr *pPg;
1313             int rc;
1314              
1315             /* Make sure we have not hit any critical errors.
1316             */
1317             assert( pPager!=0 );
1318             assert( pgno!=0 );
1319 1356           *ppPage = 0;
1320 1356 50         if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1321 0           return pager_errcode(pPager);
1322             }
1323              
1324             /* If this is the first page accessed, then get a read lock
1325             ** on the database file.
1326             */
1327 1356 100         if( pPager->nRef==0 ){
1328 259           rc = sqliteOsReadLock(&pPager->fd);
1329 259 50         if( rc!=SQLITE_OK ){
1330 0           return rc;
1331             }
1332 259           pPager->state = SQLITE_READLOCK;
1333              
1334             /* If a journal file exists, try to play it back.
1335             */
1336 259 100         if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
    50          
1337             int rc;
1338              
1339             /* Get a write lock on the database
1340             */
1341 0           rc = sqliteOsWriteLock(&pPager->fd);
1342 0 0         if( rc!=SQLITE_OK ){
1343 0 0         if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
1344             /* This should never happen! */
1345 0           rc = SQLITE_INTERNAL;
1346             }
1347 0           return rc;
1348             }
1349 0           pPager->state = SQLITE_WRITELOCK;
1350              
1351             /* Open the journal for reading only. Return SQLITE_BUSY if
1352             ** we are unable to open the journal file.
1353             **
1354             ** The journal file does not need to be locked itself. The
1355             ** journal file is never open unless the main database file holds
1356             ** a write lock, so there is never any chance of two or more
1357             ** processes opening the journal at the same time.
1358             */
1359 0           rc = sqliteOsOpenReadOnly(pPager->zJournal, &pPager->jfd);
1360 0 0         if( rc!=SQLITE_OK ){
1361 0           rc = sqliteOsUnlock(&pPager->fd);
1362             assert( rc==SQLITE_OK );
1363 0           return SQLITE_BUSY;
1364             }
1365 0           pPager->journalOpen = 1;
1366 0           pPager->journalStarted = 0;
1367              
1368             /* Playback and delete the journal. Drop the database write
1369             ** lock and reacquire the read lock.
1370             */
1371 0           rc = pager_playback(pPager, 0);
1372 0 0         if( rc!=SQLITE_OK ){
1373 0           return rc;
1374             }
1375             }
1376 259           pPg = 0;
1377             }else{
1378             /* Search for page in cache */
1379 1097           pPg = pager_lookup(pPager, pgno);
1380             }
1381 1356 100         if( pPg==0 ){
1382             /* The requested page is not in the page cache. */
1383             int h;
1384 634           pPager->nMiss++;
1385 634 50         if( pPager->nPagemxPage || pPager->pFirst==0 ){
    0          
1386             /* Create a new page */
1387 634           pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
1388 634           + sizeof(u32) + pPager->nExtra );
1389 634 50         if( pPg==0 ){
1390 0           pager_unwritelock(pPager);
1391 0           pPager->errMask |= PAGER_ERR_MEM;
1392 0           return SQLITE_NOMEM;
1393             }
1394 634           memset(pPg, 0, sizeof(*pPg));
1395 634           pPg->pPager = pPager;
1396 634           pPg->pNextAll = pPager->pAll;
1397 634 100         if( pPager->pAll ){
1398 375           pPager->pAll->pPrevAll = pPg;
1399             }
1400 634           pPg->pPrevAll = 0;
1401 634           pPager->pAll = pPg;
1402 634           pPager->nPage++;
1403             }else{
1404             /* Find a page to recycle. Try to locate a page that does not
1405             ** require us to do an fsync() on the journal.
1406             */
1407 0           pPg = pPager->pFirstSynced;
1408              
1409             /* If we could not find a page that does not require an fsync()
1410             ** on the journal file then fsync the journal file. This is a
1411             ** very slow operation, so we work hard to avoid it. But sometimes
1412             ** it can't be helped.
1413             */
1414 0 0         if( pPg==0 ){
1415 0           int rc = syncJournal(pPager);
1416 0 0         if( rc!=0 ){
1417 0           sqlitepager_rollback(pPager);
1418 0           return SQLITE_IOERR;
1419             }
1420 0           pPg = pPager->pFirst;
1421             }
1422             assert( pPg->nRef==0 );
1423              
1424             /* Write the page to the database file if it is dirty.
1425             */
1426 0 0         if( pPg->dirty ){
1427             assert( pPg->needSync==0 );
1428 0           pPg->pDirty = 0;
1429 0           rc = pager_write_pagelist( pPg );
1430 0 0         if( rc!=SQLITE_OK ){
1431 0           sqlitepager_rollback(pPager);
1432 0           return SQLITE_IOERR;
1433             }
1434             }
1435             assert( pPg->dirty==0 );
1436              
1437             /* If the page we are recycling is marked as alwaysRollback, then
1438             ** set the global alwaysRollback flag, thus disabling the
1439             ** sqlite_dont_rollback() optimization for the rest of this transaction.
1440             ** It is necessary to do this because the page marked alwaysRollback
1441             ** might be reloaded at a later time but at that point we won't remember
1442             ** that is was marked alwaysRollback. This means that all pages must
1443             ** be marked as alwaysRollback from here on out.
1444             */
1445 0 0         if( pPg->alwaysRollback ){
1446 0           pPager->alwaysRollback = 1;
1447             }
1448              
1449             /* Unlink the old page from the free list and the hash table
1450             */
1451 0 0         if( pPg==pPager->pFirstSynced ){
1452 0           PgHdr *p = pPg->pNextFree;
1453 0 0         while( p && p->needSync ){ p = p->pNextFree; }
    0          
1454 0           pPager->pFirstSynced = p;
1455             }
1456 0 0         if( pPg->pPrevFree ){
1457 0           pPg->pPrevFree->pNextFree = pPg->pNextFree;
1458             }else{
1459             assert( pPager->pFirst==pPg );
1460 0           pPager->pFirst = pPg->pNextFree;
1461             }
1462 0 0         if( pPg->pNextFree ){
1463 0           pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1464             }else{
1465             assert( pPager->pLast==pPg );
1466 0           pPager->pLast = pPg->pPrevFree;
1467             }
1468 0           pPg->pNextFree = pPg->pPrevFree = 0;
1469 0 0         if( pPg->pNextHash ){
1470 0           pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1471             }
1472 0 0         if( pPg->pPrevHash ){
1473 0           pPg->pPrevHash->pNextHash = pPg->pNextHash;
1474             }else{
1475 0           h = pager_hash(pPg->pgno);
1476             assert( pPager->aHash[h]==pPg );
1477 0           pPager->aHash[h] = pPg->pNextHash;
1478             }
1479 0           pPg->pNextHash = pPg->pPrevHash = 0;
1480 0           pPager->nOvfl++;
1481             }
1482 634           pPg->pgno = pgno;
1483 634 100         if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
    100          
1484             sqliteCheckMemory(pPager->aInJournal, pgno/8);
1485             assert( pPager->journalOpen );
1486 145           pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
1487 145           pPg->needSync = 0;
1488             }else{
1489 489           pPg->inJournal = 0;
1490 489           pPg->needSync = 0;
1491             }
1492 634 50         if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
    0          
1493 0 0         && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
1494 0           page_add_to_ckpt_list(pPg);
1495             }else{
1496 634           page_remove_from_ckpt_list(pPg);
1497             }
1498 634           pPg->dirty = 0;
1499 634           pPg->nRef = 1;
1500             REFINFO(pPg);
1501 634           pPager->nRef++;
1502 634           h = pager_hash(pgno);
1503 634           pPg->pNextHash = pPager->aHash[h];
1504 634           pPager->aHash[h] = pPg;
1505 634 50         if( pPg->pNextHash ){
1506             assert( pPg->pNextHash->pPrevHash==0 );
1507 0           pPg->pNextHash->pPrevHash = pPg;
1508             }
1509 634 50         if( pPager->nExtra>0 ){
1510 634           memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1511             }
1512 634 100         if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
1513 634 50         if( pPager->errMask!=0 ){
1514 0           sqlitepager_unref(PGHDR_TO_DATA(pPg));
1515 0           rc = pager_errcode(pPager);
1516 0           return rc;
1517             }
1518 634 100         if( pPager->dbSize<(int)pgno ){
1519 166           memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1520             }else{
1521             int rc;
1522 468           sqliteOsSeek(&pPager->fd, (pgno-1)*(sql_off_t)SQLITE_PAGE_SIZE);
1523 468           rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1524             TRACE2("FETCH %d\n", pPg->pgno);
1525             CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
1526 468 50         if( rc!=SQLITE_OK ){
1527             sql_off_t fileSize;
1528 0 0         if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
1529 0 0         || fileSize>=pgno*SQLITE_PAGE_SIZE ){
1530 0           sqlitepager_unref(PGHDR_TO_DATA(pPg));
1531 0           return rc;
1532             }else{
1533 634           memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1534             }
1535             }
1536             }
1537             }else{
1538             /* The requested page is in the page cache. */
1539 722           pPager->nHit++;
1540 722 100         page_ref(pPg);
1541             }
1542 1356           *ppPage = PGHDR_TO_DATA(pPg);
1543 1356           return SQLITE_OK;
1544             }
1545              
1546             /*
1547             ** Acquire a page if it is already in the in-memory cache. Do
1548             ** not read the page from disk. Return a pointer to the page,
1549             ** or 0 if the page is not in cache.
1550             **
1551             ** See also sqlitepager_get(). The difference between this routine
1552             ** and sqlitepager_get() is that _get() will go to the disk and read
1553             ** in the page if the page is not already in cache. This routine
1554             ** returns NULL if the page is not in cache or if a disk I/O error
1555             ** has ever happened.
1556             */
1557 2           void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1558             PgHdr *pPg;
1559              
1560             assert( pPager!=0 );
1561             assert( pgno!=0 );
1562 2 50         if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1563 0           return 0;
1564             }
1565             /* if( pPager->nRef==0 ){
1566             ** return 0;
1567             ** }
1568             */
1569 2           pPg = pager_lookup(pPager, pgno);
1570 2 50         if( pPg==0 ) return 0;
1571 2 50         page_ref(pPg);
1572 2           return PGHDR_TO_DATA(pPg);
1573             }
1574              
1575             /*
1576             ** Release a page.
1577             **
1578             ** If the number of references to the page drop to zero, then the
1579             ** page is added to the LRU list. When all references to all pages
1580             ** are released, a rollback occurs and the lock on the database is
1581             ** removed.
1582             */
1583 1367           int sqlitepager_unref(void *pData){
1584             PgHdr *pPg;
1585              
1586             /* Decrement the reference count for this page
1587             */
1588 1367           pPg = DATA_TO_PGHDR(pData);
1589             assert( pPg->nRef>0 );
1590 1367           pPg->nRef--;
1591             REFINFO(pPg);
1592              
1593             /* When the number of references to a page reach 0, call the
1594             ** destructor and add the page to the freelist.
1595             */
1596 1367 100         if( pPg->nRef==0 ){
1597             Pager *pPager;
1598 691           pPager = pPg->pPager;
1599 691           pPg->pNextFree = 0;
1600 691           pPg->pPrevFree = pPager->pLast;
1601 691           pPager->pLast = pPg;
1602 691 100         if( pPg->pPrevFree ){
1603 409           pPg->pPrevFree->pNextFree = pPg;
1604             }else{
1605 282           pPager->pFirst = pPg;
1606             }
1607 691 100         if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
    100          
1608 215           pPager->pFirstSynced = pPg;
1609             }
1610 691 50         if( pPager->xDestructor ){
1611 691           pPager->xDestructor(pData);
1612             }
1613            
1614             /* When all pages reach the freelist, drop the read lock from
1615             ** the database file.
1616             */
1617 691           pPager->nRef--;
1618             assert( pPager->nRef>=0 );
1619 691 100         if( pPager->nRef==0 ){
1620 256           pager_reset(pPager);
1621             }
1622             }
1623 1367           return SQLITE_OK;
1624             }
1625              
1626             /*
1627             ** Create a journal file for pPager. There should already be a write
1628             ** lock on the database file when this routine is called.
1629             **
1630             ** Return SQLITE_OK if everything. Return an error code and release the
1631             ** write lock if anything goes wrong.
1632             */
1633 98           static int pager_open_journal(Pager *pPager){
1634             int rc;
1635             assert( pPager->state==SQLITE_WRITELOCK );
1636             assert( pPager->journalOpen==0 );
1637             assert( pPager->useJournal );
1638 98           sqlitepager_pagecount(pPager);
1639 98           pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1640 98 50         if( pPager->aInJournal==0 ){
1641 0           sqliteOsReadLock(&pPager->fd);
1642 0           pPager->state = SQLITE_READLOCK;
1643 0           return SQLITE_NOMEM;
1644             }
1645 98           rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1646 98 50         if( rc!=SQLITE_OK ){
1647 0           sqliteFree(pPager->aInJournal);
1648 0           pPager->aInJournal = 0;
1649 0           sqliteOsReadLock(&pPager->fd);
1650 0           pPager->state = SQLITE_READLOCK;
1651 0           return SQLITE_CANTOPEN;
1652             }
1653 98           sqliteOsOpenDirectory(pPager->zDirectory, &pPager->jfd);
1654 98           pPager->journalOpen = 1;
1655 98           pPager->journalStarted = 0;
1656 98           pPager->needSync = 0;
1657 98           pPager->alwaysRollback = 0;
1658 98           pPager->nRec = 0;
1659 98 50         if( pPager->errMask!=0 ){
1660 0           rc = pager_errcode(pPager);
1661 0           return rc;
1662             }
1663 98           pPager->origDbSize = pPager->dbSize;
1664             if( journal_format==JOURNAL_FORMAT_3 ){
1665 98           rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
1666 98 50         if( rc==SQLITE_OK ){
1667 98 100         rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
1668             }
1669 98 50         if( rc==SQLITE_OK ){
1670 98           sqliteRandomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
1671 98           rc = write32bits(&pPager->jfd, pPager->cksumInit);
1672             }
1673             }else if( journal_format==JOURNAL_FORMAT_2 ){
1674             rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
1675             }else{
1676             assert( journal_format==JOURNAL_FORMAT_1 );
1677             rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
1678             }
1679 98 50         if( rc==SQLITE_OK ){
1680 98           rc = write32bits(&pPager->jfd, pPager->dbSize);
1681             }
1682 98 50         if( pPager->ckptAutoopen && rc==SQLITE_OK ){
    0          
1683 0           rc = sqlitepager_ckpt_begin(pPager);
1684             }
1685 98 50         if( rc!=SQLITE_OK ){
1686 0           rc = pager_unwritelock(pPager);
1687 0 0         if( rc==SQLITE_OK ){
1688 0           rc = SQLITE_FULL;
1689             }
1690             }
1691 98           return rc;
1692             }
1693              
1694             /*
1695             ** Acquire a write-lock on the database. The lock is removed when
1696             ** the any of the following happen:
1697             **
1698             ** * sqlitepager_commit() is called.
1699             ** * sqlitepager_rollback() is called.
1700             ** * sqlitepager_close() is called.
1701             ** * sqlitepager_unref() is called to on every outstanding page.
1702             **
1703             ** The parameter to this routine is a pointer to any open page of the
1704             ** database file. Nothing changes about the page - it is used merely
1705             ** to acquire a pointer to the Pager structure and as proof that there
1706             ** is already a read-lock on the database.
1707             **
1708             ** A journal file is opened if this is not a temporary file. For
1709             ** temporary files, the opening of the journal file is deferred until
1710             ** there is an actual need to write to the journal.
1711             **
1712             ** If the database is already write-locked, this routine is a no-op.
1713             */
1714 435           int sqlitepager_begin(void *pData){
1715 435           PgHdr *pPg = DATA_TO_PGHDR(pData);
1716 435           Pager *pPager = pPg->pPager;
1717 435           int rc = SQLITE_OK;
1718             assert( pPg->nRef>0 );
1719             assert( pPager->state!=SQLITE_UNLOCK );
1720 435 100         if( pPager->state==SQLITE_READLOCK ){
1721             assert( pPager->aInJournal==0 );
1722 156           rc = sqliteOsWriteLock(&pPager->fd);
1723 156 50         if( rc!=SQLITE_OK ){
1724 0           return rc;
1725             }
1726 156           pPager->state = SQLITE_WRITELOCK;
1727 156           pPager->dirtyFile = 0;
1728             TRACE1("TRANSACTION\n");
1729 156 100         if( pPager->useJournal && !pPager->tempFile ){
    100          
1730 75           rc = pager_open_journal(pPager);
1731             }
1732             }
1733 435           return rc;
1734             }
1735              
1736             /*
1737             ** Mark a data page as writeable. The page is written into the journal
1738             ** if it is not there already. This routine must be called before making
1739             ** changes to a page.
1740             **
1741             ** The first time this routine is called, the pager creates a new
1742             ** journal and acquires a write lock on the database. If the write
1743             ** lock could not be acquired, this routine returns SQLITE_BUSY. The
1744             ** calling routine must check for that return value and be careful not to
1745             ** change any page data until this routine returns SQLITE_OK.
1746             **
1747             ** If the journal file could not be written because the disk is full,
1748             ** then this routine returns SQLITE_FULL and does an immediate rollback.
1749             ** All subsequent write attempts also return SQLITE_FULL until there
1750             ** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1751             ** reset.
1752             */
1753 426           int sqlitepager_write(void *pData){
1754 426           PgHdr *pPg = DATA_TO_PGHDR(pData);
1755 426           Pager *pPager = pPg->pPager;
1756 426           int rc = SQLITE_OK;
1757              
1758             /* Check for errors
1759             */
1760 426 50         if( pPager->errMask ){
1761 0           return pager_errcode(pPager);
1762             }
1763 426 50         if( pPager->readOnly ){
1764 0           return SQLITE_PERM;
1765             }
1766              
1767             /* Mark the page as dirty. If the page has already been written
1768             ** to the journal then we can return right away.
1769             */
1770 426           pPg->dirty = 1;
1771 426 100         if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
    50          
    50          
1772 147           pPager->dirtyFile = 1;
1773 147           return SQLITE_OK;
1774             }
1775              
1776             /* If we get this far, it means that the page needs to be
1777             ** written to the transaction journal or the ckeckpoint journal
1778             ** or both.
1779             **
1780             ** First check to see that the transaction journal exists and
1781             ** create it if it does not.
1782             */
1783             assert( pPager->state!=SQLITE_UNLOCK );
1784 279           rc = sqlitepager_begin(pData);
1785 279 50         if( rc!=SQLITE_OK ){
1786 0           return rc;
1787             }
1788             assert( pPager->state==SQLITE_WRITELOCK );
1789 279 100         if( !pPager->journalOpen && pPager->useJournal ){
    100          
1790 23           rc = pager_open_journal(pPager);
1791 23 50         if( rc!=SQLITE_OK ) return rc;
1792             }
1793             assert( pPager->journalOpen || !pPager->useJournal );
1794 279           pPager->dirtyFile = 1;
1795              
1796             /* The transaction journal now exists and we have a write lock on the
1797             ** main database file. Write the current page to the transaction
1798             ** journal if it is not there already.
1799             */
1800 279 50         if( !pPg->inJournal && pPager->useJournal ){
    100          
1801 247 100         if( (int)pPg->pgno <= pPager->origDbSize ){
1802             int szPg;
1803             u32 saved;
1804             if( journal_format>=JOURNAL_FORMAT_3 ){
1805 130           u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
1806 130           saved = *(u32*)PGHDR_TO_EXTRA(pPg);
1807 130           store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
1808 130           szPg = SQLITE_PAGE_SIZE+8;
1809             }else{
1810             szPg = SQLITE_PAGE_SIZE+4;
1811             }
1812 130           store32bits(pPg->pgno, pPg, -4);
1813             CODEC(pPager, pData, pPg->pgno, 7);
1814 130           rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
1815             TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
1816             CODEC(pPager, pData, pPg->pgno, 0);
1817             if( journal_format>=JOURNAL_FORMAT_3 ){
1818 130           *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
1819             }
1820 130 50         if( rc!=SQLITE_OK ){
1821 0           sqlitepager_rollback(pPager);
1822 0           pPager->errMask |= PAGER_ERR_FULL;
1823 0           return rc;
1824             }
1825 130           pPager->nRec++;
1826             assert( pPager->aInJournal!=0 );
1827 130           pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1828 130           pPg->needSync = !pPager->noSync;
1829 130           pPg->inJournal = 1;
1830 130 50         if( pPager->ckptInUse ){
1831 0           pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1832 130           page_add_to_ckpt_list(pPg);
1833             }
1834             }else{
1835 117 50         pPg->needSync = !pPager->journalStarted && !pPager->noSync;
    100          
1836             TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
1837             }
1838 247 100         if( pPg->needSync ){
1839 200           pPager->needSync = 1;
1840             }
1841             }
1842              
1843             /* If the checkpoint journal is open and the page is not in it,
1844             ** then write the current page to the checkpoint journal. Note that
1845             ** the checkpoint journal always uses the simplier format 2 that lacks
1846             ** checksums. The header is also omitted from the checkpoint journal.
1847             */
1848 279 50         if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
    0          
    0          
1849             assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1850 0           store32bits(pPg->pgno, pPg, -4);
1851             CODEC(pPager, pData, pPg->pgno, 7);
1852 0           rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
1853             TRACE2("CKPT-JOURNAL %d\n", pPg->pgno);
1854             CODEC(pPager, pData, pPg->pgno, 0);
1855 0 0         if( rc!=SQLITE_OK ){
1856 0           sqlitepager_rollback(pPager);
1857 0           pPager->errMask |= PAGER_ERR_FULL;
1858 0           return rc;
1859             }
1860 0           pPager->ckptNRec++;
1861             assert( pPager->aInCkpt!=0 );
1862 0           pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1863 0           page_add_to_ckpt_list(pPg);
1864             }
1865              
1866             /* Update the database size and return.
1867             */
1868 279 100         if( pPager->dbSize<(int)pPg->pgno ){
1869 106           pPager->dbSize = pPg->pgno;
1870             }
1871 279           return rc;
1872             }
1873              
1874             /*
1875             ** Return TRUE if the page given in the argument was previously passed
1876             ** to sqlitepager_write(). In other words, return TRUE if it is ok
1877             ** to change the content of the page.
1878             */
1879 0           int sqlitepager_iswriteable(void *pData){
1880 0           PgHdr *pPg = DATA_TO_PGHDR(pData);
1881 0           return pPg->dirty;
1882             }
1883              
1884             /*
1885             ** Replace the content of a single page with the information in the third
1886             ** argument.
1887             */
1888 0           int sqlitepager_overwrite(Pager *pPager, Pgno pgno, void *pData){
1889             void *pPage;
1890             int rc;
1891              
1892 0           rc = sqlitepager_get(pPager, pgno, &pPage);
1893 0 0         if( rc==SQLITE_OK ){
1894 0           rc = sqlitepager_write(pPage);
1895 0 0         if( rc==SQLITE_OK ){
1896 0           memcpy(pPage, pData, SQLITE_PAGE_SIZE);
1897             }
1898 0           sqlitepager_unref(pPage);
1899             }
1900 0           return rc;
1901             }
1902              
1903             /*
1904             ** A call to this routine tells the pager that it is not necessary to
1905             ** write the information on page "pgno" back to the disk, even though
1906             ** that page might be marked as dirty.
1907             **
1908             ** The overlying software layer calls this routine when all of the data
1909             ** on the given page is unused. The pager marks the page as clean so
1910             ** that it does not get written to disk.
1911             **
1912             ** Tests show that this optimization, together with the
1913             ** sqlitepager_dont_rollback() below, more than double the speed
1914             ** of large INSERT operations and quadruple the speed of large DELETEs.
1915             **
1916             ** When this routine is called, set the alwaysRollback flag to true.
1917             ** Subsequent calls to sqlitepager_dont_rollback() for the same page
1918             ** will thereafter be ignored. This is necessary to avoid a problem
1919             ** where a page with data is added to the freelist during one part of
1920             ** a transaction then removed from the freelist during a later part
1921             ** of the same transaction and reused for some other purpose. When it
1922             ** is first added to the freelist, this routine is called. When reused,
1923             ** the dont_rollback() routine is called. But because the page contains
1924             ** critical data, we still need to be sure it gets rolled back in spite
1925             ** of the dont_rollback() call.
1926             */
1927 40           void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1928             PgHdr *pPg;
1929              
1930 40           pPg = pager_lookup(pPager, pgno);
1931 40           pPg->alwaysRollback = 1;
1932 40 50         if( pPg && pPg->dirty ){
    100          
1933 8 50         if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSizedbSize ){
    0          
1934             /* If this pages is the last page in the file and the file has grown
1935             ** during the current transaction, then do NOT mark the page as clean.
1936             ** When the database file grows, we must make sure that the last page
1937             ** gets written at least once so that the disk file will be the correct
1938             ** size. If you do not write this page and the size of the file
1939             ** on the disk ends up being too small, that can lead to database
1940             ** corruption during the next transaction.
1941             */
1942             }else{
1943             TRACE2("DONT_WRITE %d\n", pgno);
1944 8           pPg->dirty = 0;
1945             }
1946             }
1947 40           }
1948              
1949             /*
1950             ** A call to this routine tells the pager that if a rollback occurs,
1951             ** it is not necessary to restore the data on the given page. This
1952             ** means that the pager does not have to record the given page in the
1953             ** rollback journal.
1954             */
1955 7           void sqlitepager_dont_rollback(void *pData){
1956 7           PgHdr *pPg = DATA_TO_PGHDR(pData);
1957 7           Pager *pPager = pPg->pPager;
1958              
1959 7 50         if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
    50          
1960 7 50         if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
    50          
1961 7 50         if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
    50          
1962             assert( pPager->aInJournal!=0 );
1963 7           pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1964 7           pPg->inJournal = 1;
1965 7 50         if( pPager->ckptInUse ){
1966 0           pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1967 0           page_add_to_ckpt_list(pPg);
1968             }
1969             TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
1970             }
1971 7 50         if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
    0          
    0          
1972             assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1973             assert( pPager->aInCkpt!=0 );
1974 0           pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1975 0           page_add_to_ckpt_list(pPg);
1976             }
1977             }
1978              
1979             /*
1980             ** Commit all changes to the database and release the write lock.
1981             **
1982             ** If the commit fails for any reason, a rollback attempt is made
1983             ** and an error code is returned. If the commit worked, SQLITE_OK
1984             ** is returned.
1985             */
1986 143           int sqlitepager_commit(Pager *pPager){
1987             int rc;
1988             PgHdr *pPg;
1989              
1990 143 50         if( pPager->errMask==PAGER_ERR_FULL ){
1991 0           rc = sqlitepager_rollback(pPager);
1992 0 0         if( rc==SQLITE_OK ){
1993 0           rc = SQLITE_FULL;
1994             }
1995 0           return rc;
1996             }
1997 143 50         if( pPager->errMask!=0 ){
1998 0           rc = pager_errcode(pPager);
1999 0           return rc;
2000             }
2001 143 50         if( pPager->state!=SQLITE_WRITELOCK ){
2002 0           return SQLITE_ERROR;
2003             }
2004             TRACE1("COMMIT\n");
2005 143 100         if( pPager->dirtyFile==0 ){
2006             /* Exit early (without doing the time-consuming sqliteOsSync() calls)
2007             ** if there have been no changes to the database file. */
2008             assert( pPager->needSync==0 );
2009 52           rc = pager_unwritelock(pPager);
2010 52           pPager->dbSize = -1;
2011 52           return rc;
2012             }
2013             assert( pPager->journalOpen );
2014 91           rc = syncJournal(pPager);
2015 91 50         if( rc!=SQLITE_OK ){
2016 0           goto commit_abort;
2017             }
2018 91           pPg = pager_get_all_dirty_pages(pPager);
2019 91 50         if( pPg ){
2020 91           rc = pager_write_pagelist(pPg);
2021 91 50         if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
    100          
    50          
2022             goto commit_abort;
2023             }
2024             }
2025 91           rc = pager_unwritelock(pPager);
2026 91           pPager->dbSize = -1;
2027 91           return rc;
2028              
2029             /* Jump here if anything goes wrong during the commit process.
2030             */
2031             commit_abort:
2032 0           rc = sqlitepager_rollback(pPager);
2033 0 0         if( rc==SQLITE_OK ){
2034 0           rc = SQLITE_FULL;
2035             }
2036 0           return rc;
2037             }
2038              
2039             /*
2040             ** Rollback all changes. The database falls back to read-only mode.
2041             ** All in-memory cache pages revert to their original data contents.
2042             ** The journal is deleted.
2043             **
2044             ** This routine cannot fail unless some other process is not following
2045             ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
2046             ** process is writing trash into the journal file (SQLITE_CORRUPT) or
2047             ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
2048             ** codes are returned for all these occasions. Otherwise,
2049             ** SQLITE_OK is returned.
2050             */
2051 13           int sqlitepager_rollback(Pager *pPager){
2052             int rc;
2053             TRACE1("ROLLBACK\n");
2054 13 100         if( !pPager->dirtyFile || !pPager->journalOpen ){
    100          
2055 10           rc = pager_unwritelock(pPager);
2056 10           pPager->dbSize = -1;
2057 10           return rc;
2058             }
2059              
2060 3 50         if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
    0          
2061 0 0         if( pPager->state>=SQLITE_WRITELOCK ){
2062 0           pager_playback(pPager, 1);
2063             }
2064 0           return pager_errcode(pPager);
2065             }
2066 3 50         if( pPager->state!=SQLITE_WRITELOCK ){
2067 0           return SQLITE_OK;
2068             }
2069 3           rc = pager_playback(pPager, 1);
2070 3 50         if( rc!=SQLITE_OK ){
2071 0           rc = SQLITE_CORRUPT;
2072 0           pPager->errMask |= PAGER_ERR_CORRUPT;
2073             }
2074 3           pPager->dbSize = -1;
2075 3           return rc;
2076             }
2077              
2078             /*
2079             ** Return TRUE if the database file is opened read-only. Return FALSE
2080             ** if the database is (in theory) writable.
2081             */
2082 53           int sqlitepager_isreadonly(Pager *pPager){
2083 53           return pPager->readOnly;
2084             }
2085              
2086             /*
2087             ** This routine is used for testing and analysis only.
2088             */
2089 0           int *sqlitepager_stats(Pager *pPager){
2090             static int a[9];
2091 0           a[0] = pPager->nRef;
2092 0           a[1] = pPager->nPage;
2093 0           a[2] = pPager->mxPage;
2094 0           a[3] = pPager->dbSize;
2095 0           a[4] = pPager->state;
2096 0           a[5] = pPager->errMask;
2097 0           a[6] = pPager->nHit;
2098 0           a[7] = pPager->nMiss;
2099 0           a[8] = pPager->nOvfl;
2100 0           return a;
2101             }
2102              
2103             /*
2104             ** Set the checkpoint.
2105             **
2106             ** This routine should be called with the transaction journal already
2107             ** open. A new checkpoint journal is created that can be used to rollback
2108             ** changes of a single SQL command within a larger transaction.
2109             */
2110 0           int sqlitepager_ckpt_begin(Pager *pPager){
2111             int rc;
2112             char zTemp[SQLITE_TEMPNAME_SIZE];
2113 0 0         if( !pPager->journalOpen ){
2114 0           pPager->ckptAutoopen = 1;
2115 0           return SQLITE_OK;
2116             }
2117             assert( pPager->journalOpen );
2118             assert( !pPager->ckptInUse );
2119 0           pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
2120 0 0         if( pPager->aInCkpt==0 ){
2121 0           sqliteOsReadLock(&pPager->fd);
2122 0           return SQLITE_NOMEM;
2123             }
2124             #ifndef NDEBUG
2125             rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
2126             if( rc ) goto ckpt_begin_failed;
2127             assert( pPager->ckptJSize ==
2128             pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
2129             #endif
2130 0           pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
2131 0           + JOURNAL_HDR_SZ(journal_format);
2132 0           pPager->ckptSize = pPager->dbSize;
2133 0 0         if( !pPager->ckptOpen ){
2134 0           rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
2135 0 0         if( rc ) goto ckpt_begin_failed;
2136 0           pPager->ckptOpen = 1;
2137 0           pPager->ckptNRec = 0;
2138             }
2139 0           pPager->ckptInUse = 1;
2140 0           return SQLITE_OK;
2141            
2142             ckpt_begin_failed:
2143 0 0         if( pPager->aInCkpt ){
2144 0           sqliteFree(pPager->aInCkpt);
2145 0           pPager->aInCkpt = 0;
2146             }
2147 0           return rc;
2148             }
2149              
2150             /*
2151             ** Commit a checkpoint.
2152             */
2153 156           int sqlitepager_ckpt_commit(Pager *pPager){
2154 156 50         if( pPager->ckptInUse ){
2155             PgHdr *pPg, *pNext;
2156 0           sqliteOsSeek(&pPager->cpfd, 0);
2157             /* sqliteOsTruncate(&pPager->cpfd, 0); */
2158 0           pPager->ckptNRec = 0;
2159 0           pPager->ckptInUse = 0;
2160 0           sqliteFree( pPager->aInCkpt );
2161 0           pPager->aInCkpt = 0;
2162 0 0         for(pPg=pPager->pCkpt; pPg; pPg=pNext){
2163 0           pNext = pPg->pNextCkpt;
2164             assert( pPg->inCkpt );
2165 0           pPg->inCkpt = 0;
2166 0           pPg->pPrevCkpt = pPg->pNextCkpt = 0;
2167             }
2168 0           pPager->pCkpt = 0;
2169             }
2170 156           pPager->ckptAutoopen = 0;
2171 156           return SQLITE_OK;
2172             }
2173              
2174             /*
2175             ** Rollback a checkpoint.
2176             */
2177 0           int sqlitepager_ckpt_rollback(Pager *pPager){
2178             int rc;
2179 0 0         if( pPager->ckptInUse ){
2180 0           rc = pager_ckpt_playback(pPager);
2181 0           sqlitepager_ckpt_commit(pPager);
2182             }else{
2183 0           rc = SQLITE_OK;
2184             }
2185 0           pPager->ckptAutoopen = 0;
2186 0           return rc;
2187             }
2188              
2189             /*
2190             ** Return the full pathname of the database file.
2191             */
2192 0           const char *sqlitepager_filename(Pager *pPager){
2193 0           return pPager->zFilename;
2194             }
2195              
2196             /*
2197             ** Set the codec for this pager
2198             */
2199 0           void sqlitepager_set_codec(
2200             Pager *pPager,
2201             void (*xCodec)(void*,void*,Pgno,int),
2202             void *pCodecArg
2203             ){
2204 0           pPager->xCodec = xCodec;
2205 0           pPager->pCodecArg = pCodecArg;
2206 0           }
2207              
2208             #ifdef SQLITE_TEST
2209             /*
2210             ** Print a listing of all referenced pages and their ref count.
2211             */
2212             void sqlitepager_refdump(Pager *pPager){
2213             PgHdr *pPg;
2214             for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
2215             if( pPg->nRef<=0 ) continue;
2216             printf("PAGE %3d addr=0x%08x nRef=%d\n",
2217             pPg->pgno, (int)PGHDR_TO_DATA(pPg), pPg->nRef);
2218             }
2219             }
2220             #endif