File Coverage

btree.c
Criterion Covered Total %
statement 861 1319 65.2
branch 438 996 43.9
condition n/a
subroutine n/a
pod n/a
total 1299 2315 56.1


line stmt bran cond sub pod time code
1             /*
2             ** 2001 September 15
3             **
4             ** The author disclaims copyright to this source code. In place of
5             ** a legal notice, here is a blessing:
6             **
7             ** May you do good and not evil.
8             ** May you find forgiveness for yourself and forgive others.
9             ** May you share freely, never taking more than you give.
10             **
11             *************************************************************************
12             ** $Id: btree.c,v 1.1.1.1 2004/08/08 15:03:57 matt Exp $
13             **
14             ** This file implements a external (disk-based) database using BTrees.
15             ** For a detailed discussion of BTrees, refer to
16             **
17             ** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
18             ** "Sorting And Searching", pages 473-480. Addison-Wesley
19             ** Publishing Company, Reading, Massachusetts.
20             **
21             ** The basic idea is that each page of the file contains N database
22             ** entries and N+1 pointers to subpages.
23             **
24             ** ----------------------------------------------------------------
25             ** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) |
26             ** ----------------------------------------------------------------
27             **
28             ** All of the keys on the page that Ptr(0) points to have values less
29             ** than Key(0). All of the keys on page Ptr(1) and its subpages have
30             ** values greater than Key(0) and less than Key(1). All of the keys
31             ** on Ptr(N+1) and its subpages have values greater than Key(N). And
32             ** so forth.
33             **
34             ** Finding a particular key requires reading O(log(M)) pages from the
35             ** disk where M is the number of entries in the tree.
36             **
37             ** In this implementation, a single file can hold one or more separate
38             ** BTrees. Each BTree is identified by the index of its root page. The
39             ** key and data for any entry are combined to form the "payload". Up to
40             ** MX_LOCAL_PAYLOAD bytes of payload can be carried directly on the
41             ** database page. If the payload is larger than MX_LOCAL_PAYLOAD bytes
42             ** then surplus bytes are stored on overflow pages. The payload for an
43             ** entry and the preceding pointer are combined to form a "Cell". Each
44             ** page has a small header which contains the Ptr(N+1) pointer.
45             **
46             ** The first page of the file contains a magic string used to verify that
47             ** the file really is a valid BTree database, a pointer to a list of unused
48             ** pages in the file, and some meta information. The root of the first
49             ** BTree begins on page 2 of the file. (Pages are numbered beginning with
50             ** 1, not 0.) Thus a minimum database contains 2 pages.
51             */
52             #include "sqliteInt.h"
53             #include "pager.h"
54             #include "btree.h"
55             #include
56              
57             /* Forward declarations */
58             static BtOps sqliteBtreeOps;
59             static BtCursorOps sqliteBtreeCursorOps;
60              
61             /*
62             ** Macros used for byteswapping. B is a pointer to the Btree
63             ** structure. This is needed to access the Btree.needSwab boolean
64             ** in order to tell if byte swapping is needed or not.
65             ** X is an unsigned integer. SWAB16 byte swaps a 16-bit integer.
66             ** SWAB32 byteswaps a 32-bit integer.
67             */
68             #define SWAB16(B,X) ((B)->needSwab? swab16((u16)X) : ((u16)X))
69             #define SWAB32(B,X) ((B)->needSwab? swab32(X) : (X))
70             #define SWAB_ADD(B,X,A) \
71             if((B)->needSwab){ X=swab32(swab32(X)+A); }else{ X += (A); }
72              
73             /*
74             ** The following global variable - available only if SQLITE_TEST is
75             ** defined - is used to determine whether new databases are created in
76             ** native byte order or in non-native byte order. Non-native byte order
77             ** databases are created for testing purposes only. Under normal operation,
78             ** only native byte-order databases should be created, but we should be
79             ** able to read or write existing databases regardless of the byteorder.
80             */
81             #ifdef SQLITE_TEST
82             int btree_native_byte_order = 1;
83             #else
84             # define btree_native_byte_order 1
85             #endif
86              
87             /*
88             ** Forward declarations of structures used only in this file.
89             */
90             typedef struct PageOne PageOne;
91             typedef struct MemPage MemPage;
92             typedef struct PageHdr PageHdr;
93             typedef struct Cell Cell;
94             typedef struct CellHdr CellHdr;
95             typedef struct FreeBlk FreeBlk;
96             typedef struct OverflowPage OverflowPage;
97             typedef struct FreelistInfo FreelistInfo;
98              
99             /*
100             ** All structures on a database page are aligned to 4-byte boundries.
101             ** This routine rounds up a number of bytes to the next multiple of 4.
102             **
103             ** This might need to change for computer architectures that require
104             ** and 8-byte alignment boundry for structures.
105             */
106             #define ROUNDUP(X) ((X+3) & ~3)
107              
108             /*
109             ** This is a magic string that appears at the beginning of every
110             ** SQLite database in order to identify the file as a real database.
111             */
112             static const char zMagicHeader[] =
113             "** This file contains an SQLite 2.1 database **";
114             #define MAGIC_SIZE (sizeof(zMagicHeader))
115              
116             /*
117             ** This is a magic integer also used to test the integrity of the database
118             ** file. This integer is used in addition to the string above so that
119             ** if the file is written on a little-endian architecture and read
120             ** on a big-endian architectures (or vice versa) we can detect the
121             ** problem.
122             **
123             ** The number used was obtained at random and has no special
124             ** significance other than the fact that it represents a different
125             ** integer on little-endian and big-endian machines.
126             */
127             #define MAGIC 0xdae37528
128              
129             /*
130             ** The first page of the database file contains a magic header string
131             ** to identify the file as an SQLite database file. It also contains
132             ** a pointer to the first free page of the file. Page 2 contains the
133             ** root of the principle BTree. The file might contain other BTrees
134             ** rooted on pages above 2.
135             **
136             ** The first page also contains SQLITE_N_BTREE_META integers that
137             ** can be used by higher-level routines.
138             **
139             ** Remember that pages are numbered beginning with 1. (See pager.c
140             ** for additional information.) Page 0 does not exist and a page
141             ** number of 0 is used to mean "no such page".
142             */
143             struct PageOne {
144             char zMagic[MAGIC_SIZE]; /* String that identifies the file as a database */
145             int iMagic; /* Integer to verify correct byte order */
146             Pgno freeList; /* First free page in a list of all free pages */
147             int nFree; /* Number of pages on the free list */
148             int aMeta[SQLITE_N_BTREE_META-1]; /* User defined integers */
149             };
150              
151             /*
152             ** Each database page has a header that is an instance of this
153             ** structure.
154             **
155             ** PageHdr.firstFree is 0 if there is no free space on this page.
156             ** Otherwise, PageHdr.firstFree is the index in MemPage.u.aDisk[] of a
157             ** FreeBlk structure that describes the first block of free space.
158             ** All free space is defined by a linked list of FreeBlk structures.
159             **
160             ** Data is stored in a linked list of Cell structures. PageHdr.firstCell
161             ** is the index into MemPage.u.aDisk[] of the first cell on the page. The
162             ** Cells are kept in sorted order.
163             **
164             ** A Cell contains all information about a database entry and a pointer
165             ** to a child page that contains other entries less than itself. In
166             ** other words, the i-th Cell contains both Ptr(i) and Key(i). The
167             ** right-most pointer of the page is contained in PageHdr.rightChild.
168             */
169             struct PageHdr {
170             Pgno rightChild; /* Child page that comes after all cells on this page */
171             u16 firstCell; /* Index in MemPage.u.aDisk[] of the first cell */
172             u16 firstFree; /* Index in MemPage.u.aDisk[] of the first free block */
173             };
174              
175             /*
176             ** Entries on a page of the database are called "Cells". Each Cell
177             ** has a header and data. This structure defines the header. The
178             ** key and data (collectively the "payload") follow this header on
179             ** the database page.
180             **
181             ** A definition of the complete Cell structure is given below. The
182             ** header for the cell must be defined first in order to do some
183             ** of the sizing #defines that follow.
184             */
185             struct CellHdr {
186             Pgno leftChild; /* Child page that comes before this cell */
187             u16 nKey; /* Number of bytes in the key */
188             u16 iNext; /* Index in MemPage.u.aDisk[] of next cell in sorted order */
189             u8 nKeyHi; /* Upper 8 bits of key size for keys larger than 64K bytes */
190             u8 nDataHi; /* Upper 8 bits of data size when the size is more than 64K */
191             u16 nData; /* Number of bytes of data */
192             };
193              
194             /*
195             ** The key and data size are split into a lower 16-bit segment and an
196             ** upper 8-bit segment in order to pack them together into a smaller
197             ** space. The following macros reassembly a key or data size back
198             ** into an integer.
199             */
200             #define NKEY(b,h) (SWAB16(b,h.nKey) + h.nKeyHi*65536)
201             #define NDATA(b,h) (SWAB16(b,h.nData) + h.nDataHi*65536)
202              
203             /*
204             ** The minimum size of a complete Cell. The Cell must contain a header
205             ** and at least 4 bytes of payload.
206             */
207             #define MIN_CELL_SIZE (sizeof(CellHdr)+4)
208              
209             /*
210             ** The maximum number of database entries that can be held in a single
211             ** page of the database.
212             */
213             #define MX_CELL ((SQLITE_USABLE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)
214              
215             /*
216             ** The amount of usable space on a single page of the BTree. This is the
217             ** page size minus the overhead of the page header.
218             */
219             #define USABLE_SPACE (SQLITE_USABLE_SIZE - sizeof(PageHdr))
220              
221             /*
222             ** The maximum amount of payload (in bytes) that can be stored locally for
223             ** a database entry. If the entry contains more data than this, the
224             ** extra goes onto overflow pages.
225             **
226             ** This number is chosen so that at least 4 cells will fit on every page.
227             */
228             #define MX_LOCAL_PAYLOAD ((USABLE_SPACE/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3)
229              
230             /*
231             ** Data on a database page is stored as a linked list of Cell structures.
232             ** Both the key and the data are stored in aPayload[]. The key always comes
233             ** first. The aPayload[] field grows as necessary to hold the key and data,
234             ** up to a maximum of MX_LOCAL_PAYLOAD bytes. If the size of the key and
235             ** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the
236             ** page number of the first overflow page.
237             **
238             ** Though this structure is fixed in size, the Cell on the database
239             ** page varies in size. Every cell has a CellHdr and at least 4 bytes
240             ** of payload space. Additional payload bytes (up to the maximum of
241             ** MX_LOCAL_PAYLOAD) and the Cell.ovfl value are allocated only as
242             ** needed.
243             */
244             struct Cell {
245             CellHdr h; /* The cell header */
246             char aPayload[MX_LOCAL_PAYLOAD]; /* Key and data */
247             Pgno ovfl; /* The first overflow page */
248             };
249              
250             /*
251             ** Free space on a page is remembered using a linked list of the FreeBlk
252             ** structures. Space on a database page is allocated in increments of
253             ** at least 4 bytes and is always aligned to a 4-byte boundry. The
254             ** linked list of FreeBlks is always kept in order by address.
255             */
256             struct FreeBlk {
257             u16 iSize; /* Number of bytes in this block of free space */
258             u16 iNext; /* Index in MemPage.u.aDisk[] of the next free block */
259             };
260              
261             /*
262             ** The number of bytes of payload that will fit on a single overflow page.
263             */
264             #define OVERFLOW_SIZE (SQLITE_USABLE_SIZE-sizeof(Pgno))
265              
266             /*
267             ** When the key and data for a single entry in the BTree will not fit in
268             ** the MX_LOCAL_PAYLOAD bytes of space available on the database page,
269             ** then all extra bytes are written to a linked list of overflow pages.
270             ** Each overflow page is an instance of the following structure.
271             **
272             ** Unused pages in the database are also represented by instances of
273             ** the OverflowPage structure. The PageOne.freeList field is the
274             ** page number of the first page in a linked list of unused database
275             ** pages.
276             */
277             struct OverflowPage {
278             Pgno iNext;
279             char aPayload[OVERFLOW_SIZE];
280             };
281              
282             /*
283             ** The PageOne.freeList field points to a linked list of overflow pages
284             ** hold information about free pages. The aPayload section of each
285             ** overflow page contains an instance of the following structure. The
286             ** aFree[] array holds the page number of nFree unused pages in the disk
287             ** file.
288             */
289             struct FreelistInfo {
290             int nFree;
291             Pgno aFree[(OVERFLOW_SIZE-sizeof(int))/sizeof(Pgno)];
292             };
293              
294             /*
295             ** For every page in the database file, an instance of the following structure
296             ** is stored in memory. The u.aDisk[] array contains the raw bits read from
297             ** the disk. The rest is auxiliary information held in memory only. The
298             ** auxiliary info is only valid for regular database pages - it is not
299             ** used for overflow pages and pages on the freelist.
300             **
301             ** Of particular interest in the auxiliary info is the apCell[] entry. Each
302             ** apCell[] entry is a pointer to a Cell structure in u.aDisk[]. The cells are
303             ** put in this array so that they can be accessed in constant time, rather
304             ** than in linear time which would be needed if we had to walk the linked
305             ** list on every access.
306             **
307             ** Note that apCell[] contains enough space to hold up to two more Cells
308             ** than can possibly fit on one page. In the steady state, every apCell[]
309             ** points to memory inside u.aDisk[]. But in the middle of an insert
310             ** operation, some apCell[] entries may temporarily point to data space
311             ** outside of u.aDisk[]. This is a transient situation that is quickly
312             ** resolved. But while it is happening, it is possible for a database
313             ** page to hold as many as two more cells than it might otherwise hold.
314             ** The extra two entries in apCell[] are an allowance for this situation.
315             **
316             ** The pParent field points back to the parent page. This allows us to
317             ** walk up the BTree from any leaf to the root. Care must be taken to
318             ** unref() the parent page pointer when this page is no longer referenced.
319             ** The pageDestructor() routine handles that chore.
320             */
321             struct MemPage {
322             union u_page_data {
323             char aDisk[SQLITE_PAGE_SIZE]; /* Page data stored on disk */
324             PageHdr hdr; /* Overlay page header */
325             } u;
326             u8 isInit; /* True if auxiliary data is initialized */
327             u8 idxShift; /* True if apCell[] indices have changed */
328             u8 isOverfull; /* Some apCell[] points outside u.aDisk[] */
329             MemPage *pParent; /* The parent of this page. NULL for root */
330             int idxParent; /* Index in pParent->apCell[] of this node */
331             int nFree; /* Number of free bytes in u.aDisk[] */
332             int nCell; /* Number of entries on this page */
333             Cell *apCell[MX_CELL+2]; /* All data entires in sorted order */
334             };
335              
336             /*
337             ** The in-memory image of a disk page has the auxiliary information appended
338             ** to the end. EXTRA_SIZE is the number of bytes of space needed to hold
339             ** that extra information.
340             */
341             #define EXTRA_SIZE (sizeof(MemPage)-sizeof(union u_page_data))
342              
343             /*
344             ** Everything we need to know about an open database
345             */
346             struct Btree {
347             BtOps *pOps; /* Function table */
348             Pager *pPager; /* The page cache */
349             BtCursor *pCursor; /* A list of all open cursors */
350             PageOne *page1; /* First page of the database */
351             u8 inTrans; /* True if a transaction is in progress */
352             u8 inCkpt; /* True if there is a checkpoint on the transaction */
353             u8 readOnly; /* True if the underlying file is readonly */
354             u8 needSwab; /* Need to byte-swapping */
355             };
356             typedef Btree Bt;
357              
358             /*
359             ** A cursor is a pointer to a particular entry in the BTree.
360             ** The entry is identified by its MemPage and the index in
361             ** MemPage.apCell[] of the entry.
362             */
363             struct BtCursor {
364             BtCursorOps *pOps; /* Function table */
365             Btree *pBt; /* The Btree to which this cursor belongs */
366             BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */
367             BtCursor *pShared; /* Loop of cursors with the same root page */
368             Pgno pgnoRoot; /* The root page of this tree */
369             MemPage *pPage; /* Page that contains the entry */
370             int idx; /* Index of the entry in pPage->apCell[] */
371             u8 wrFlag; /* True if writable */
372             u8 eSkip; /* Determines if next step operation is a no-op */
373             u8 iMatch; /* compare result from last sqliteBtreeMoveto() */
374             };
375              
376             /*
377             ** Legal values for BtCursor.eSkip.
378             */
379             #define SKIP_NONE 0 /* Always step the cursor */
380             #define SKIP_NEXT 1 /* The next sqliteBtreeNext() is a no-op */
381             #define SKIP_PREV 2 /* The next sqliteBtreePrevious() is a no-op */
382             #define SKIP_INVALID 3 /* Calls to Next() and Previous() are invalid */
383              
384             /* Forward declarations */
385             static int fileBtreeCloseCursor(BtCursor *pCur);
386              
387             /*
388             ** Routines for byte swapping.
389             */
390 0           u16 swab16(u16 x){
391 0           return ((x & 0xff)<<8) | ((x>>8)&0xff);
392             }
393 0           u32 swab32(u32 x){
394 0           return ((x & 0xff)<<24) | ((x & 0xff00)<<8) |
395 0           ((x>>8) & 0xff00) | ((x>>24)&0xff);
396             }
397              
398             /*
399             ** Compute the total number of bytes that a Cell needs on the main
400             ** database page. The number returned includes the Cell header,
401             ** local payload storage, and the pointer to overflow pages (if
402             ** applicable). Additional space allocated on overflow pages
403             ** is NOT included in the value returned from this routine.
404             */
405 409           static int cellSize(Btree *pBt, Cell *pCell){
406 409 50         int n = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
    50          
407 409 100         if( n>MX_LOCAL_PAYLOAD ){
408 3           n = MX_LOCAL_PAYLOAD + sizeof(Pgno);
409             }else{
410 406           n = ROUNDUP(n);
411             }
412 409           n += sizeof(CellHdr);
413 409           return n;
414             }
415              
416             /*
417             ** Defragment the page given. All Cells are moved to the
418             ** beginning of the page and all free space is collected
419             ** into one big FreeBlk at the end of the page.
420             */
421 0           static void defragmentPage(Btree *pBt, MemPage *pPage){
422             int pc, i, n;
423             FreeBlk *pFBlk;
424             char newPage[SQLITE_USABLE_SIZE];
425              
426             assert( sqlitepager_iswriteable(pPage) );
427             assert( pPage->isInit );
428 0           pc = sizeof(PageHdr);
429 0 0         pPage->u.hdr.firstCell = SWAB16(pBt, pc);
430 0           memcpy(newPage, pPage->u.aDisk, pc);
431 0 0         for(i=0; inCell; i++){
432 0           Cell *pCell = pPage->apCell[i];
433              
434             /* This routine should never be called on an overfull page. The
435             ** following asserts verify that constraint. */
436             assert( Addr(pCell) > Addr(pPage) );
437             assert( Addr(pCell) < Addr(pPage) + SQLITE_USABLE_SIZE );
438              
439 0           n = cellSize(pBt, pCell);
440 0 0         pCell->h.iNext = SWAB16(pBt, pc + n);
441 0           memcpy(&newPage[pc], pCell, n);
442 0           pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
443 0           pc += n;
444             }
445             assert( pPage->nFree==SQLITE_USABLE_SIZE-pc );
446 0           memcpy(pPage->u.aDisk, newPage, pc);
447 0 0         if( pPage->nCell>0 ){
448 0           pPage->apCell[pPage->nCell-1]->h.iNext = 0;
449             }
450 0           pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
451 0 0         pFBlk->iSize = SWAB16(pBt, SQLITE_USABLE_SIZE - pc);
452 0           pFBlk->iNext = 0;
453 0 0         pPage->u.hdr.firstFree = SWAB16(pBt, pc);
454 0           memset(&pFBlk[1], 0, SQLITE_USABLE_SIZE - pc - sizeof(FreeBlk));
455 0           }
456              
457             /*
458             ** Allocate nByte bytes of space on a page. nByte must be a
459             ** multiple of 4.
460             **
461             ** Return the index into pPage->u.aDisk[] of the first byte of
462             ** the new allocation. Or return 0 if there is not enough free
463             ** space on the page to satisfy the allocation request.
464             **
465             ** If the page contains nBytes of free space but does not contain
466             ** nBytes of contiguous free space, then this routine automatically
467             ** calls defragementPage() to consolidate all free space before
468             ** allocating the new chunk.
469             */
470 133           static int allocateSpace(Btree *pBt, MemPage *pPage, int nByte){
471             FreeBlk *p;
472             u16 *pIdx;
473             int start;
474             int iSize;
475             #ifndef NDEBUG
476             int cnt = 0;
477             #endif
478              
479             assert( sqlitepager_iswriteable(pPage) );
480             assert( nByte==ROUNDUP(nByte) );
481             assert( pPage->isInit );
482 133 100         if( pPage->nFreeisOverfull ) return 0;
    50          
483 132           pIdx = &pPage->u.hdr.firstFree;
484 132 50         p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
485 135 50         while( (iSize = SWAB16(pBt, p->iSize))
    100          
486             assert( cnt++ < SQLITE_USABLE_SIZE/4 );
487 3 50         if( p->iNext==0 ){
488 0           defragmentPage(pBt, pPage);
489 0           pIdx = &pPage->u.hdr.firstFree;
490             }else{
491 3           pIdx = &p->iNext;
492             }
493 3 50         p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
494             }
495 132 100         if( iSize==nByte ){
496 3 50         start = SWAB16(pBt, *pIdx);
497 3           *pIdx = p->iNext;
498             }else{
499             FreeBlk *pNew;
500 129 50         start = SWAB16(pBt, *pIdx);
501 129           pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte];
502 129           pNew->iNext = p->iNext;
503 129 50         pNew->iSize = SWAB16(pBt, iSize - nByte);
504 129 50         *pIdx = SWAB16(pBt, start + nByte);
505             }
506 132           pPage->nFree -= nByte;
507 132           return start;
508             }
509              
510             /*
511             ** Return a section of the MemPage.u.aDisk[] to the freelist.
512             ** The first byte of the new free block is pPage->u.aDisk[start]
513             ** and the size of the block is "size" bytes. Size must be
514             ** a multiple of 4.
515             **
516             ** Most of the effort here is involved in coalesing adjacent
517             ** free blocks into a single big free block.
518             */
519 39           static void freeSpace(Btree *pBt, MemPage *pPage, int start, int size){
520 39           int end = start + size;
521             u16 *pIdx, idx;
522             FreeBlk *pFBlk;
523             FreeBlk *pNew;
524             FreeBlk *pNext;
525             int iSize;
526              
527             assert( sqlitepager_iswriteable(pPage) );
528             assert( size == ROUNDUP(size) );
529             assert( start == ROUNDUP(start) );
530             assert( pPage->isInit );
531 39           pIdx = &pPage->u.hdr.firstFree;
532 39 50         idx = SWAB16(pBt, *pIdx);
533 39 50         while( idx!=0 && idx
    100          
534 2           pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
535 2 50         iSize = SWAB16(pBt, pFBlk->iSize);
536 2 50         if( idx + iSize == start ){
537 2 50         pFBlk->iSize = SWAB16(pBt, iSize + size);
538 2 50         if( idx + iSize + size == SWAB16(pBt, pFBlk->iNext) ){
    100          
539 1           pNext = (FreeBlk*)&pPage->u.aDisk[idx + iSize + size];
540 1 50         if( pBt->needSwab ){
541 0           pFBlk->iSize = swab16((u16)swab16(pNext->iSize)+iSize+size);
542             }else{
543 1           pFBlk->iSize += pNext->iSize;
544             }
545 1           pFBlk->iNext = pNext->iNext;
546             }
547 2           pPage->nFree += size;
548 2           return;
549             }
550 0           pIdx = &pFBlk->iNext;
551 0 0         idx = SWAB16(pBt, *pIdx);
552             }
553 37           pNew = (FreeBlk*)&pPage->u.aDisk[start];
554 37 100         if( idx != end ){
555 5 50         pNew->iSize = SWAB16(pBt, size);
556 5 50         pNew->iNext = SWAB16(pBt, idx);
557             }else{
558 32           pNext = (FreeBlk*)&pPage->u.aDisk[idx];
559 32 50         pNew->iSize = SWAB16(pBt, size + SWAB16(pBt, pNext->iSize));
    0          
    50          
560 32           pNew->iNext = pNext->iNext;
561             }
562 37 50         *pIdx = SWAB16(pBt, start);
563 37           pPage->nFree += size;
564             }
565              
566             /*
567             ** Initialize the auxiliary information for a disk block.
568             **
569             ** The pParent parameter must be a pointer to the MemPage which
570             ** is the parent of the page being initialized. The root of the
571             ** BTree (usually page 2) has no parent and so for that page,
572             ** pParent==NULL.
573             **
574             ** Return SQLITE_OK on success. If we see that the page does
575             ** not contain a well-formed database page, then return
576             ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
577             ** guarantee that the page is well-formed. It only shows that
578             ** we failed to detect any corruption.
579             */
580 630           static int initPage(Bt *pBt, MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
581             int idx; /* An index into pPage->u.aDisk[] */
582             Cell *pCell; /* A pointer to a Cell in pPage->u.aDisk[] */
583             FreeBlk *pFBlk; /* A pointer to a free block in pPage->u.aDisk[] */
584             int sz; /* The size of a Cell in bytes */
585             int freeSpace; /* Amount of free space on the page */
586              
587 630 100         if( pPage->pParent ){
588             assert( pPage->pParent==pParent );
589 1           return SQLITE_OK;
590             }
591 629 100         if( pParent ){
592 4           pPage->pParent = pParent;
593 4           sqlitepager_ref(pParent);
594             }
595 629 100         if( pPage->isInit ) return SQLITE_OK;
596 216           pPage->isInit = 1;
597 216           pPage->nCell = 0;
598 216           freeSpace = USABLE_SPACE;
599 216 50         idx = SWAB16(pBt, pPage->u.hdr.firstCell);
600 453 100         while( idx!=0 ){
601 237 50         if( idx>SQLITE_USABLE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
602 237 50         if( idx
603 237 50         if( idx!=ROUNDUP(idx) ) goto page_format_error;
604 237           pCell = (Cell*)&pPage->u.aDisk[idx];
605 237           sz = cellSize(pBt, pCell);
606 237 50         if( idx+sz > SQLITE_USABLE_SIZE ) goto page_format_error;
607 237           freeSpace -= sz;
608 237           pPage->apCell[pPage->nCell++] = pCell;
609 237 50         idx = SWAB16(pBt, pCell->h.iNext);
610             }
611 216           pPage->nFree = 0;
612 216 50         idx = SWAB16(pBt, pPage->u.hdr.firstFree);
613 406 100         while( idx!=0 ){
614             int iNext;
615 190 50         if( idx>SQLITE_USABLE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
616 190 50         if( idx
617 190           pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
618 190 50         pPage->nFree += SWAB16(pBt, pFBlk->iSize);
619 190 50         iNext = SWAB16(pBt, pFBlk->iNext);
620 190 100         if( iNext>0 && iNext <= idx ) goto page_format_error;
    50          
621 190           idx = iNext;
622             }
623 216 100         if( pPage->nCell==0 && pPage->nFree==0 ){
    100          
624             /* As a special case, an uninitialized root page appears to be
625             ** an empty database */
626 30           return SQLITE_OK;
627             }
628 186 50         if( pPage->nFree!=freeSpace ) goto page_format_error;
629 186           return SQLITE_OK;
630              
631             page_format_error:
632 0           return SQLITE_CORRUPT;
633             }
634              
635             /*
636             ** Set up a raw page so that it looks like a database page holding
637             ** no entries.
638             */
639 68           static void zeroPage(Btree *pBt, MemPage *pPage){
640             PageHdr *pHdr;
641             FreeBlk *pFBlk;
642             assert( sqlitepager_iswriteable(pPage) );
643 68           memset(pPage, 0, SQLITE_USABLE_SIZE);
644 68           pHdr = &pPage->u.hdr;
645 68           pHdr->firstCell = 0;
646 68 50         pHdr->firstFree = SWAB16(pBt, sizeof(*pHdr));
647 68           pFBlk = (FreeBlk*)&pHdr[1];
648 68           pFBlk->iNext = 0;
649 68           pPage->nFree = SQLITE_USABLE_SIZE - sizeof(*pHdr);
650 68 50         pFBlk->iSize = SWAB16(pBt, pPage->nFree);
651 68           pPage->nCell = 0;
652 68           pPage->isOverfull = 0;
653 68           }
654              
655             /*
656             ** This routine is called when the reference count for a page
657             ** reaches zero. We need to unref the pParent pointer when that
658             ** happens.
659             */
660 691           static void pageDestructor(void *pData){
661 691           MemPage *pPage = (MemPage*)pData;
662 691 100         if( pPage->pParent ){
663 6           MemPage *pParent = pPage->pParent;
664 6           pPage->pParent = 0;
665 6           sqlitepager_unref(pParent);
666             }
667 691           }
668              
669             /*
670             ** Open a new database.
671             **
672             ** Actually, this routine just sets up the internal data structures
673             ** for accessing the database. We do not open the database file
674             ** until the first page is loaded.
675             **
676             ** zFilename is the name of the database file. If zFilename is NULL
677             ** a new database with a random name is created. This randomly named
678             ** database file will be deleted when sqliteBtreeClose() is called.
679             */
680 53           int sqliteBtreeOpen(
681             const char *zFilename, /* Name of the file containing the BTree database */
682             int omitJournal, /* if TRUE then do not journal this file */
683             int nCache, /* How many pages in the page cache */
684             Btree **ppBtree /* Pointer to new Btree object written here */
685             ){
686             Btree *pBt;
687             int rc;
688              
689             /*
690             ** The following asserts make sure that structures used by the btree are
691             ** the right size. This is to guard against size changes that result
692             ** when compiling on a different architecture.
693             */
694             assert( sizeof(u32)==4 );
695             assert( sizeof(u16)==2 );
696             assert( sizeof(Pgno)==4 );
697             assert( sizeof(PageHdr)==8 );
698             assert( sizeof(CellHdr)==12 );
699             assert( sizeof(FreeBlk)==4 );
700             assert( sizeof(OverflowPage)==SQLITE_USABLE_SIZE );
701             assert( sizeof(FreelistInfo)==OVERFLOW_SIZE );
702             assert( sizeof(ptr)==sizeof(char*) );
703             assert( sizeof(uptr)==sizeof(ptr) );
704              
705 53           pBt = sqliteMalloc( sizeof(*pBt) );
706 53 50         if( pBt==0 ){
707 0           *ppBtree = 0;
708 0           return SQLITE_NOMEM;
709             }
710 53 50         if( nCache<10 ) nCache = 10;
711 53           rc = sqlitepager_open(&pBt->pPager, zFilename, nCache, EXTRA_SIZE,
712             !omitJournal);
713 53 50         if( rc!=SQLITE_OK ){
714 0 0         if( pBt->pPager ) sqlitepager_close(pBt->pPager);
715 0           sqliteFree(pBt);
716 0           *ppBtree = 0;
717 0           return rc;
718             }
719 53           sqlitepager_set_destructor(pBt->pPager, pageDestructor);
720 53           pBt->pCursor = 0;
721 53           pBt->page1 = 0;
722 53           pBt->readOnly = sqlitepager_isreadonly(pBt->pPager);
723 53           pBt->pOps = &sqliteBtreeOps;
724 53           *ppBtree = pBt;
725 53           return SQLITE_OK;
726             }
727              
728             /*
729             ** Close an open database and invalidate all cursors.
730             */
731 53           static int fileBtreeClose(Btree *pBt){
732 53 50         while( pBt->pCursor ){
733 0           fileBtreeCloseCursor(pBt->pCursor);
734             }
735 53           sqlitepager_close(pBt->pPager);
736 53           sqliteFree(pBt);
737 53           return SQLITE_OK;
738             }
739              
740             /*
741             ** Change the limit on the number of pages allowed in the cache.
742             **
743             ** The maximum number of cache pages is set to the absolute
744             ** value of mxPage. If mxPage is negative, the pager will
745             ** operate asynchronously - it will not stop to do fsync()s
746             ** to insure data is written to the disk surface before
747             ** continuing. Transactions still work if synchronous is off,
748             ** and the database cannot be corrupted if this program
749             ** crashes. But if the operating system crashes or there is
750             ** an abrupt power failure when synchronous is off, the database
751             ** could be left in an inconsistent and unrecoverable state.
752             ** Synchronous is on by default so database corruption is not
753             ** normally a worry.
754             */
755 54           static int fileBtreeSetCacheSize(Btree *pBt, int mxPage){
756 54           sqlitepager_set_cachesize(pBt->pPager, mxPage);
757 54           return SQLITE_OK;
758             }
759              
760             /*
761             ** Change the way data is synced to disk in order to increase or decrease
762             ** how well the database resists damage due to OS crashes and power
763             ** failures. Level 1 is the same as asynchronous (no syncs() occur and
764             ** there is a high probability of damage) Level 2 is the default. There
765             ** is a very low but non-zero probability of damage. Level 3 reduces the
766             ** probability of damage to near zero but with a write performance reduction.
767             */
768 54           static int fileBtreeSetSafetyLevel(Btree *pBt, int level){
769 54           sqlitepager_set_safety_level(pBt->pPager, level);
770 54           return SQLITE_OK;
771             }
772              
773             /*
774             ** Get a reference to page1 of the database file. This will
775             ** also acquire a readlock on that file.
776             **
777             ** SQLITE_OK is returned on success. If the file is not a
778             ** well-formed database file, then SQLITE_CORRUPT is returned.
779             ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
780             ** is returned if we run out of memory. SQLITE_PROTOCOL is returned
781             ** if there is a locking protocol violation.
782             */
783 259           static int lockBtree(Btree *pBt){
784             int rc;
785 259 50         if( pBt->page1 ) return SQLITE_OK;
786 259           rc = sqlitepager_get(pBt->pPager, 1, (void**)&pBt->page1);
787 259 50         if( rc!=SQLITE_OK ) return rc;
788              
789             /* Do some checking to help insure the file we opened really is
790             ** a valid database file.
791             */
792 259 100         if( sqlitepager_pagecount(pBt->pPager)>0 ){
793 202           PageOne *pP1 = pBt->page1;
794 202 50         if( strcmp(pP1->zMagic,zMagicHeader)!=0 ||
    50          
795 0 0         (pP1->iMagic!=MAGIC && swab32(pP1->iMagic)!=MAGIC) ){
796 0           rc = SQLITE_NOTADB;
797 0           goto page1_init_failed;
798             }
799 202           pBt->needSwab = pP1->iMagic!=MAGIC;
800             }
801 259           return rc;
802              
803             page1_init_failed:
804 0           sqlitepager_unref(pBt->page1);
805 0           pBt->page1 = 0;
806 0           return rc;
807             }
808              
809             /*
810             ** If there are no outstanding cursors and we are not in the middle
811             ** of a transaction but there is a read lock on the database, then
812             ** this routine unrefs the first page of the database file which
813             ** has the effect of releasing the read lock.
814             **
815             ** If there are any outstanding cursors, this routine is a no-op.
816             **
817             ** If there is a transaction in progress, this routine is a no-op.
818             */
819 425           static void unlockBtreeIfUnused(Btree *pBt){
820 425 100         if( pBt->inTrans==0 && pBt->pCursor==0 && pBt->page1!=0 ){
    100          
    50          
821 256           sqlitepager_unref(pBt->page1);
822 256           pBt->page1 = 0;
823 256           pBt->inTrans = 0;
824 256           pBt->inCkpt = 0;
825             }
826 425           }
827              
828             /*
829             ** Create a new database by initializing the first two pages of the
830             ** file.
831             */
832 156           static int newDatabase(Btree *pBt){
833             MemPage *pRoot;
834             PageOne *pP1;
835             int rc;
836 156 100         if( sqlitepager_pagecount(pBt->pPager)>1 ) return SQLITE_OK;
837 27           pP1 = pBt->page1;
838 27           rc = sqlitepager_write(pBt->page1);
839 27 50         if( rc ) return rc;
840 27           rc = sqlitepager_get(pBt->pPager, 2, (void**)&pRoot);
841 27 50         if( rc ) return rc;
842 27           rc = sqlitepager_write(pRoot);
843 27 50         if( rc ){
844 0           sqlitepager_unref(pRoot);
845 0           return rc;
846             }
847 27           strcpy(pP1->zMagic, zMagicHeader);
848             if( btree_native_byte_order ){
849 27           pP1->iMagic = MAGIC;
850 27           pBt->needSwab = 0;
851             }else{
852             pP1->iMagic = swab32(MAGIC);
853             pBt->needSwab = 1;
854             }
855 27           zeroPage(pBt, pRoot);
856 27           sqlitepager_unref(pRoot);
857 156           return SQLITE_OK;
858             }
859              
860             /*
861             ** Attempt to start a new transaction.
862             **
863             ** A transaction must be started before attempting any changes
864             ** to the database. None of the following routines will work
865             ** unless a transaction is started first:
866             **
867             ** sqliteBtreeCreateTable()
868             ** sqliteBtreeCreateIndex()
869             ** sqliteBtreeClearTable()
870             ** sqliteBtreeDropTable()
871             ** sqliteBtreeInsert()
872             ** sqliteBtreeDelete()
873             ** sqliteBtreeUpdateMeta()
874             */
875 156           static int fileBtreeBeginTrans(Btree *pBt){
876             int rc;
877 156 50         if( pBt->inTrans ) return SQLITE_ERROR;
878 156 50         if( pBt->readOnly ) return SQLITE_READONLY;
879 156 50         if( pBt->page1==0 ){
880 156           rc = lockBtree(pBt);
881 156 50         if( rc!=SQLITE_OK ){
882 0           return rc;
883             }
884             }
885 156           rc = sqlitepager_begin(pBt->page1);
886 156 50         if( rc==SQLITE_OK ){
887 156           rc = newDatabase(pBt);
888             }
889 156 50         if( rc==SQLITE_OK ){
890 156           pBt->inTrans = 1;
891 156           pBt->inCkpt = 0;
892             }else{
893 0           unlockBtreeIfUnused(pBt);
894             }
895 156           return rc;
896             }
897              
898             /*
899             ** Commit the transaction currently in progress.
900             **
901             ** This will release the write lock on the database file. If there
902             ** are no active cursors, it also releases the read lock.
903             */
904 143           static int fileBtreeCommit(Btree *pBt){
905             int rc;
906 143 50         rc = pBt->readOnly ? SQLITE_OK : sqlitepager_commit(pBt->pPager);
907 143           pBt->inTrans = 0;
908 143           pBt->inCkpt = 0;
909 143           unlockBtreeIfUnused(pBt);
910 143           return rc;
911             }
912              
913             /*
914             ** Rollback the transaction in progress. All cursors will be
915             ** invalided by this operation. Any attempt to use a cursor
916             ** that was open at the beginning of this operation will result
917             ** in an error.
918             **
919             ** This will release the write lock on the database file. If there
920             ** are no active cursors, it also releases the read lock.
921             */
922 10           static int fileBtreeRollback(Btree *pBt){
923             int rc;
924             BtCursor *pCur;
925 10 50         if( pBt->inTrans==0 ) return SQLITE_OK;
926 10           pBt->inTrans = 0;
927 10           pBt->inCkpt = 0;
928 10 50         rc = pBt->readOnly ? SQLITE_OK : sqlitepager_rollback(pBt->pPager);
929 10 50         for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
930 0 0         if( pCur->pPage && pCur->pPage->isInit==0 ){
    0          
931 0           sqlitepager_unref(pCur->pPage);
932 0           pCur->pPage = 0;
933             }
934             }
935 10           unlockBtreeIfUnused(pBt);
936 10           return rc;
937             }
938              
939             /*
940             ** Set the checkpoint for the current transaction. The checkpoint serves
941             ** as a sub-transaction that can be rolled back independently of the
942             ** main transaction. You must start a transaction before starting a
943             ** checkpoint. The checkpoint is ended automatically if the transaction
944             ** commits or rolls back.
945             **
946             ** Only one checkpoint may be active at a time. It is an error to try
947             ** to start a new checkpoint if another checkpoint is already active.
948             */
949 0           static int fileBtreeBeginCkpt(Btree *pBt){
950             int rc;
951 0 0         if( !pBt->inTrans || pBt->inCkpt ){
    0          
952 0 0         return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
953             }
954 0 0         rc = pBt->readOnly ? SQLITE_OK : sqlitepager_ckpt_begin(pBt->pPager);
955 0           pBt->inCkpt = 1;
956 0           return rc;
957             }
958              
959              
960             /*
961             ** Commit a checkpoint to transaction currently in progress. If no
962             ** checkpoint is active, this is a no-op.
963             */
964 0           static int fileBtreeCommitCkpt(Btree *pBt){
965             int rc;
966 0 0         if( pBt->inCkpt && !pBt->readOnly ){
    0          
967 0           rc = sqlitepager_ckpt_commit(pBt->pPager);
968             }else{
969 0           rc = SQLITE_OK;
970             }
971 0           pBt->inCkpt = 0;
972 0           return rc;
973             }
974              
975             /*
976             ** Rollback the checkpoint to the current transaction. If there
977             ** is no active checkpoint or transaction, this routine is a no-op.
978             **
979             ** All cursors will be invalided by this operation. Any attempt
980             ** to use a cursor that was open at the beginning of this operation
981             ** will result in an error.
982             */
983 2           static int fileBtreeRollbackCkpt(Btree *pBt){
984             int rc;
985             BtCursor *pCur;
986 2 50         if( pBt->inCkpt==0 || pBt->readOnly ) return SQLITE_OK;
    0          
987 0           rc = sqlitepager_ckpt_rollback(pBt->pPager);
988 0 0         for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
989 0 0         if( pCur->pPage && pCur->pPage->isInit==0 ){
    0          
990 0           sqlitepager_unref(pCur->pPage);
991 0           pCur->pPage = 0;
992             }
993             }
994 0           pBt->inCkpt = 0;
995 0           return rc;
996             }
997              
998             /*
999             ** Create a new cursor for the BTree whose root is on the page
1000             ** iTable. The act of acquiring a cursor gets a read lock on
1001             ** the database file.
1002             **
1003             ** If wrFlag==0, then the cursor can only be used for reading.
1004             ** If wrFlag==1, then the cursor can be used for reading or for
1005             ** writing if other conditions for writing are also met. These
1006             ** are the conditions that must be met in order for writing to
1007             ** be allowed:
1008             **
1009             ** 1: The cursor must have been opened with wrFlag==1
1010             **
1011             ** 2: No other cursors may be open with wrFlag==0 on the same table
1012             **
1013             ** 3: The database must be writable (not on read-only media)
1014             **
1015             ** 4: There must be an active transaction.
1016             **
1017             ** Condition 2 warrants further discussion. If any cursor is opened
1018             ** on a table with wrFlag==0, that prevents all other cursors from
1019             ** writing to that table. This is a kind of "read-lock". When a cursor
1020             ** is opened with wrFlag==0 it is guaranteed that the table will not
1021             ** change as long as the cursor is open. This allows the cursor to
1022             ** do a sequential scan of the table without having to worry about
1023             ** entries being inserted or deleted during the scan. Cursors should
1024             ** be opened with wrFlag==0 only if this read-lock property is needed.
1025             ** That is to say, cursors should be opened with wrFlag==0 only if they
1026             ** intend to use the sqliteBtreeNext() system call. All other cursors
1027             ** should be opened with wrFlag==1 even if they never really intend
1028             ** to write.
1029             **
1030             ** No checking is done to make sure that page iTable really is the
1031             ** root page of a b-tree. If it is not, then the cursor acquired
1032             ** will not work correctly.
1033             */
1034             static
1035 272           int fileBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){
1036             int rc;
1037             BtCursor *pCur, *pRing;
1038              
1039 272 50         if( pBt->readOnly && wrFlag ){
    0          
1040 0           *ppCur = 0;
1041 0           return SQLITE_READONLY;
1042             }
1043 272 100         if( pBt->page1==0 ){
1044 103           rc = lockBtree(pBt);
1045 103 50         if( rc!=SQLITE_OK ){
1046 0           *ppCur = 0;
1047 0           return rc;
1048             }
1049             }
1050 272           pCur = sqliteMalloc( sizeof(*pCur) );
1051 272 50         if( pCur==0 ){
1052 0           rc = SQLITE_NOMEM;
1053 0           goto create_cursor_exception;
1054             }
1055 272           pCur->pgnoRoot = (Pgno)iTable;
1056 272           rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pCur->pPage);
1057 272 50         if( rc!=SQLITE_OK ){
1058 0           goto create_cursor_exception;
1059             }
1060 272           rc = initPage(pBt, pCur->pPage, pCur->pgnoRoot, 0);
1061 272 50         if( rc!=SQLITE_OK ){
1062 0           goto create_cursor_exception;
1063             }
1064 272           pCur->pOps = &sqliteBtreeCursorOps;
1065 272           pCur->pBt = pBt;
1066 272           pCur->wrFlag = wrFlag;
1067 272           pCur->idx = 0;
1068 272           pCur->eSkip = SKIP_INVALID;
1069 272           pCur->pNext = pBt->pCursor;
1070 272 100         if( pCur->pNext ){
1071 59           pCur->pNext->pPrev = pCur;
1072             }
1073 272           pCur->pPrev = 0;
1074 272           pRing = pBt->pCursor;
1075 278 100         while( pRing && pRing->pgnoRoot!=pCur->pgnoRoot ){ pRing = pRing->pNext; }
    100          
1076 272 100         if( pRing ){
1077 54           pCur->pShared = pRing->pShared;
1078 54           pRing->pShared = pCur;
1079             }else{
1080 218           pCur->pShared = pCur;
1081             }
1082 272           pBt->pCursor = pCur;
1083 272           *ppCur = pCur;
1084 272           return SQLITE_OK;
1085              
1086             create_cursor_exception:
1087 0           *ppCur = 0;
1088 0 0         if( pCur ){
1089 0 0         if( pCur->pPage ) sqlitepager_unref(pCur->pPage);
1090 0           sqliteFree(pCur);
1091             }
1092 0           unlockBtreeIfUnused(pBt);
1093 0           return rc;
1094             }
1095              
1096             /*
1097             ** Close a cursor. The read lock on the database file is released
1098             ** when the last cursor is closed.
1099             */
1100 272           static int fileBtreeCloseCursor(BtCursor *pCur){
1101 272           Btree *pBt = pCur->pBt;
1102 272 100         if( pCur->pPrev ){
1103 3           pCur->pPrev->pNext = pCur->pNext;
1104             }else{
1105 269           pBt->pCursor = pCur->pNext;
1106             }
1107 272 100         if( pCur->pNext ){
1108 56           pCur->pNext->pPrev = pCur->pPrev;
1109             }
1110 272 50         if( pCur->pPage ){
1111 272           sqlitepager_unref(pCur->pPage);
1112             }
1113 272 100         if( pCur->pShared!=pCur ){
1114 54           BtCursor *pRing = pCur->pShared;
1115 54 50         while( pRing->pShared!=pCur ){ pRing = pRing->pShared; }
1116 54           pRing->pShared = pCur->pShared;
1117             }
1118 272           unlockBtreeIfUnused(pBt);
1119 272           sqliteFree(pCur);
1120 272           return SQLITE_OK;
1121             }
1122              
1123             /*
1124             ** Make a temporary cursor by filling in the fields of pTempCur.
1125             ** The temporary cursor is not on the cursor list for the Btree.
1126             */
1127 0           static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
1128 0           memcpy(pTempCur, pCur, sizeof(*pCur));
1129 0           pTempCur->pNext = 0;
1130 0           pTempCur->pPrev = 0;
1131 0 0         if( pTempCur->pPage ){
1132 0           sqlitepager_ref(pTempCur->pPage);
1133             }
1134 0           }
1135              
1136             /*
1137             ** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
1138             ** function above.
1139             */
1140 0           static void releaseTempCursor(BtCursor *pCur){
1141 0 0         if( pCur->pPage ){
1142 0           sqlitepager_unref(pCur->pPage);
1143             }
1144 0           }
1145              
1146             /*
1147             ** Set *pSize to the number of bytes of key in the entry the
1148             ** cursor currently points to. Always return SQLITE_OK.
1149             ** Failure is not possible. If the cursor is not currently
1150             ** pointing to an entry (which can happen, for example, if
1151             ** the database is empty) then *pSize is set to 0.
1152             */
1153 4           static int fileBtreeKeySize(BtCursor *pCur, int *pSize){
1154             Cell *pCell;
1155             MemPage *pPage;
1156              
1157 4           pPage = pCur->pPage;
1158             assert( pPage!=0 );
1159 4 100         if( pCur->idx >= pPage->nCell ){
1160 3           *pSize = 0;
1161             }else{
1162 1           pCell = pPage->apCell[pCur->idx];
1163 1 50         *pSize = NKEY(pCur->pBt, pCell->h);
1164             }
1165 4           return SQLITE_OK;
1166             }
1167              
1168             /*
1169             ** Read payload information from the entry that the pCur cursor is
1170             ** pointing to. Begin reading the payload at "offset" and read
1171             ** a total of "amt" bytes. Put the result in zBuf.
1172             **
1173             ** This routine does not make a distinction between key and data.
1174             ** It just reads bytes from the payload area.
1175             */
1176 798           static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){
1177             char *aPayload;
1178             Pgno nextPage;
1179             int rc;
1180 798           Btree *pBt = pCur->pBt;
1181             assert( pCur!=0 && pCur->pPage!=0 );
1182             assert( pCur->idx>=0 && pCur->idxpPage->nCell );
1183 798           aPayload = pCur->pPage->apCell[pCur->idx]->aPayload;
1184 798 50         if( offset
1185 798           int a = amt;
1186 798 100         if( a+offset>MX_LOCAL_PAYLOAD ){
1187 1           a = MX_LOCAL_PAYLOAD - offset;
1188             }
1189 798           memcpy(zBuf, &aPayload[offset], a);
1190 798 100         if( a==amt ){
1191 797           return SQLITE_OK;
1192             }
1193 1           offset = 0;
1194 1           zBuf += a;
1195 1           amt -= a;
1196             }else{
1197 0           offset -= MX_LOCAL_PAYLOAD;
1198             }
1199 1 50         if( amt>0 ){
1200 1 50         nextPage = SWAB32(pBt, pCur->pPage->apCell[pCur->idx]->ovfl);
1201             }
1202 34 100         while( amt>0 && nextPage ){
    50          
1203             OverflowPage *pOvfl;
1204 33           rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl);
1205 33 50         if( rc!=0 ){
1206 0           return rc;
1207             }
1208 33 50         nextPage = SWAB32(pBt, pOvfl->iNext);
1209 33 50         if( offset
1210 33           int a = amt;
1211 33 100         if( a + offset > OVERFLOW_SIZE ){
1212 32           a = OVERFLOW_SIZE - offset;
1213             }
1214 33           memcpy(zBuf, &pOvfl->aPayload[offset], a);
1215 33           offset = 0;
1216 33           amt -= a;
1217 33           zBuf += a;
1218             }else{
1219 0           offset -= OVERFLOW_SIZE;
1220             }
1221 33           sqlitepager_unref(pOvfl);
1222             }
1223 1 50         if( amt>0 ){
1224 0           return SQLITE_CORRUPT;
1225             }
1226 1           return SQLITE_OK;
1227             }
1228              
1229             /*
1230             ** Read part of the key associated with cursor pCur. A maximum
1231             ** of "amt" bytes will be transfered into zBuf[]. The transfer
1232             ** begins at "offset". The number of bytes actually read is
1233             ** returned.
1234             **
1235             ** Change: It used to be that the amount returned will be smaller
1236             ** than the amount requested if there are not enough bytes in the key
1237             ** to satisfy the request. But now, it must be the case that there
1238             ** is enough data available to satisfy the request. If not, an exception
1239             ** is raised. The change was made in an effort to boost performance
1240             ** by eliminating unneeded tests.
1241             */
1242 44           static int fileBtreeKey(BtCursor *pCur, int offset, int amt, char *zBuf){
1243             MemPage *pPage;
1244              
1245             assert( amt>=0 );
1246             assert( offset>=0 );
1247             assert( pCur->pPage!=0 );
1248 44           pPage = pCur->pPage;
1249 44 50         if( pCur->idx >= pPage->nCell ){
1250 0           return 0;
1251             }
1252             assert( amt+offset <= NKEY(pCur->pBt, pPage->apCell[pCur->idx]->h) );
1253 44           getPayload(pCur, offset, amt, zBuf);
1254 44           return amt;
1255             }
1256              
1257             /*
1258             ** Set *pSize to the number of bytes of data in the entry the
1259             ** cursor currently points to. Always return SQLITE_OK.
1260             ** Failure is not possible. If the cursor is not currently
1261             ** pointing to an entry (which can happen, for example, if
1262             ** the database is empty) then *pSize is set to 0.
1263             */
1264 386           static int fileBtreeDataSize(BtCursor *pCur, int *pSize){
1265             Cell *pCell;
1266             MemPage *pPage;
1267              
1268 386           pPage = pCur->pPage;
1269             assert( pPage!=0 );
1270 386 50         if( pCur->idx >= pPage->nCell ){
1271 0           *pSize = 0;
1272             }else{
1273 386           pCell = pPage->apCell[pCur->idx];
1274 386 50         *pSize = NDATA(pCur->pBt, pCell->h);
1275             }
1276 386           return SQLITE_OK;
1277             }
1278              
1279             /*
1280             ** Read part of the data associated with cursor pCur. A maximum
1281             ** of "amt" bytes will be transfered into zBuf[]. The transfer
1282             ** begins at "offset". The number of bytes actually read is
1283             ** returned. The amount returned will be smaller than the
1284             ** amount requested if there are not enough bytes in the data
1285             ** to satisfy the request.
1286             */
1287 754           static int fileBtreeData(BtCursor *pCur, int offset, int amt, char *zBuf){
1288             Cell *pCell;
1289             MemPage *pPage;
1290              
1291             assert( amt>=0 );
1292             assert( offset>=0 );
1293             assert( pCur->pPage!=0 );
1294 754           pPage = pCur->pPage;
1295 754 50         if( pCur->idx >= pPage->nCell ){
1296 0           return 0;
1297             }
1298 754           pCell = pPage->apCell[pCur->idx];
1299             assert( amt+offset <= NDATA(pCur->pBt, pCell->h) );
1300 754 50         getPayload(pCur, offset + NKEY(pCur->pBt, pCell->h), amt, zBuf);
1301 754           return amt;
1302             }
1303              
1304             /*
1305             ** Compare an external key against the key on the entry that pCur points to.
1306             **
1307             ** The external key is pKey and is nKey bytes long. The last nIgnore bytes
1308             ** of the key associated with pCur are ignored, as if they do not exist.
1309             ** (The normal case is for nIgnore to be zero in which case the entire
1310             ** internal key is used in the comparison.)
1311             **
1312             ** The comparison result is written to *pRes as follows:
1313             **
1314             ** *pRes<0 This means pCur
1315             **
1316             ** *pRes==0 This means pCur==pKey for all nKey bytes
1317             **
1318             ** *pRes>0 This means pCur>pKey
1319             **
1320             ** When one key is an exact prefix of the other, the shorter key is
1321             ** considered less than the longer one. In order to be equal the
1322             ** keys must be exactly the same length. (The length of the pCur key
1323             ** is the actual key length minus nIgnore bytes.)
1324             */
1325 182           static int fileBtreeKeyCompare(
1326             BtCursor *pCur, /* Pointer to entry to compare against */
1327             const void *pKey, /* Key to compare against entry that pCur points to */
1328             int nKey, /* Number of bytes in pKey */
1329             int nIgnore, /* Ignore this many bytes at the end of pCur */
1330             int *pResult /* Write the result here */
1331             ){
1332             Pgno nextPage;
1333             int n, c, rc, nLocal;
1334             Cell *pCell;
1335 182           Btree *pBt = pCur->pBt;
1336 182           const char *zKey = (const char*)pKey;
1337              
1338             assert( pCur->pPage );
1339             assert( pCur->idx>=0 && pCur->idxpPage->nCell );
1340 182           pCell = pCur->pPage->apCell[pCur->idx];
1341 182 50         nLocal = NKEY(pBt, pCell->h) - nIgnore;
1342 182 50         if( nLocal<0 ) nLocal = 0;
1343 182           n = nKey
1344 182 50         if( n>MX_LOCAL_PAYLOAD ){
1345 0           n = MX_LOCAL_PAYLOAD;
1346             }
1347 182           c = memcmp(pCell->aPayload, zKey, n);
1348 182 100         if( c!=0 ){
1349 152           *pResult = c;
1350 152           return SQLITE_OK;
1351             }
1352 30           zKey += n;
1353 30           nKey -= n;
1354 30           nLocal -= n;
1355 30 50         nextPage = SWAB32(pBt, pCell->ovfl);
1356 30 50         while( nKey>0 && nLocal>0 ){
    0          
1357             OverflowPage *pOvfl;
1358 0 0         if( nextPage==0 ){
1359 0           return SQLITE_CORRUPT;
1360             }
1361 0           rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl);
1362 0 0         if( rc ){
1363 0           return rc;
1364             }
1365 0 0         nextPage = SWAB32(pBt, pOvfl->iNext);
1366 0           n = nKey
1367 0 0         if( n>OVERFLOW_SIZE ){
1368 0           n = OVERFLOW_SIZE;
1369             }
1370 0           c = memcmp(pOvfl->aPayload, zKey, n);
1371 0           sqlitepager_unref(pOvfl);
1372 0 0         if( c!=0 ){
1373 0           *pResult = c;
1374 0           return SQLITE_OK;
1375             }
1376 0           nKey -= n;
1377 0           nLocal -= n;
1378 0           zKey += n;
1379             }
1380 30 50         if( c==0 ){
1381 30           c = nLocal - nKey;
1382             }
1383 30           *pResult = c;
1384 30           return SQLITE_OK;
1385             }
1386              
1387             /*
1388             ** Move the cursor down to a new child page. The newPgno argument is the
1389             ** page number of the child page in the byte order of the disk image.
1390             */
1391 4           static int moveToChild(BtCursor *pCur, int newPgno){
1392             int rc;
1393             MemPage *pNewPage;
1394 4           Btree *pBt = pCur->pBt;
1395              
1396 4 50         newPgno = SWAB32(pBt, newPgno);
1397 4           rc = sqlitepager_get(pBt->pPager, newPgno, (void**)&pNewPage);
1398 4 50         if( rc ) return rc;
1399 4           rc = initPage(pBt, pNewPage, newPgno, pCur->pPage);
1400 4 50         if( rc ) return rc;
1401             assert( pCur->idx>=pCur->pPage->nCell
1402             || pCur->pPage->apCell[pCur->idx]->h.leftChild==SWAB32(pBt,newPgno) );
1403             assert( pCur->idxpPage->nCell
1404             || pCur->pPage->u.hdr.rightChild==SWAB32(pBt,newPgno) );
1405 4           pNewPage->idxParent = pCur->idx;
1406 4           pCur->pPage->idxShift = 0;
1407 4           sqlitepager_unref(pCur->pPage);
1408 4           pCur->pPage = pNewPage;
1409 4           pCur->idx = 0;
1410 4 50         if( pNewPage->nCell<1 ){
1411 0           return SQLITE_CORRUPT;
1412             }
1413 4           return SQLITE_OK;
1414             }
1415              
1416             /*
1417             ** Move the cursor up to the parent page.
1418             **
1419             ** pCur->idx is set to the cell index that contains the pointer
1420             ** to the page we are coming from. If we are coming from the
1421             ** right-most child page then pCur->idx is set to one more than
1422             ** the largest cell index.
1423             */
1424 4           static void moveToParent(BtCursor *pCur){
1425             Pgno oldPgno;
1426             MemPage *pParent;
1427             MemPage *pPage;
1428             int idxParent;
1429 4           pPage = pCur->pPage;
1430             assert( pPage!=0 );
1431 4           pParent = pPage->pParent;
1432             assert( pParent!=0 );
1433 4           idxParent = pPage->idxParent;
1434 4           sqlitepager_ref(pParent);
1435 4           sqlitepager_unref(pPage);
1436 4           pCur->pPage = pParent;
1437             assert( pParent->idxShift==0 );
1438 4 50         if( pParent->idxShift==0 ){
1439 4           pCur->idx = idxParent;
1440             #ifndef NDEBUG
1441             /* Verify that pCur->idx is the correct index to point back to the child
1442             ** page we just came from
1443             */
1444             oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage));
1445             if( pCur->idxnCell ){
1446             assert( pParent->apCell[idxParent]->h.leftChild==oldPgno );
1447             }else{
1448             assert( pParent->u.hdr.rightChild==oldPgno );
1449             }
1450             #endif
1451             }else{
1452             /* The MemPage.idxShift flag indicates that cell indices might have
1453             ** changed since idxParent was set and hence idxParent might be out
1454             ** of date. So recompute the parent cell index by scanning all cells
1455             ** and locating the one that points to the child we just came from.
1456             */
1457             int i;
1458 0           pCur->idx = pParent->nCell;
1459 0 0         oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage));
1460 0 0         for(i=0; inCell; i++){
1461 0 0         if( pParent->apCell[i]->h.leftChild==oldPgno ){
1462 0           pCur->idx = i;
1463 0           break;
1464             }
1465             }
1466             }
1467 4           }
1468              
1469             /*
1470             ** Move the cursor to the root page
1471             */
1472 342           static int moveToRoot(BtCursor *pCur){
1473             MemPage *pNew;
1474             int rc;
1475 342           Btree *pBt = pCur->pBt;
1476              
1477 342           rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pNew);
1478 342 50         if( rc ) return rc;
1479 342           rc = initPage(pBt, pNew, pCur->pgnoRoot, 0);
1480 342 50         if( rc ) return rc;
1481 342           sqlitepager_unref(pCur->pPage);
1482 342           pCur->pPage = pNew;
1483 342           pCur->idx = 0;
1484 342           return SQLITE_OK;
1485             }
1486              
1487             /*
1488             ** Move the cursor down to the left-most leaf entry beneath the
1489             ** entry to which it is currently pointing.
1490             */
1491 71           static int moveToLeftmost(BtCursor *pCur){
1492             Pgno pgno;
1493             int rc;
1494              
1495 73 100         while( (pgno = pCur->pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
1496 2           rc = moveToChild(pCur, pgno);
1497 2 50         if( rc ) return rc;
1498             }
1499 71           return SQLITE_OK;
1500             }
1501              
1502             /*
1503             ** Move the cursor down to the right-most leaf entry beneath the
1504             ** page to which it is currently pointing. Notice the difference
1505             ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
1506             ** finds the left-most entry beneath the *entry* whereas moveToRightmost()
1507             ** finds the right-most entry beneath the *page*.
1508             */
1509 35           static int moveToRightmost(BtCursor *pCur){
1510             Pgno pgno;
1511             int rc;
1512              
1513 35 50         while( (pgno = pCur->pPage->u.hdr.rightChild)!=0 ){
1514 0           pCur->idx = pCur->pPage->nCell;
1515 0           rc = moveToChild(pCur, pgno);
1516 0 0         if( rc ) return rc;
1517             }
1518 35           pCur->idx = pCur->pPage->nCell - 1;
1519 35           return SQLITE_OK;
1520             }
1521              
1522             /* Move the cursor to the first entry in the table. Return SQLITE_OK
1523             ** on success. Set *pRes to 0 if the cursor actually points to something
1524             ** or set *pRes to 1 if the table is empty.
1525             */
1526 141           static int fileBtreeFirst(BtCursor *pCur, int *pRes){
1527             int rc;
1528 141 50         if( pCur->pPage==0 ) return SQLITE_ABORT;
1529 141           rc = moveToRoot(pCur);
1530 141 50         if( rc ) return rc;
1531 141 100         if( pCur->pPage->nCell==0 ){
1532 72           *pRes = 1;
1533 72           return SQLITE_OK;
1534             }
1535 69           *pRes = 0;
1536 69           rc = moveToLeftmost(pCur);
1537 69           pCur->eSkip = SKIP_NONE;
1538 69           return rc;
1539             }
1540              
1541             /* Move the cursor to the last entry in the table. Return SQLITE_OK
1542             ** on success. Set *pRes to 0 if the cursor actually points to something
1543             ** or set *pRes to 1 if the table is empty.
1544             */
1545 70           static int fileBtreeLast(BtCursor *pCur, int *pRes){
1546             int rc;
1547 70 50         if( pCur->pPage==0 ) return SQLITE_ABORT;
1548 70           rc = moveToRoot(pCur);
1549 70 50         if( rc ) return rc;
1550             assert( pCur->pPage->isInit );
1551 70 100         if( pCur->pPage->nCell==0 ){
1552 35           *pRes = 1;
1553 35           return SQLITE_OK;
1554             }
1555 35           *pRes = 0;
1556 35           rc = moveToRightmost(pCur);
1557 35           pCur->eSkip = SKIP_NONE;
1558 35           return rc;
1559             }
1560              
1561             /* Move the cursor so that it points to an entry near pKey.
1562             ** Return a success code.
1563             **
1564             ** If an exact match is not found, then the cursor is always
1565             ** left pointing at a leaf page which would hold the entry if it
1566             ** were present. The cursor might point to an entry that comes
1567             ** before or after the key.
1568             **
1569             ** The result of comparing the key with the entry to which the
1570             ** cursor is left pointing is stored in pCur->iMatch. The same
1571             ** value is also written to *pRes if pRes!=NULL. The meaning of
1572             ** this value is as follows:
1573             **
1574             ** *pRes<0 The cursor is left pointing at an entry that
1575             ** is smaller than pKey or if the table is empty
1576             ** and the cursor is therefore left point to nothing.
1577             **
1578             ** *pRes==0 The cursor is left pointing at an entry that
1579             ** exactly matches pKey.
1580             **
1581             ** *pRes>0 The cursor is left pointing at an entry that
1582             ** is larger than pKey.
1583             */
1584             static
1585 131           int fileBtreeMoveto(BtCursor *pCur, const void *pKey, int nKey, int *pRes){
1586             int rc;
1587 131 50         if( pCur->pPage==0 ) return SQLITE_ABORT;
1588 131           pCur->eSkip = SKIP_NONE;
1589 131           rc = moveToRoot(pCur);
1590 131 50         if( rc ) return rc;
1591             for(;;){
1592             int lwr, upr;
1593             Pgno chldPg;
1594 131           MemPage *pPage = pCur->pPage;
1595 131           int c = -1; /* pRes return if table is empty must be -1 */
1596 131           lwr = 0;
1597 131           upr = pPage->nCell-1;
1598 283 100         while( lwr<=upr ){
1599 180           pCur->idx = (lwr+upr)/2;
1600 180           rc = fileBtreeKeyCompare(pCur, pKey, nKey, 0, &c);
1601 311 50         if( rc ) return rc;
1602 180 100         if( c==0 ){
1603 28           pCur->iMatch = c;
1604 28 50         if( pRes ) *pRes = 0;
1605 28           return SQLITE_OK;
1606             }
1607 152 100         if( c<0 ){
1608 151           lwr = pCur->idx+1;
1609             }else{
1610 1           upr = pCur->idx-1;
1611             }
1612             }
1613             assert( lwr==upr+1 );
1614             assert( pPage->isInit );
1615 103 100         if( lwr>=pPage->nCell ){
1616 102           chldPg = pPage->u.hdr.rightChild;
1617             }else{
1618 1           chldPg = pPage->apCell[lwr]->h.leftChild;
1619             }
1620 103 50         if( chldPg==0 ){
1621 103           pCur->iMatch = c;
1622 103 50         if( pRes ) *pRes = c;
1623 103           return SQLITE_OK;
1624             }
1625 0           pCur->idx = lwr;
1626 0           rc = moveToChild(pCur, chldPg);
1627 0 0         if( rc ) return rc;
1628 0           }
1629             /* NOT REACHED */
1630             }
1631              
1632             /*
1633             ** Advance the cursor to the next entry in the database. If
1634             ** successful then set *pRes=0. If the cursor
1635             ** was already pointing to the last entry in the database before
1636             ** this routine was called, then set *pRes=1.
1637             */
1638 170           static int fileBtreeNext(BtCursor *pCur, int *pRes){
1639             int rc;
1640 170           MemPage *pPage = pCur->pPage;
1641             assert( pRes!=0 );
1642 170 50         if( pPage==0 ){
1643 0           *pRes = 1;
1644 0           return SQLITE_ABORT;
1645             }
1646             assert( pPage->isInit );
1647             assert( pCur->eSkip!=SKIP_INVALID );
1648 170 100         if( pPage->nCell==0 ){
1649 13           *pRes = 1;
1650 13           return SQLITE_OK;
1651             }
1652             assert( pCur->idxnCell );
1653 157 50         if( pCur->eSkip==SKIP_NEXT ){
1654 0           pCur->eSkip = SKIP_NONE;
1655 0           *pRes = 0;
1656 0           return SQLITE_OK;
1657             }
1658 157           pCur->eSkip = SKIP_NONE;
1659 157           pCur->idx++;
1660 157 100         if( pCur->idx>=pPage->nCell ){
1661 59 100         if( pPage->u.hdr.rightChild ){
1662 2           rc = moveToChild(pCur, pPage->u.hdr.rightChild);
1663 2 50         if( rc ) return rc;
1664 2           rc = moveToLeftmost(pCur);
1665 2           *pRes = 0;
1666 2           return rc;
1667             }
1668             do{
1669 59 100         if( pPage->pParent==0 ){
1670 55           *pRes = 1;
1671 55           return SQLITE_OK;
1672             }
1673 4           moveToParent(pCur);
1674 4           pPage = pCur->pPage;
1675 4 100         }while( pCur->idx>=pPage->nCell );
1676 2           *pRes = 0;
1677 2           return SQLITE_OK;
1678             }
1679 98           *pRes = 0;
1680 98 50         if( pPage->u.hdr.rightChild==0 ){
1681 98           return SQLITE_OK;
1682             }
1683 0           rc = moveToLeftmost(pCur);
1684 0           return rc;
1685             }
1686              
1687             /*
1688             ** Step the cursor to the back to the previous entry in the database. If
1689             ** successful then set *pRes=0. If the cursor
1690             ** was already pointing to the first entry in the database before
1691             ** this routine was called, then set *pRes=1.
1692             */
1693 0           static int fileBtreePrevious(BtCursor *pCur, int *pRes){
1694             int rc;
1695             Pgno pgno;
1696             MemPage *pPage;
1697 0           pPage = pCur->pPage;
1698 0 0         if( pPage==0 ){
1699 0           *pRes = 1;
1700 0           return SQLITE_ABORT;
1701             }
1702             assert( pPage->isInit );
1703             assert( pCur->eSkip!=SKIP_INVALID );
1704 0 0         if( pPage->nCell==0 ){
1705 0           *pRes = 1;
1706 0           return SQLITE_OK;
1707             }
1708 0 0         if( pCur->eSkip==SKIP_PREV ){
1709 0           pCur->eSkip = SKIP_NONE;
1710 0           *pRes = 0;
1711 0           return SQLITE_OK;
1712             }
1713 0           pCur->eSkip = SKIP_NONE;
1714             assert( pCur->idx>=0 );
1715 0 0         if( (pgno = pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
1716 0           rc = moveToChild(pCur, pgno);
1717 0 0         if( rc ) return rc;
1718 0           rc = moveToRightmost(pCur);
1719             }else{
1720 0 0         while( pCur->idx==0 ){
1721 0 0         if( pPage->pParent==0 ){
1722 0 0         if( pRes ) *pRes = 1;
1723 0           return SQLITE_OK;
1724             }
1725 0           moveToParent(pCur);
1726 0           pPage = pCur->pPage;
1727             }
1728 0           pCur->idx--;
1729 0           rc = SQLITE_OK;
1730             }
1731 0           *pRes = 0;
1732 0           return rc;
1733             }
1734              
1735             /*
1736             ** Allocate a new page from the database file.
1737             **
1738             ** The new page is marked as dirty. (In other words, sqlitepager_write()
1739             ** has already been called on the new page.) The new page has also
1740             ** been referenced and the calling routine is responsible for calling
1741             ** sqlitepager_unref() on the new page when it is done.
1742             **
1743             ** SQLITE_OK is returned on success. Any other return value indicates
1744             ** an error. *ppPage and *pPgno are undefined in the event of an error.
1745             ** Do not invoke sqlitepager_unref() on *ppPage if an error is returned.
1746             **
1747             ** If the "nearby" parameter is not 0, then a (feeble) effort is made to
1748             ** locate a page close to the page number "nearby". This can be used in an
1749             ** attempt to keep related pages close to each other in the database file,
1750             ** which in turn can make database access faster.
1751             */
1752 62           static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno, Pgno nearby){
1753 62           PageOne *pPage1 = pBt->page1;
1754             int rc;
1755 62 100         if( pPage1->freeList ){
1756             OverflowPage *pOvfl;
1757             FreelistInfo *pInfo;
1758              
1759 10           rc = sqlitepager_write(pPage1);
1760 10 50         if( rc ) return rc;
1761 10 50         SWAB_ADD(pBt, pPage1->nFree, -1);
1762 10 50         rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList),
1763             (void**)&pOvfl);
1764 10 50         if( rc ) return rc;
1765 10           rc = sqlitepager_write(pOvfl);
1766 10 50         if( rc ){
1767 0           sqlitepager_unref(pOvfl);
1768 0           return rc;
1769             }
1770 10           pInfo = (FreelistInfo*)pOvfl->aPayload;
1771 10 100         if( pInfo->nFree==0 ){
1772 3 50         *pPgno = SWAB32(pBt, pPage1->freeList);
1773 3           pPage1->freeList = pOvfl->iNext;
1774 3           *ppPage = (MemPage*)pOvfl;
1775             }else{
1776             int closest, n;
1777 7 50         n = SWAB32(pBt, pInfo->nFree);
1778 7 50         if( n>1 && nearby>0 ){
    50          
1779             int i, dist;
1780 0           closest = 0;
1781 0 0         dist = SWAB32(pBt, pInfo->aFree[0]) - nearby;
1782 0 0         if( dist<0 ) dist = -dist;
1783 0 0         for(i=1; i
1784 0 0         int d2 = SWAB32(pBt, pInfo->aFree[i]) - nearby;
1785 0 0         if( d2<0 ) d2 = -d2;
1786 0 0         if( d2
1787             }
1788             }else{
1789 7           closest = 0;
1790             }
1791 7 50         SWAB_ADD(pBt, pInfo->nFree, -1);
1792 7 50         *pPgno = SWAB32(pBt, pInfo->aFree[closest]);
1793 7           pInfo->aFree[closest] = pInfo->aFree[n-1];
1794 7           rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
1795 7           sqlitepager_unref(pOvfl);
1796 7 50         if( rc==SQLITE_OK ){
1797 7           sqlitepager_dont_rollback(*ppPage);
1798 10           rc = sqlitepager_write(*ppPage);
1799             }
1800             }
1801             }else{
1802 52           *pPgno = sqlitepager_pagecount(pBt->pPager) + 1;
1803 52           rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
1804 52 50         if( rc ) return rc;
1805 52           rc = sqlitepager_write(*ppPage);
1806             }
1807 62           return rc;
1808             }
1809              
1810             /*
1811             ** Add a page of the database file to the freelist. Either pgno or
1812             ** pPage but not both may be 0.
1813             **
1814             ** sqlitepager_unref() is NOT called for pPage.
1815             */
1816 44           static int freePage(Btree *pBt, void *pPage, Pgno pgno){
1817 44           PageOne *pPage1 = pBt->page1;
1818 44           OverflowPage *pOvfl = (OverflowPage*)pPage;
1819             int rc;
1820 44           int needUnref = 0;
1821             MemPage *pMemPage;
1822              
1823 44 50         if( pgno==0 ){
1824             assert( pOvfl!=0 );
1825 0           pgno = sqlitepager_pagenumber(pOvfl);
1826             }
1827             assert( pgno>2 );
1828             assert( sqlitepager_pagenumber(pOvfl)==pgno );
1829 44           pMemPage = (MemPage*)pPage;
1830 44           pMemPage->isInit = 0;
1831 44 50         if( pMemPage->pParent ){
1832 0           sqlitepager_unref(pMemPage->pParent);
1833 0           pMemPage->pParent = 0;
1834             }
1835 44           rc = sqlitepager_write(pPage1);
1836 44 50         if( rc ){
1837 0           return rc;
1838             }
1839 44 50         SWAB_ADD(pBt, pPage1->nFree, 1);
1840 44 50         if( pPage1->nFree!=0 && pPage1->freeList!=0 ){
    100          
1841             OverflowPage *pFreeIdx;
1842 40 50         rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList),
1843             (void**)&pFreeIdx);
1844 40 50         if( rc==SQLITE_OK ){
1845 40           FreelistInfo *pInfo = (FreelistInfo*)pFreeIdx->aPayload;
1846 40 50         int n = SWAB32(pBt, pInfo->nFree);
1847 40 50         if( n<(sizeof(pInfo->aFree)/sizeof(pInfo->aFree[0])) ){
1848 40           rc = sqlitepager_write(pFreeIdx);
1849 40 50         if( rc==SQLITE_OK ){
1850 40 50         pInfo->aFree[n] = SWAB32(pBt, pgno);
1851 40 50         SWAB_ADD(pBt, pInfo->nFree, 1);
1852 40           sqlitepager_unref(pFreeIdx);
1853 40           sqlitepager_dont_write(pBt->pPager, pgno);
1854 40           return rc;
1855             }
1856             }
1857 0           sqlitepager_unref(pFreeIdx);
1858             }
1859             }
1860 4 50         if( pOvfl==0 ){
1861             assert( pgno>0 );
1862 0           rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl);
1863 0 0         if( rc ) return rc;
1864 0           needUnref = 1;
1865             }
1866 4           rc = sqlitepager_write(pOvfl);
1867 4 50         if( rc ){
1868 0 0         if( needUnref ) sqlitepager_unref(pOvfl);
1869 0           return rc;
1870             }
1871 4           pOvfl->iNext = pPage1->freeList;
1872 4 50         pPage1->freeList = SWAB32(pBt, pgno);
1873 4           memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
1874 4 50         if( needUnref ) rc = sqlitepager_unref(pOvfl);
1875 44           return rc;
1876             }
1877              
1878             /*
1879             ** Erase all the data out of a cell. This involves returning overflow
1880             ** pages back the freelist.
1881             */
1882 58           static int clearCell(Btree *pBt, Cell *pCell){
1883 58           Pager *pPager = pBt->pPager;
1884             OverflowPage *pOvfl;
1885             Pgno ovfl, nextOvfl;
1886             int rc;
1887              
1888 58 50         if( NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h) <= MX_LOCAL_PAYLOAD ){
    50          
    100          
1889 57           return SQLITE_OK;
1890             }
1891 1 50         ovfl = SWAB32(pBt, pCell->ovfl);
1892 1           pCell->ovfl = 0;
1893 34 100         while( ovfl ){
1894 33           rc = sqlitepager_get(pPager, ovfl, (void**)&pOvfl);
1895 33 50         if( rc ) return rc;
1896 33 50         nextOvfl = SWAB32(pBt, pOvfl->iNext);
1897 33           rc = freePage(pBt, pOvfl, ovfl);
1898 33 50         if( rc ) return rc;
1899 33           sqlitepager_unref(pOvfl);
1900 33           ovfl = nextOvfl;
1901             }
1902 58           return SQLITE_OK;
1903             }
1904              
1905             /*
1906             ** Create a new cell from key and data. Overflow pages are allocated as
1907             ** necessary and linked to this cell.
1908             */
1909 121           static int fillInCell(
1910             Btree *pBt, /* The whole Btree. Needed to allocate pages */
1911             Cell *pCell, /* Populate this Cell structure */
1912             const void *pKey, int nKey, /* The key */
1913             const void *pData,int nData /* The data */
1914             ){
1915             OverflowPage *pOvfl, *pPrior;
1916             Pgno *pNext;
1917             int spaceLeft;
1918             int n, rc;
1919             int nPayload;
1920             const char *pPayload;
1921             char *pSpace;
1922 121           Pgno nearby = 0;
1923              
1924 121           pCell->h.leftChild = 0;
1925 121 50         pCell->h.nKey = SWAB16(pBt, nKey & 0xffff);
1926 121           pCell->h.nKeyHi = nKey >> 16;
1927 121 50         pCell->h.nData = SWAB16(pBt, nData & 0xffff);
1928 121           pCell->h.nDataHi = nData >> 16;
1929 121           pCell->h.iNext = 0;
1930              
1931 121           pNext = &pCell->ovfl;
1932 121           pSpace = pCell->aPayload;
1933 121           spaceLeft = MX_LOCAL_PAYLOAD;
1934 121           pPayload = pKey;
1935 121           pKey = 0;
1936 121           nPayload = nKey;
1937 121           pPrior = 0;
1938 371 100         while( nPayload>0 ){
1939 250 100         if( spaceLeft==0 ){
1940 33           rc = allocatePage(pBt, (MemPage**)&pOvfl, pNext, nearby);
1941 33 50         if( rc ){
1942 0           *pNext = 0;
1943             }else{
1944 33           nearby = *pNext;
1945             }
1946 33 100         if( pPrior ) sqlitepager_unref(pPrior);
1947 33 50         if( rc ){
1948 0           clearCell(pBt, pCell);
1949 0           return rc;
1950             }
1951 33 50         if( pBt->needSwab ) *pNext = swab32(*pNext);
1952 33           pPrior = pOvfl;
1953 33           spaceLeft = OVERFLOW_SIZE;
1954 33           pSpace = pOvfl->aPayload;
1955 33           pNext = &pOvfl->iNext;
1956             }
1957 250           n = nPayload;
1958 250 100         if( n>spaceLeft ) n = spaceLeft;
1959 250           memcpy(pSpace, pPayload, n);
1960 250           nPayload -= n;
1961 250 100         if( nPayload==0 && pData ){
    100          
1962 99           pPayload = pData;
1963 99           nPayload = nData;
1964 99           pData = 0;
1965             }else{
1966 151           pPayload += n;
1967             }
1968 250           spaceLeft -= n;
1969 250           pSpace += n;
1970             }
1971 121           *pNext = 0;
1972 121 100         if( pPrior ){
1973 1           sqlitepager_unref(pPrior);
1974             }
1975 121           return SQLITE_OK;
1976             }
1977              
1978             /*
1979             ** Change the MemPage.pParent pointer on the page whose number is
1980             ** given in the second argument so that MemPage.pParent holds the
1981             ** pointer in the third argument.
1982             */
1983 15           static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent,int idx){
1984             MemPage *pThis;
1985              
1986 15 100         if( pgno==0 ) return;
1987             assert( pPager!=0 );
1988 2           pThis = sqlitepager_lookup(pPager, pgno);
1989 2 50         if( pThis && pThis->isInit ){
    50          
1990 2 100         if( pThis->pParent!=pNewParent ){
1991 1 50         if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
1992 1           pThis->pParent = pNewParent;
1993 1 50         if( pNewParent ) sqlitepager_ref(pNewParent);
1994             }
1995 2           pThis->idxParent = idx;
1996 2           sqlitepager_unref(pThis);
1997             }
1998             }
1999              
2000             /*
2001             ** Reparent all children of the given page to be the given page.
2002             ** In other words, for every child of pPage, invoke reparentPage()
2003             ** to make sure that each child knows that pPage is its parent.
2004             **
2005             ** This routine gets called after you memcpy() one page into
2006             ** another.
2007             */
2008 3           static void reparentChildPages(Btree *pBt, MemPage *pPage){
2009             int i;
2010 3           Pager *pPager = pBt->pPager;
2011 15 100         for(i=0; inCell; i++){
2012 12 50         reparentPage(pPager, SWAB32(pBt, pPage->apCell[i]->h.leftChild), pPage, i);
2013             }
2014 3 50         reparentPage(pPager, SWAB32(pBt, pPage->u.hdr.rightChild), pPage, i);
2015 3           pPage->idxShift = 0;
2016 3           }
2017              
2018             /*
2019             ** Remove the i-th cell from pPage. This routine effects pPage only.
2020             ** The cell content is not freed or deallocated. It is assumed that
2021             ** the cell content has been copied someplace else. This routine just
2022             ** removes the reference to the cell from pPage.
2023             **
2024             ** "sz" must be the number of bytes in the cell.
2025             **
2026             ** Do not bother maintaining the integrity of the linked list of Cells.
2027             ** Only the pPage->apCell[] array is important. The relinkCellList()
2028             ** routine will be called soon after this routine in order to rebuild
2029             ** the linked list.
2030             */
2031 39           static void dropCell(Btree *pBt, MemPage *pPage, int idx, int sz){
2032             int j;
2033             assert( idx>=0 && idxnCell );
2034             assert( sz==cellSize(pBt, pPage->apCell[idx]) );
2035             assert( sqlitepager_iswriteable(pPage) );
2036 39           freeSpace(pBt, pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
2037 46 100         for(j=idx; jnCell-1; j++){
2038 7           pPage->apCell[j] = pPage->apCell[j+1];
2039             }
2040 39           pPage->nCell--;
2041 39           pPage->idxShift = 1;
2042 39           }
2043              
2044             /*
2045             ** Insert a new cell on pPage at cell index "i". pCell points to the
2046             ** content of the cell.
2047             **
2048             ** If the cell content will fit on the page, then put it there. If it
2049             ** will not fit, then just make pPage->apCell[i] point to the content
2050             ** and set pPage->isOverfull.
2051             **
2052             ** Do not bother maintaining the integrity of the linked list of Cells.
2053             ** Only the pPage->apCell[] array is important. The relinkCellList()
2054             ** routine will be called soon after this routine in order to rebuild
2055             ** the linked list.
2056             */
2057 133           static void insertCell(Btree *pBt, MemPage *pPage, int i, Cell *pCell, int sz){
2058             int idx, j;
2059             assert( i>=0 && i<=pPage->nCell );
2060             assert( sz==cellSize(pBt, pCell) );
2061             assert( sqlitepager_iswriteable(pPage) );
2062 133           idx = allocateSpace(pBt, pPage, sz);
2063 137 100         for(j=pPage->nCell; j>i; j--){
2064 4           pPage->apCell[j] = pPage->apCell[j-1];
2065             }
2066 133           pPage->nCell++;
2067 133 100         if( idx<=0 ){
2068 1           pPage->isOverfull = 1;
2069 1           pPage->apCell[i] = pCell;
2070             }else{
2071 132           memcpy(&pPage->u.aDisk[idx], pCell, sz);
2072 132           pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx];
2073             }
2074 133           pPage->idxShift = 1;
2075 133           }
2076              
2077             /*
2078             ** Rebuild the linked list of cells on a page so that the cells
2079             ** occur in the order specified by the pPage->apCell[] array.
2080             ** Invoke this routine once to repair damage after one or more
2081             ** invocations of either insertCell() or dropCell().
2082             */
2083 140           static void relinkCellList(Btree *pBt, MemPage *pPage){
2084             int i;
2085             u16 *pIdx;
2086             assert( sqlitepager_iswriteable(pPage) );
2087 140           pIdx = &pPage->u.hdr.firstCell;
2088 523 100         for(i=0; inCell; i++){
2089 383           int idx = Addr(pPage->apCell[i]) - Addr(pPage);
2090             assert( idx>0 && idx
2091 383 50         *pIdx = SWAB16(pBt, idx);
2092 383           pIdx = &pPage->apCell[i]->h.iNext;
2093             }
2094 140           *pIdx = 0;
2095 140           }
2096              
2097             /*
2098             ** Make a copy of the contents of pFrom into pTo. The pFrom->apCell[]
2099             ** pointers that point into pFrom->u.aDisk[] must be adjusted to point
2100             ** into pTo->u.aDisk[] instead. But some pFrom->apCell[] entries might
2101             ** not point to pFrom->u.aDisk[]. Those are unchanged.
2102             */
2103 2           static void copyPage(MemPage *pTo, MemPage *pFrom){
2104             uptr from, to;
2105             int i;
2106 2           memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_USABLE_SIZE);
2107 2           pTo->pParent = 0;
2108 2           pTo->isInit = 1;
2109 2           pTo->nCell = pFrom->nCell;
2110 2           pTo->nFree = pFrom->nFree;
2111 2           pTo->isOverfull = pFrom->isOverfull;
2112 2           to = Addr(pTo);
2113 2           from = Addr(pFrom);
2114 26 100         for(i=0; inCell; i++){
2115 24           uptr x = Addr(pFrom->apCell[i]);
2116 24 50         if( x>from && x
    100          
2117 22           *((uptr*)&pTo->apCell[i]) = x + to - from;
2118             }else{
2119 2           pTo->apCell[i] = pFrom->apCell[i];
2120             }
2121             }
2122 2           }
2123              
2124             /*
2125             ** The following parameters determine how many adjacent pages get involved
2126             ** in a balancing operation. NN is the number of neighbors on either side
2127             ** of the page that participate in the balancing operation. NB is the
2128             ** total number of pages that participate, including the target page and
2129             ** NN neighbors on either side.
2130             **
2131             ** The minimum value of NN is 1 (of course). Increasing NN above 1
2132             ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
2133             ** in exchange for a larger degradation in INSERT and UPDATE performance.
2134             ** The value of NN appears to give the best results overall.
2135             */
2136             #define NN 1 /* Number of neighbors on either side of pPage */
2137             #define NB (NN*2+1) /* Total pages involved in the balance */
2138              
2139             /*
2140             ** This routine redistributes Cells on pPage and up to two siblings
2141             ** of pPage so that all pages have about the same amount of free space.
2142             ** Usually one sibling on either side of pPage is used in the balancing,
2143             ** though both siblings might come from one side if pPage is the first
2144             ** or last child of its parent. If pPage has fewer than two siblings
2145             ** (something which can only happen if pPage is the root page or a
2146             ** child of root) then all available siblings participate in the balancing.
2147             **
2148             ** The number of siblings of pPage might be increased or decreased by
2149             ** one in an effort to keep pages between 66% and 100% full. The root page
2150             ** is special and is allowed to be less than 66% full. If pPage is
2151             ** the root page, then the depth of the tree might be increased
2152             ** or decreased by one, as necessary, to keep the root page from being
2153             ** overfull or empty.
2154             **
2155             ** This routine calls relinkCellList() on its input page regardless of
2156             ** whether or not it does any real balancing. Client routines will typically
2157             ** invoke insertCell() or dropCell() before calling this routine, so we
2158             ** need to call relinkCellList() to clean up the mess that those other
2159             ** routines left behind.
2160             **
2161             ** pCur is left pointing to the same cell as when this routine was called
2162             ** even if that cell gets moved to a different page. pCur may be NULL.
2163             ** Set the pCur parameter to NULL if you do not care about keeping track
2164             ** of a cell as that will save this routine the work of keeping track of it.
2165             **
2166             ** Note that when this routine is called, some of the Cells on pPage
2167             ** might not actually be stored in pPage->u.aDisk[]. This can happen
2168             ** if the page is overfull. Part of the job of this routine is to
2169             ** make sure all Cells for pPage once again fit in pPage->u.aDisk[].
2170             **
2171             ** In the course of balancing the siblings of pPage, the parent of pPage
2172             ** might become overfull or underfull. If that happens, then this routine
2173             ** is called recursively on the parent.
2174             **
2175             ** If this routine fails for any reason, it might leave the database
2176             ** in a corrupted state. So if this routine fails, the database should
2177             ** be rolled back.
2178             */
2179 139           static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
2180             MemPage *pParent; /* The parent of pPage */
2181             int nCell; /* Number of cells in apCell[] */
2182             int nOld; /* Number of pages in apOld[] */
2183             int nNew; /* Number of pages in apNew[] */
2184             int nDiv; /* Number of cells in apDiv[] */
2185             int i, j, k; /* Loop counters */
2186             int idx; /* Index of pPage in pParent->apCell[] */
2187             int nxDiv; /* Next divider slot in pParent->apCell[] */
2188             int rc; /* The return code */
2189             int iCur; /* apCell[iCur] is the cell of the cursor */
2190             MemPage *pOldCurPage; /* The cursor originally points to this page */
2191             int subtotal; /* Subtotal of bytes in cells on one page */
2192 139           MemPage *extraUnref = 0; /* A page that needs to be unref-ed */
2193             MemPage *apOld[NB]; /* pPage and up to two siblings */
2194             Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
2195             MemPage *apNew[NB+1]; /* pPage and up to NB siblings after balancing */
2196             Pgno pgnoNew[NB+1]; /* Page numbers for each page in apNew[] */
2197             int idxDiv[NB]; /* Indices of divider cells in pParent */
2198             Cell *apDiv[NB]; /* Divider cells in pParent */
2199             Cell aTemp[NB]; /* Temporary holding area for apDiv[] */
2200             int cntNew[NB+1]; /* Index in apCell[] of cell after i-th page */
2201             int szNew[NB+1]; /* Combined size of cells place on i-th page */
2202             MemPage aOld[NB]; /* Temporary copies of pPage and its siblings */
2203             Cell *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */
2204             int szCell[(MX_CELL+2)*NB]; /* Local size of all cells */
2205              
2206             /*
2207             ** Return without doing any work if pPage is neither overfull nor
2208             ** underfull.
2209             */
2210             assert( sqlitepager_iswriteable(pPage) );
2211 139 100         if( !pPage->isOverfull && pPage->nFree
    100          
2212 20 50         && pPage->nCell>=2){
2213 20           relinkCellList(pBt, pPage);
2214 20           return SQLITE_OK;
2215             }
2216              
2217             /*
2218             ** Find the parent of the page to be balanceed.
2219             ** If there is no parent, it means this page is the root page and
2220             ** special rules apply.
2221             */
2222 119           pParent = pPage->pParent;
2223 119 50         if( pParent==0 ){
2224             Pgno pgnoChild;
2225             MemPage *pChild;
2226             assert( pPage->isInit );
2227 119 100         if( pPage->nCell==0 ){
2228 13 50         if( pPage->u.hdr.rightChild ){
2229             /*
2230             ** The root page is empty. Copy the one child page
2231             ** into the root page and return. This reduces the depth
2232             ** of the BTree by one.
2233             */
2234 0 0         pgnoChild = SWAB32(pBt, pPage->u.hdr.rightChild);
2235 0           rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
2236 118 0         if( rc ) return rc;
2237 0           memcpy(pPage, pChild, SQLITE_USABLE_SIZE);
2238 0           pPage->isInit = 0;
2239 0           rc = initPage(pBt, pPage, sqlitepager_pagenumber(pPage), 0);
2240             assert( rc==SQLITE_OK );
2241 0           reparentChildPages(pBt, pPage);
2242 0 0         if( pCur && pCur->pPage==pChild ){
    0          
2243 0           sqlitepager_unref(pChild);
2244 0           pCur->pPage = pPage;
2245 0           sqlitepager_ref(pPage);
2246             }
2247 0           freePage(pBt, pChild, pgnoChild);
2248 0           sqlitepager_unref(pChild);
2249             }else{
2250 13           relinkCellList(pBt, pPage);
2251             }
2252 13           return SQLITE_OK;
2253             }
2254 106 100         if( !pPage->isOverfull ){
2255             /* It is OK for the root page to be less than half full.
2256             */
2257 105           relinkCellList(pBt, pPage);
2258 105           return SQLITE_OK;
2259             }
2260             /*
2261             ** If we get to here, it means the root page is overfull.
2262             ** When this happens, Create a new child page and copy the
2263             ** contents of the root into the child. Then make the root
2264             ** page an empty page with rightChild pointing to the new
2265             ** child. Then fall thru to the code below which will cause
2266             ** the overfull child page to be split.
2267             */
2268 1           rc = sqlitepager_write(pPage);
2269 1 50         if( rc ) return rc;
2270 1           rc = allocatePage(pBt, &pChild, &pgnoChild, sqlitepager_pagenumber(pPage));
2271 1 50         if( rc ) return rc;
2272             assert( sqlitepager_iswriteable(pChild) );
2273 1           copyPage(pChild, pPage);
2274 1           pChild->pParent = pPage;
2275 1           pChild->idxParent = 0;
2276 1           sqlitepager_ref(pPage);
2277 1           pChild->isOverfull = 1;
2278 1 50         if( pCur && pCur->pPage==pPage ){
    50          
2279 1           sqlitepager_unref(pPage);
2280 1           pCur->pPage = pChild;
2281             }else{
2282 0           extraUnref = pChild;
2283             }
2284 1           zeroPage(pBt, pPage);
2285 1 50         pPage->u.hdr.rightChild = SWAB32(pBt, pgnoChild);
2286 1           pParent = pPage;
2287 1           pPage = pChild;
2288             }
2289 1           rc = sqlitepager_write(pParent);
2290 1 50         if( rc ) return rc;
2291             assert( pParent->isInit );
2292            
2293             /*
2294             ** Find the Cell in the parent page whose h.leftChild points back
2295             ** to pPage. The "idx" variable is the index of that cell. If pPage
2296             ** is the rightmost child of pParent then set idx to pParent->nCell
2297             */
2298 1 50         if( pParent->idxShift ){
2299             Pgno pgno, swabPgno;
2300 1           pgno = sqlitepager_pagenumber(pPage);
2301 1 50         swabPgno = SWAB32(pBt, pgno);
2302 1 50         for(idx=0; idxnCell; idx++){
2303 0 0         if( pParent->apCell[idx]->h.leftChild==swabPgno ){
2304 0           break;
2305             }
2306             }
2307             assert( idxnCell || pParent->u.hdr.rightChild==swabPgno );
2308             }else{
2309 0           idx = pPage->idxParent;
2310             }
2311              
2312             /*
2313             ** Initialize variables so that it will be safe to jump
2314             ** directly to balance_cleanup at any moment.
2315             */
2316 1           nOld = nNew = 0;
2317 1           sqlitepager_ref(pParent);
2318              
2319             /*
2320             ** Find sibling pages to pPage and the Cells in pParent that divide
2321             ** the siblings. An attempt is made to find NN siblings on either
2322             ** side of pPage. More siblings are taken from one side, however, if
2323             ** pPage there are fewer than NN siblings on the other side. If pParent
2324             ** has NB or fewer children then all children of pParent are taken.
2325             */
2326 1           nxDiv = idx - NN;
2327 1 50         if( nxDiv + NB > pParent->nCell ){
2328 1           nxDiv = pParent->nCell - NB + 1;
2329             }
2330 1 50         if( nxDiv<0 ){
2331 1           nxDiv = 0;
2332             }
2333 1           nDiv = 0;
2334 2 50         for(i=0, k=nxDiv; i
2335 2 50         if( knCell ){
2336 0           idxDiv[i] = k;
2337 0           apDiv[i] = pParent->apCell[k];
2338 0           nDiv++;
2339 0 0         pgnoOld[i] = SWAB32(pBt, apDiv[i]->h.leftChild);
2340 2 100         }else if( k==pParent->nCell ){
2341 1 50         pgnoOld[i] = SWAB32(pBt, pParent->u.hdr.rightChild);
2342             }else{
2343 1           break;
2344             }
2345 1           rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);
2346 1 50         if( rc ) goto balance_cleanup;
2347 1           rc = initPage(pBt, apOld[i], pgnoOld[i], pParent);
2348 1 50         if( rc ) goto balance_cleanup;
2349 1           apOld[i]->idxParent = k;
2350 1           nOld++;
2351             }
2352              
2353             /*
2354             ** Set iCur to be the index in apCell[] of the cell that the cursor
2355             ** is pointing to. We will need this later on in order to keep the
2356             ** cursor pointing at the same cell. If pCur points to a page that
2357             ** has no involvement with this rebalancing, then set iCur to a large
2358             ** number so that the iCur==j tests always fail in the main cell
2359             ** distribution loop below.
2360             */
2361 1 50         if( pCur ){
2362 1           iCur = 0;
2363 1 50         for(i=0; i
2364 1 50         if( pCur->pPage==apOld[i] ){
2365 1           iCur += pCur->idx;
2366 1           break;
2367             }
2368 0           iCur += apOld[i]->nCell;
2369 0 0         if( ipPage==pParent && pCur->idx==idxDiv[i] ){
    0          
    0          
2370 0           break;
2371             }
2372 0           iCur++;
2373             }
2374 1           pOldCurPage = pCur->pPage;
2375             }
2376              
2377             /*
2378             ** Make copies of the content of pPage and its siblings into aOld[].
2379             ** The rest of this function will use data from the copies rather
2380             ** that the original pages since the original pages will be in the
2381             ** process of being overwritten.
2382             */
2383 2 100         for(i=0; i
2384 1           copyPage(&aOld[i], apOld[i]);
2385             }
2386              
2387             /*
2388             ** Load pointers to all cells on sibling pages and the divider cells
2389             ** into the local apCell[] array. Make copies of the divider cells
2390             ** into aTemp[] and remove the the divider Cells from pParent.
2391             */
2392 1           nCell = 0;
2393 2 100         for(i=0; i
2394 1           MemPage *pOld = &aOld[i];
2395 13 100         for(j=0; jnCell; j++){
2396 12           apCell[nCell] = pOld->apCell[j];
2397 12           szCell[nCell] = cellSize(pBt, apCell[nCell]);
2398 12           nCell++;
2399             }
2400 1 50         if( i
2401 0           szCell[nCell] = cellSize(pBt, apDiv[i]);
2402 0           memcpy(&aTemp[i], apDiv[i], szCell[nCell]);
2403 0           apCell[nCell] = &aTemp[i];
2404 0           dropCell(pBt, pParent, nxDiv, szCell[nCell]);
2405             assert( SWAB32(pBt, apCell[nCell]->h.leftChild)==pgnoOld[i] );
2406 0           apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
2407 0           nCell++;
2408             }
2409             }
2410              
2411             /*
2412             ** Figure out the number of pages needed to hold all nCell cells.
2413             ** Store this number in "k". Also compute szNew[] which is the total
2414             ** size of all cells on the i-th page and cntNew[] which is the index
2415             ** in apCell[] of the cell that divides path i from path i+1.
2416             ** cntNew[k] should equal nCell.
2417             **
2418             ** This little patch of code is critical for keeping the tree
2419             ** balanced.
2420             */
2421 13 100         for(subtotal=k=i=0; i
2422 12           subtotal += szCell[i];
2423 12 100         if( subtotal > USABLE_SPACE ){
2424 1           szNew[k] = subtotal - szCell[i];
2425 1           cntNew[k] = i;
2426 1           subtotal = 0;
2427 1           k++;
2428             }
2429             }
2430 1           szNew[k] = subtotal;
2431 1           cntNew[k] = nCell;
2432 1           k++;
2433 2 100         for(i=k-1; i>0; i--){
2434 8 100         while( szNew[i]
2435 7           cntNew[i-1]--;
2436             assert( cntNew[i-1]>0 );
2437 7           szNew[i] += szCell[cntNew[i-1]];
2438 7           szNew[i-1] -= szCell[cntNew[i-1]-1];
2439             }
2440             }
2441             assert( cntNew[0]>0 );
2442              
2443             /*
2444             ** Allocate k new pages. Reuse old pages where possible.
2445             */
2446 3 100         for(i=0; i
2447 2 100         if( i
2448 1           apNew[i] = apOld[i];
2449 1           pgnoNew[i] = pgnoOld[i];
2450 1           apOld[i] = 0;
2451 1           sqlitepager_write(apNew[i]);
2452             }else{
2453 1           rc = allocatePage(pBt, &apNew[i], &pgnoNew[i], pgnoNew[i-1]);
2454 1 50         if( rc ) goto balance_cleanup;
2455             }
2456 2           nNew++;
2457 2           zeroPage(pBt, apNew[i]);
2458 2           apNew[i]->isInit = 1;
2459             }
2460              
2461             /* Free any old pages that were not reused as new pages.
2462             */
2463 1 50         while( i
2464 0           rc = freePage(pBt, apOld[i], pgnoOld[i]);
2465 0 0         if( rc ) goto balance_cleanup;
2466 0           sqlitepager_unref(apOld[i]);
2467 0           apOld[i] = 0;
2468 0           i++;
2469             }
2470              
2471             /*
2472             ** Put the new pages in accending order. This helps to
2473             ** keep entries in the disk file in order so that a scan
2474             ** of the table is a linear scan through the file. That
2475             ** in turn helps the operating system to deliver pages
2476             ** from the disk more rapidly.
2477             **
2478             ** An O(n^2) insertion sort algorithm is used, but since
2479             ** n is never more than NB (a small constant), that should
2480             ** not be a problem.
2481             **
2482             ** When NB==3, this one optimization makes the database
2483             ** about 25% faster for large insertions and deletions.
2484             */
2485 2 100         for(i=0; i
2486 1           int minV = pgnoNew[i];
2487 1           int minI = i;
2488 2 100         for(j=i+1; j
2489 1 50         if( pgnoNew[j]<(unsigned)minV ){
2490 0           minI = j;
2491 0           minV = pgnoNew[j];
2492             }
2493             }
2494 1 50         if( minI>i ){
2495             int t;
2496             MemPage *pT;
2497 0           t = pgnoNew[i];
2498 0           pT = apNew[i];
2499 0           pgnoNew[i] = pgnoNew[minI];
2500 0           apNew[i] = apNew[minI];
2501 0           pgnoNew[minI] = t;
2502 0           apNew[minI] = pT;
2503             }
2504             }
2505              
2506             /*
2507             ** Evenly distribute the data in apCell[] across the new pages.
2508             ** Insert divider cells into pParent as necessary.
2509             */
2510 1           j = 0;
2511 3 100         for(i=0; i
2512 2           MemPage *pNew = apNew[i];
2513 13 100         while( j
2514             assert( pNew->nFree>=szCell[j] );
2515 11 50         if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
    100          
2516 11           insertCell(pBt, pNew, pNew->nCell, apCell[j], szCell[j]);
2517 11           j++;
2518             }
2519             assert( pNew->nCell>0 );
2520             assert( !pNew->isOverfull );
2521 2           relinkCellList(pBt, pNew);
2522 2 100         if( i
    50          
2523 1           pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
2524 1 50         apCell[j]->h.leftChild = SWAB32(pBt, pgnoNew[i]);
2525 1 50         if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
    50          
2526 1           insertCell(pBt, pParent, nxDiv, apCell[j], szCell[j]);
2527 1           j++;
2528 1           nxDiv++;
2529             }
2530             }
2531             assert( j==nCell );
2532 1           apNew[nNew-1]->u.hdr.rightChild = aOld[nOld-1].u.hdr.rightChild;
2533 1 50         if( nxDiv==pParent->nCell ){
2534 1 50         pParent->u.hdr.rightChild = SWAB32(pBt, pgnoNew[nNew-1]);
2535             }else{
2536 0 0         pParent->apCell[nxDiv]->h.leftChild = SWAB32(pBt, pgnoNew[nNew-1]);
2537             }
2538 1 50         if( pCur ){
2539 1 50         if( j<=iCur && pCur->pPage==pParent && pCur->idx>idxDiv[nOld-1] ){
    0          
    0          
2540             assert( pCur->pPage==pOldCurPage );
2541 0           pCur->idx += nNew - nOld;
2542             }else{
2543             assert( pOldCurPage!=0 );
2544 1           sqlitepager_ref(pCur->pPage);
2545 1           sqlitepager_unref(pOldCurPage);
2546             }
2547             }
2548              
2549             /*
2550             ** Reparent children of all cells.
2551             */
2552 3 100         for(i=0; i
2553 2           reparentChildPages(pBt, apNew[i]);
2554             }
2555 1           reparentChildPages(pBt, pParent);
2556              
2557             /*
2558             ** balance the parent page.
2559             */
2560 1           rc = balance(pBt, pParent, pCur);
2561              
2562             /*
2563             ** Cleanup before returning.
2564             */
2565             balance_cleanup:
2566 1 50         if( extraUnref ){
2567 0           sqlitepager_unref(extraUnref);
2568             }
2569 2 100         for(i=0; i
2570 1 50         if( apOld[i]!=0 && apOld[i]!=&aOld[i] ) sqlitepager_unref(apOld[i]);
    0          
2571             }
2572 3 100         for(i=0; i
2573 2           sqlitepager_unref(apNew[i]);
2574             }
2575 1 50         if( pCur && pCur->pPage==0 ){
    50          
2576 0           pCur->pPage = pParent;
2577 0           pCur->idx = 0;
2578             }else{
2579 1           sqlitepager_unref(pParent);
2580             }
2581 139           return rc;
2582             }
2583              
2584             /*
2585             ** This routine checks all cursors that point to the same table
2586             ** as pCur points to. If any of those cursors were opened with
2587             ** wrFlag==0 then this routine returns SQLITE_LOCKED. If all
2588             ** cursors point to the same table were opened with wrFlag==1
2589             ** then this routine returns SQLITE_OK.
2590             **
2591             ** In addition to checking for read-locks (where a read-lock
2592             ** means a cursor opened with wrFlag==0) this routine also moves
2593             ** all cursors other than pCur so that they are pointing to the
2594             ** first Cell on root page. This is necessary because an insert
2595             ** or delete might change the number of cells on a page or delete
2596             ** a page entirely and we do not want to leave any cursors
2597             ** pointing to non-existant pages or cells.
2598             */
2599 138           static int checkReadLocks(BtCursor *pCur){
2600             BtCursor *p;
2601             assert( pCur->wrFlag );
2602 138 50         for(p=pCur->pShared; p!=pCur; p=p->pShared){
2603             assert( p );
2604             assert( p->pgnoRoot==pCur->pgnoRoot );
2605 0 0         if( p->wrFlag==0 ) return SQLITE_LOCKED;
2606 0 0         if( sqlitepager_pagenumber(p->pPage)!=p->pgnoRoot ){
2607 0           moveToRoot(p);
2608             }
2609             }
2610 138           return SQLITE_OK;
2611             }
2612              
2613             /*
2614             ** Insert a new record into the BTree. The key is given by (pKey,nKey)
2615             ** and the data is given by (pData,nData). The cursor is used only to
2616             ** define what database the record should be inserted into. The cursor
2617             ** is left pointing at the new record.
2618             */
2619 121           static int fileBtreeInsert(
2620             BtCursor *pCur, /* Insert data into the table of this cursor */
2621             const void *pKey, int nKey, /* The key of the new record */
2622             const void *pData, int nData /* The data of the new record */
2623             ){
2624             Cell newCell;
2625             int rc;
2626             int loc;
2627             int szNew;
2628             MemPage *pPage;
2629 121           Btree *pBt = pCur->pBt;
2630              
2631 121 50         if( pCur->pPage==0 ){
2632 0           return SQLITE_ABORT; /* A rollback destroyed this cursor */
2633             }
2634 121 50         if( !pBt->inTrans || nKey+nData==0 ){
    50          
2635             /* Must start a transaction before doing an insert */
2636 0 0         return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
2637             }
2638             assert( !pBt->readOnly );
2639 121 50         if( !pCur->wrFlag ){
2640 0           return SQLITE_PERM; /* Cursor not open for writing */
2641             }
2642 121 50         if( checkReadLocks(pCur) ){
2643 0           return SQLITE_LOCKED; /* The table pCur points to has a read lock */
2644             }
2645 121           rc = fileBtreeMoveto(pCur, pKey, nKey, &loc);
2646 121 50         if( rc ) return rc;
2647 121           pPage = pCur->pPage;
2648             assert( pPage->isInit );
2649 121           rc = sqlitepager_write(pPage);
2650 121 50         if( rc ) return rc;
2651 121           rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
2652 121 50         if( rc ) return rc;
2653 121           szNew = cellSize(pBt, &newCell);
2654 121 100         if( loc==0 ){
2655 22           newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild;
2656 22           rc = clearCell(pBt, pPage->apCell[pCur->idx]);
2657 22 50         if( rc ) return rc;
2658 22           dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pPage->apCell[pCur->idx]));
2659 99 50         }else if( loc<0 && pPage->nCell>0 ){
    100          
2660             assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
2661 62           pCur->idx++;
2662             }else{
2663             assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
2664             }
2665 121           insertCell(pBt, pPage, pCur->idx, &newCell, szNew);
2666 121           rc = balance(pCur->pBt, pPage, pCur);
2667             /* sqliteBtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
2668             /* fflush(stdout); */
2669 121           pCur->eSkip = SKIP_INVALID;
2670 121           return rc;
2671             }
2672              
2673             /*
2674             ** Delete the entry that the cursor is pointing to.
2675             **
2676             ** The cursor is left pointing at either the next or the previous
2677             ** entry. If the cursor is left pointing to the next entry, then
2678             ** the pCur->eSkip flag is set to SKIP_NEXT which forces the next call to
2679             ** sqliteBtreeNext() to be a no-op. That way, you can always call
2680             ** sqliteBtreeNext() after a delete and the cursor will be left
2681             ** pointing to the first entry after the deleted entry. Similarly,
2682             ** pCur->eSkip is set to SKIP_PREV is the cursor is left pointing to
2683             ** the entry prior to the deleted entry so that a subsequent call to
2684             ** sqliteBtreePrevious() will always leave the cursor pointing at the
2685             ** entry immediately before the one that was deleted.
2686             */
2687 17           static int fileBtreeDelete(BtCursor *pCur){
2688 17           MemPage *pPage = pCur->pPage;
2689             Cell *pCell;
2690             int rc;
2691             Pgno pgnoChild;
2692 17           Btree *pBt = pCur->pBt;
2693              
2694             assert( pPage->isInit );
2695 17 50         if( pCur->pPage==0 ){
2696 0           return SQLITE_ABORT; /* A rollback destroyed this cursor */
2697             }
2698 17 50         if( !pBt->inTrans ){
2699             /* Must start a transaction before doing a delete */
2700 0 0         return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
2701             }
2702             assert( !pBt->readOnly );
2703 17 50         if( pCur->idx >= pPage->nCell ){
2704 0           return SQLITE_ERROR; /* The cursor is not pointing to anything */
2705             }
2706 17 50         if( !pCur->wrFlag ){
2707 0           return SQLITE_PERM; /* Did not open this cursor for writing */
2708             }
2709 17 50         if( checkReadLocks(pCur) ){
2710 0           return SQLITE_LOCKED; /* The table pCur points to has a read lock */
2711             }
2712 17           rc = sqlitepager_write(pPage);
2713 17 50         if( rc ) return rc;
2714 17           pCell = pPage->apCell[pCur->idx];
2715 17 50         pgnoChild = SWAB32(pBt, pCell->h.leftChild);
2716 17           clearCell(pBt, pCell);
2717 17 50         if( pgnoChild ){
2718             /*
2719             ** The entry we are about to delete is not a leaf so if we do not
2720             ** do something we will leave a hole on an internal page.
2721             ** We have to fill the hole by moving in a cell from a leaf. The
2722             ** next Cell after the one to be deleted is guaranteed to exist and
2723             ** to be a leaf so we can use it.
2724             */
2725             BtCursor leafCur;
2726             Cell *pNext;
2727             int szNext;
2728             int notUsed;
2729 0           getTempCursor(pCur, &leafCur);
2730 0           rc = fileBtreeNext(&leafCur, ¬Used);
2731 0 0         if( rc!=SQLITE_OK ){
2732 0 0         if( rc!=SQLITE_NOMEM ) rc = SQLITE_CORRUPT;
2733 0           return rc;
2734             }
2735 0           rc = sqlitepager_write(leafCur.pPage);
2736 0 0         if( rc ) return rc;
2737 0           dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
2738 0           pNext = leafCur.pPage->apCell[leafCur.idx];
2739 0           szNext = cellSize(pBt, pNext);
2740 0 0         pNext->h.leftChild = SWAB32(pBt, pgnoChild);
2741 0           insertCell(pBt, pPage, pCur->idx, pNext, szNext);
2742 0           rc = balance(pBt, pPage, pCur);
2743 0 0         if( rc ) return rc;
2744 0           pCur->eSkip = SKIP_NEXT;
2745 0           dropCell(pBt, leafCur.pPage, leafCur.idx, szNext);
2746 0           rc = balance(pBt, leafCur.pPage, pCur);
2747 0           releaseTempCursor(&leafCur);
2748             }else{
2749 17           dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
2750 17 100         if( pCur->idx>=pPage->nCell ){
2751 15           pCur->idx = pPage->nCell-1;
2752 15 100         if( pCur->idx<0 ){
2753 13           pCur->idx = 0;
2754 13           pCur->eSkip = SKIP_NEXT;
2755             }else{
2756 15           pCur->eSkip = SKIP_PREV;
2757             }
2758             }else{
2759 2           pCur->eSkip = SKIP_NEXT;
2760             }
2761 17           rc = balance(pBt, pPage, pCur);
2762             }
2763 17           return rc;
2764             }
2765              
2766             /*
2767             ** Create a new BTree table. Write into *piTable the page
2768             ** number for the root page of the new table.
2769             **
2770             ** In the current implementation, BTree tables and BTree indices are the
2771             ** the same. In the future, we may change this so that BTree tables
2772             ** are restricted to having a 4-byte integer key and arbitrary data and
2773             ** BTree indices are restricted to having an arbitrary key and no data.
2774             ** But for now, this routine also serves to create indices.
2775             */
2776 27           static int fileBtreeCreateTable(Btree *pBt, int *piTable){
2777             MemPage *pRoot;
2778             Pgno pgnoRoot;
2779             int rc;
2780 27 50         if( !pBt->inTrans ){
2781             /* Must start a transaction first */
2782 0 0         return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
2783             }
2784 27 50         if( pBt->readOnly ){
2785 0           return SQLITE_READONLY;
2786             }
2787 27           rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0);
2788 27 50         if( rc ) return rc;
2789             assert( sqlitepager_iswriteable(pRoot) );
2790 27           zeroPage(pBt, pRoot);
2791 27           sqlitepager_unref(pRoot);
2792 27           *piTable = (int)pgnoRoot;
2793 27           return SQLITE_OK;
2794             }
2795              
2796             /*
2797             ** Erase the given database page and all its children. Return
2798             ** the page to the freelist.
2799             */
2800 11           static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
2801             MemPage *pPage;
2802             int rc;
2803             Cell *pCell;
2804             int idx;
2805              
2806 11           rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);
2807 11 50         if( rc ) return rc;
2808 11           rc = sqlitepager_write(pPage);
2809 11 50         if( rc ) return rc;
2810 11           rc = initPage(pBt, pPage, pgno, 0);
2811 11 50         if( rc ) return rc;
2812 11 50         idx = SWAB16(pBt, pPage->u.hdr.firstCell);
2813 30 100         while( idx>0 ){
2814 19           pCell = (Cell*)&pPage->u.aDisk[idx];
2815 19 50         idx = SWAB16(pBt, pCell->h.iNext);
2816 19 50         if( pCell->h.leftChild ){
2817 0 0         rc = clearDatabasePage(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
2818 0 0         if( rc ) return rc;
2819             }
2820 19           rc = clearCell(pBt, pCell);
2821 19 50         if( rc ) return rc;
2822             }
2823 11 50         if( pPage->u.hdr.rightChild ){
2824 0 0         rc = clearDatabasePage(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
2825 0 0         if( rc ) return rc;
2826             }
2827 11 50         if( freePageFlag ){
2828 0           rc = freePage(pBt, pPage, pgno);
2829             }else{
2830 11           zeroPage(pBt, pPage);
2831             }
2832 11           sqlitepager_unref(pPage);
2833 11           return rc;
2834             }
2835              
2836             /*
2837             ** Delete all information from a single table in the database.
2838             */
2839 11           static int fileBtreeClearTable(Btree *pBt, int iTable){
2840             int rc;
2841             BtCursor *pCur;
2842 11 50         if( !pBt->inTrans ){
2843 0 0         return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
2844             }
2845 11 50         for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
2846 0 0         if( pCur->pgnoRoot==(Pgno)iTable ){
2847 0 0         if( pCur->wrFlag==0 ) return SQLITE_LOCKED;
2848 0           moveToRoot(pCur);
2849             }
2850             }
2851 11           rc = clearDatabasePage(pBt, (Pgno)iTable, 0);
2852 11 50         if( rc ){
2853 0           fileBtreeRollback(pBt);
2854             }
2855 11           return rc;
2856             }
2857              
2858             /*
2859             ** Erase all information in a table and add the root of the table to
2860             ** the freelist. Except, the root of the principle table (the one on
2861             ** page 2) is never added to the freelist.
2862             */
2863 11           static int fileBtreeDropTable(Btree *pBt, int iTable){
2864             int rc;
2865             MemPage *pPage;
2866             BtCursor *pCur;
2867 11 50         if( !pBt->inTrans ){
2868 0 0         return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
2869             }
2870 11 50         for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
2871 0 0         if( pCur->pgnoRoot==(Pgno)iTable ){
2872 0           return SQLITE_LOCKED; /* Cannot drop a table that has a cursor */
2873             }
2874             }
2875 11           rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage);
2876 11 50         if( rc ) return rc;
2877 11           rc = fileBtreeClearTable(pBt, iTable);
2878 11 50         if( rc ) return rc;
2879 11 50         if( iTable>2 ){
2880 11           rc = freePage(pBt, pPage, iTable);
2881             }else{
2882 0           zeroPage(pBt, pPage);
2883             }
2884 11           sqlitepager_unref(pPage);
2885 11           return rc;
2886             }
2887              
2888             #if 0 /* UNTESTED */
2889             /*
2890             ** Copy all cell data from one database file into another.
2891             ** pages back the freelist.
2892             */
2893             static int copyCell(Btree *pBtFrom, BTree *pBtTo, Cell *pCell){
2894             Pager *pFromPager = pBtFrom->pPager;
2895             OverflowPage *pOvfl;
2896             Pgno ovfl, nextOvfl;
2897             Pgno *pPrev;
2898             int rc = SQLITE_OK;
2899             MemPage *pNew, *pPrevPg;
2900             Pgno new;
2901              
2902             if( NKEY(pBtTo, pCell->h) + NDATA(pBtTo, pCell->h) <= MX_LOCAL_PAYLOAD ){
2903             return SQLITE_OK;
2904             }
2905             pPrev = &pCell->ovfl;
2906             pPrevPg = 0;
2907             ovfl = SWAB32(pBtTo, pCell->ovfl);
2908             while( ovfl && rc==SQLITE_OK ){
2909             rc = sqlitepager_get(pFromPager, ovfl, (void**)&pOvfl);
2910             if( rc ) return rc;
2911             nextOvfl = SWAB32(pBtFrom, pOvfl->iNext);
2912             rc = allocatePage(pBtTo, &pNew, &new, 0);
2913             if( rc==SQLITE_OK ){
2914             rc = sqlitepager_write(pNew);
2915             if( rc==SQLITE_OK ){
2916             memcpy(pNew, pOvfl, SQLITE_USABLE_SIZE);
2917             *pPrev = SWAB32(pBtTo, new);
2918             if( pPrevPg ){
2919             sqlitepager_unref(pPrevPg);
2920             }
2921             pPrev = &pOvfl->iNext;
2922             pPrevPg = pNew;
2923             }
2924             }
2925             sqlitepager_unref(pOvfl);
2926             ovfl = nextOvfl;
2927             }
2928             if( pPrevPg ){
2929             sqlitepager_unref(pPrevPg);
2930             }
2931             return rc;
2932             }
2933             #endif
2934              
2935              
2936             #if 0 /* UNTESTED */
2937             /*
2938             ** Copy a page of data from one database over to another.
2939             */
2940             static int copyDatabasePage(
2941             Btree *pBtFrom,
2942             Pgno pgnoFrom,
2943             Btree *pBtTo,
2944             Pgno *pTo
2945             ){
2946             MemPage *pPageFrom, *pPage;
2947             Pgno to;
2948             int rc;
2949             Cell *pCell;
2950             int idx;
2951              
2952             rc = sqlitepager_get(pBtFrom->pPager, pgno, (void**)&pPageFrom);
2953             if( rc ) return rc;
2954             rc = allocatePage(pBt, &pPage, pTo, 0);
2955             if( rc==SQLITE_OK ){
2956             rc = sqlitepager_write(pPage);
2957             }
2958             if( rc==SQLITE_OK ){
2959             memcpy(pPage, pPageFrom, SQLITE_USABLE_SIZE);
2960             idx = SWAB16(pBt, pPage->u.hdr.firstCell);
2961             while( idx>0 ){
2962             pCell = (Cell*)&pPage->u.aDisk[idx];
2963             idx = SWAB16(pBt, pCell->h.iNext);
2964             if( pCell->h.leftChild ){
2965             Pgno newChld;
2966             rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pCell->h.leftChild),
2967             pBtTo, &newChld);
2968             if( rc ) return rc;
2969             pCell->h.leftChild = SWAB32(pBtFrom, newChld);
2970             }
2971             rc = copyCell(pBtFrom, pBtTo, pCell);
2972             if( rc ) return rc;
2973             }
2974             if( pPage->u.hdr.rightChild ){
2975             Pgno newChld;
2976             rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pPage->u.hdr.rightChild),
2977             pBtTo, &newChld);
2978             if( rc ) return rc;
2979             pPage->u.hdr.rightChild = SWAB32(pBtTo, newChild);
2980             }
2981             }
2982             sqlitepager_unref(pPage);
2983             return rc;
2984             }
2985             #endif
2986              
2987             /*
2988             ** Read the meta-information out of a database file.
2989             */
2990 254           static int fileBtreeGetMeta(Btree *pBt, int *aMeta){
2991             PageOne *pP1;
2992             int rc;
2993             int i;
2994              
2995 254           rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1);
2996 254 50         if( rc ) return rc;
2997 254 50         aMeta[0] = SWAB32(pBt, pP1->nFree);
2998 2540 100         for(i=0; iaMeta)/sizeof(pP1->aMeta[0]); i++){
2999 2286 50         aMeta[i+1] = SWAB32(pBt, pP1->aMeta[i]);
3000             }
3001 254           sqlitepager_unref(pP1);
3002 254           return SQLITE_OK;
3003             }
3004              
3005             /*
3006             ** Write meta-information back into the database.
3007             */
3008 53           static int fileBtreeUpdateMeta(Btree *pBt, int *aMeta){
3009             PageOne *pP1;
3010             int rc, i;
3011 53 50         if( !pBt->inTrans ){
3012 0 0         return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
3013             }
3014 53           pP1 = pBt->page1;
3015 53           rc = sqlitepager_write(pP1);
3016 53 50         if( rc ) return rc;
3017 530 100         for(i=0; iaMeta)/sizeof(pP1->aMeta[0]); i++){
3018 477 50         pP1->aMeta[i] = SWAB32(pBt, aMeta[i+1]);
3019             }
3020 53           return SQLITE_OK;
3021             }
3022              
3023             /******************************************************************************
3024             ** The complete implementation of the BTree subsystem is above this line.
3025             ** All the code the follows is for testing and troubleshooting the BTree
3026             ** subsystem. None of the code that follows is used during normal operation.
3027             ******************************************************************************/
3028              
3029             /*
3030             ** Print a disassembly of the given page on standard output. This routine
3031             ** is used for debugging and testing only.
3032             */
3033             #ifdef SQLITE_TEST
3034             static int fileBtreePageDump(Btree *pBt, int pgno, int recursive){
3035             int rc;
3036             MemPage *pPage;
3037             int i, j;
3038             int nFree;
3039             u16 idx;
3040             char range[20];
3041             unsigned char payload[20];
3042             rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
3043             if( rc ){
3044             return rc;
3045             }
3046             if( recursive ) printf("PAGE %d:\n", pgno);
3047             i = 0;
3048             idx = SWAB16(pBt, pPage->u.hdr.firstCell);
3049             while( idx>0 && idx<=SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
3050             Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
3051             int sz = cellSize(pBt, pCell);
3052             sprintf(range,"%d..%d", idx, idx+sz-1);
3053             sz = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
3054             if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
3055             memcpy(payload, pCell->aPayload, sz);
3056             for(j=0; j
3057             if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
3058             }
3059             payload[sz] = 0;
3060             printf(
3061             "cell %2d: i=%-10s chld=%-4d nk=%-4d nd=%-4d payload=%s\n",
3062             i, range, (int)pCell->h.leftChild,
3063             NKEY(pBt, pCell->h), NDATA(pBt, pCell->h),
3064             payload
3065             );
3066             if( pPage->isInit && pPage->apCell[i]!=pCell ){
3067             printf("**** apCell[%d] does not match on prior entry ****\n", i);
3068             }
3069             i++;
3070             idx = SWAB16(pBt, pCell->h.iNext);
3071             }
3072             if( idx!=0 ){
3073             printf("ERROR: next cell index out of range: %d\n", idx);
3074             }
3075             printf("right_child: %d\n", SWAB32(pBt, pPage->u.hdr.rightChild));
3076             nFree = 0;
3077             i = 0;
3078             idx = SWAB16(pBt, pPage->u.hdr.firstFree);
3079             while( idx>0 && idx
3080             FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
3081             sprintf(range,"%d..%d", idx, idx+p->iSize-1);
3082             nFree += SWAB16(pBt, p->iSize);
3083             printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
3084             i, range, SWAB16(pBt, p->iSize), nFree);
3085             idx = SWAB16(pBt, p->iNext);
3086             i++;
3087             }
3088             if( idx!=0 ){
3089             printf("ERROR: next freeblock index out of range: %d\n", idx);
3090             }
3091             if( recursive && pPage->u.hdr.rightChild!=0 ){
3092             idx = SWAB16(pBt, pPage->u.hdr.firstCell);
3093             while( idx>0 && idx
3094             Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
3095             fileBtreePageDump(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
3096             idx = SWAB16(pBt, pCell->h.iNext);
3097             }
3098             fileBtreePageDump(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
3099             }
3100             sqlitepager_unref(pPage);
3101             return SQLITE_OK;
3102             }
3103             #endif
3104              
3105             #ifdef SQLITE_TEST
3106             /*
3107             ** Fill aResult[] with information about the entry and page that the
3108             ** cursor is pointing to.
3109             **
3110             ** aResult[0] = The page number
3111             ** aResult[1] = The entry number
3112             ** aResult[2] = Total number of entries on this page
3113             ** aResult[3] = Size of this entry
3114             ** aResult[4] = Number of free bytes on this page
3115             ** aResult[5] = Number of free blocks on the page
3116             ** aResult[6] = Page number of the left child of this entry
3117             ** aResult[7] = Page number of the right child for the whole page
3118             **
3119             ** This routine is used for testing and debugging only.
3120             */
3121             static int fileBtreeCursorDump(BtCursor *pCur, int *aResult){
3122             int cnt, idx;
3123             MemPage *pPage = pCur->pPage;
3124             Btree *pBt = pCur->pBt;
3125             aResult[0] = sqlitepager_pagenumber(pPage);
3126             aResult[1] = pCur->idx;
3127             aResult[2] = pPage->nCell;
3128             if( pCur->idx>=0 && pCur->idxnCell ){
3129             aResult[3] = cellSize(pBt, pPage->apCell[pCur->idx]);
3130             aResult[6] = SWAB32(pBt, pPage->apCell[pCur->idx]->h.leftChild);
3131             }else{
3132             aResult[3] = 0;
3133             aResult[6] = 0;
3134             }
3135             aResult[4] = pPage->nFree;
3136             cnt = 0;
3137             idx = SWAB16(pBt, pPage->u.hdr.firstFree);
3138             while( idx>0 && idx
3139             cnt++;
3140             idx = SWAB16(pBt, ((FreeBlk*)&pPage->u.aDisk[idx])->iNext);
3141             }
3142             aResult[5] = cnt;
3143             aResult[7] = SWAB32(pBt, pPage->u.hdr.rightChild);
3144             return SQLITE_OK;
3145             }
3146             #endif
3147              
3148             /*
3149             ** Return the pager associated with a BTree. This routine is used for
3150             ** testing and debugging only.
3151             */
3152 0           static Pager *fileBtreePager(Btree *pBt){
3153 0           return pBt->pPager;
3154             }
3155              
3156             /*
3157             ** This structure is passed around through all the sanity checking routines
3158             ** in order to keep track of some global state information.
3159             */
3160             typedef struct IntegrityCk IntegrityCk;
3161             struct IntegrityCk {
3162             Btree *pBt; /* The tree being checked out */
3163             Pager *pPager; /* The associated pager. Also accessible by pBt->pPager */
3164             int nPage; /* Number of pages in the database */
3165             int *anRef; /* Number of times each page is referenced */
3166             char *zErrMsg; /* An error message. NULL of no errors seen. */
3167             };
3168              
3169             /*
3170             ** Append a message to the error message string.
3171             */
3172 0           static void checkAppendMsg(IntegrityCk *pCheck, char *zMsg1, char *zMsg2){
3173 0 0         if( pCheck->zErrMsg ){
3174 0           char *zOld = pCheck->zErrMsg;
3175 0           pCheck->zErrMsg = 0;
3176 0           sqliteSetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0);
3177 0           sqliteFree(zOld);
3178             }else{
3179 0           sqliteSetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
3180             }
3181 0           }
3182              
3183             /*
3184             ** Add 1 to the reference count for page iPage. If this is the second
3185             ** reference to the page, add an error message to pCheck->zErrMsg.
3186             ** Return 1 if there are 2 ore more references to the page and 0 if
3187             ** if this is the first reference to the page.
3188             **
3189             ** Also check that the page number is in bounds.
3190             */
3191 0           static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
3192 0 0         if( iPage==0 ) return 1;
3193 0 0         if( iPage>pCheck->nPage || iPage<0 ){
    0          
3194             char zBuf[100];
3195 0           sprintf(zBuf, "invalid page number %d", iPage);
3196 0           checkAppendMsg(pCheck, zContext, zBuf);
3197 0           return 1;
3198             }
3199 0 0         if( pCheck->anRef[iPage]==1 ){
3200             char zBuf[100];
3201 0           sprintf(zBuf, "2nd reference to page %d", iPage);
3202 0           checkAppendMsg(pCheck, zContext, zBuf);
3203 0           return 1;
3204             }
3205 0           return (pCheck->anRef[iPage]++)>1;
3206             }
3207              
3208             /*
3209             ** Check the integrity of the freelist or of an overflow page list.
3210             ** Verify that the number of pages on the list is N.
3211             */
3212 0           static void checkList(
3213             IntegrityCk *pCheck, /* Integrity checking context */
3214             int isFreeList, /* True for a freelist. False for overflow page list */
3215             int iPage, /* Page number for first page in the list */
3216             int N, /* Expected number of pages in the list */
3217             char *zContext /* Context for error messages */
3218             ){
3219             int i;
3220             char zMsg[100];
3221 0 0         while( N-- > 0 ){
3222             OverflowPage *pOvfl;
3223 0 0         if( iPage<1 ){
3224 0           sprintf(zMsg, "%d pages missing from overflow list", N+1);
3225 0           checkAppendMsg(pCheck, zContext, zMsg);
3226 0           break;
3227             }
3228 0 0         if( checkRef(pCheck, iPage, zContext) ) break;
3229 0 0         if( sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pOvfl) ){
3230 0           sprintf(zMsg, "failed to get page %d", iPage);
3231 0           checkAppendMsg(pCheck, zContext, zMsg);
3232 0           break;
3233             }
3234 0 0         if( isFreeList ){
3235 0           FreelistInfo *pInfo = (FreelistInfo*)pOvfl->aPayload;
3236 0 0         int n = SWAB32(pCheck->pBt, pInfo->nFree);
3237 0 0         for(i=0; i
3238 0 0         checkRef(pCheck, SWAB32(pCheck->pBt, pInfo->aFree[i]), zContext);
3239             }
3240 0           N -= n;
3241             }
3242 0 0         iPage = SWAB32(pCheck->pBt, pOvfl->iNext);
3243 0           sqlitepager_unref(pOvfl);
3244             }
3245 0           }
3246              
3247             /*
3248             ** Return negative if zKey1
3249             ** Return zero if zKey1==zKey2.
3250             ** Return positive if zKey1>zKey2.
3251             */
3252 0           static int keyCompare(
3253             const char *zKey1, int nKey1,
3254             const char *zKey2, int nKey2
3255             ){
3256 0           int min = nKey1>nKey2 ? nKey2 : nKey1;
3257 0           int c = memcmp(zKey1, zKey2, min);
3258 0 0         if( c==0 ){
3259 0           c = nKey1 - nKey2;
3260             }
3261 0           return c;
3262             }
3263              
3264             /*
3265             ** Do various sanity checks on a single page of a tree. Return
3266             ** the tree depth. Root pages return 0. Parents of root pages
3267             ** return 1, and so forth.
3268             **
3269             ** These checks are done:
3270             **
3271             ** 1. Make sure that cells and freeblocks do not overlap
3272             ** but combine to completely cover the page.
3273             ** 2. Make sure cell keys are in order.
3274             ** 3. Make sure no key is less than or equal to zLowerBound.
3275             ** 4. Make sure no key is greater than or equal to zUpperBound.
3276             ** 5. Check the integrity of overflow pages.
3277             ** 6. Recursively call checkTreePage on all children.
3278             ** 7. Verify that the depth of all children is the same.
3279             ** 8. Make sure this page is at least 33% full or else it is
3280             ** the root of the tree.
3281             */
3282 0           static int checkTreePage(
3283             IntegrityCk *pCheck, /* Context for the sanity check */
3284             int iPage, /* Page number of the page to check */
3285             MemPage *pParent, /* Parent page */
3286             char *zParentContext, /* Parent context */
3287             char *zLowerBound, /* All keys should be greater than this, if not NULL */
3288             int nLower, /* Number of characters in zLowerBound */
3289             char *zUpperBound, /* All keys should be less than this, if not NULL */
3290             int nUpper /* Number of characters in zUpperBound */
3291             ){
3292             MemPage *pPage;
3293             int i, rc, depth, d2, pgno;
3294             char *zKey1, *zKey2;
3295             int nKey1, nKey2;
3296             BtCursor cur;
3297             Btree *pBt;
3298             char zMsg[100];
3299             char zContext[100];
3300             char hit[SQLITE_USABLE_SIZE];
3301              
3302             /* Check that the page exists
3303             */
3304 0           cur.pBt = pBt = pCheck->pBt;
3305 0 0         if( iPage==0 ) return 0;
3306 0 0         if( checkRef(pCheck, iPage, zParentContext) ) return 0;
3307 0           sprintf(zContext, "On tree page %d: ", iPage);
3308 0 0         if( (rc = sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pPage))!=0 ){
3309 0           sprintf(zMsg, "unable to get the page. error code=%d", rc);
3310 0           checkAppendMsg(pCheck, zContext, zMsg);
3311 0           return 0;
3312             }
3313 0 0         if( (rc = initPage(pBt, pPage, (Pgno)iPage, pParent))!=0 ){
3314 0           sprintf(zMsg, "initPage() returns error code %d", rc);
3315 0           checkAppendMsg(pCheck, zContext, zMsg);
3316 0           sqlitepager_unref(pPage);
3317 0           return 0;
3318             }
3319              
3320             /* Check out all the cells.
3321             */
3322 0           depth = 0;
3323 0 0         if( zLowerBound ){
3324 0           zKey1 = sqliteMalloc( nLower+1 );
3325 0           memcpy(zKey1, zLowerBound, nLower);
3326 0           zKey1[nLower] = 0;
3327             }else{
3328 0           zKey1 = 0;
3329             }
3330 0           nKey1 = nLower;
3331 0           cur.pPage = pPage;
3332 0 0         for(i=0; inCell; i++){
3333 0           Cell *pCell = pPage->apCell[i];
3334             int sz;
3335              
3336             /* Check payload overflow pages
3337             */
3338 0 0         nKey2 = NKEY(pBt, pCell->h);
3339 0 0         sz = nKey2 + NDATA(pBt, pCell->h);
3340 0           sprintf(zContext, "On page %d cell %d: ", iPage, i);
3341 0 0         if( sz>MX_LOCAL_PAYLOAD ){
3342 0           int nPage = (sz - MX_LOCAL_PAYLOAD + OVERFLOW_SIZE - 1)/OVERFLOW_SIZE;
3343 0 0         checkList(pCheck, 0, SWAB32(pBt, pCell->ovfl), nPage, zContext);
3344             }
3345              
3346             /* Check that keys are in the right order
3347             */
3348 0           cur.idx = i;
3349 0           zKey2 = sqliteMallocRaw( nKey2+1 );
3350 0           getPayload(&cur, 0, nKey2, zKey2);
3351 0 0         if( zKey1 && keyCompare(zKey1, nKey1, zKey2, nKey2)>=0 ){
    0          
3352 0           checkAppendMsg(pCheck, zContext, "Key is out of order");
3353             }
3354              
3355             /* Check sanity of left child page.
3356             */
3357 0 0         pgno = SWAB32(pBt, pCell->h.leftChild);
3358 0           d2 = checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zKey2,nKey2);
3359 0 0         if( i>0 && d2!=depth ){
    0          
3360 0           checkAppendMsg(pCheck, zContext, "Child page depth differs");
3361             }
3362 0           depth = d2;
3363 0           sqliteFree(zKey1);
3364 0           zKey1 = zKey2;
3365 0           nKey1 = nKey2;
3366             }
3367 0 0         pgno = SWAB32(pBt, pPage->u.hdr.rightChild);
3368 0           sprintf(zContext, "On page %d at right child: ", iPage);
3369 0           checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zUpperBound,nUpper);
3370 0           sqliteFree(zKey1);
3371            
3372             /* Check for complete coverage of the page
3373             */
3374 0           memset(hit, 0, sizeof(hit));
3375 0           memset(hit, 1, sizeof(PageHdr));
3376 0 0         for(i=SWAB16(pBt, pPage->u.hdr.firstCell); i>0 && i
    0          
    0          
3377 0           Cell *pCell = (Cell*)&pPage->u.aDisk[i];
3378             int j;
3379 0 0         for(j=i+cellSize(pBt, pCell)-1; j>=i; j--) hit[j]++;
3380 0 0         i = SWAB16(pBt, pCell->h.iNext);
3381             }
3382 0 0         for(i=SWAB16(pBt,pPage->u.hdr.firstFree); i>0 && i
    0          
    0          
3383 0           FreeBlk *pFBlk = (FreeBlk*)&pPage->u.aDisk[i];
3384             int j;
3385 0 0         for(j=i+SWAB16(pBt,pFBlk->iSize)-1; j>=i; j--) hit[j]++;
    0          
3386 0 0         i = SWAB16(pBt,pFBlk->iNext);
3387             }
3388 0 0         for(i=0; i
3389 0 0         if( hit[i]==0 ){
3390 0           sprintf(zMsg, "Unused space at byte %d of page %d", i, iPage);
3391 0           checkAppendMsg(pCheck, zMsg, 0);
3392 0           break;
3393 0 0         }else if( hit[i]>1 ){
3394 0           sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
3395 0           checkAppendMsg(pCheck, zMsg, 0);
3396 0           break;
3397             }
3398             }
3399              
3400             /* Check that free space is kept to a minimum
3401             */
3402             #if 0
3403             if( pParent && pParent->nCell>2 && pPage->nFree>3*SQLITE_USABLE_SIZE/4 ){
3404             sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
3405             SQLITE_USABLE_SIZE/3);
3406             checkAppendMsg(pCheck, zContext, zMsg);
3407             }
3408             #endif
3409              
3410 0           sqlitepager_unref(pPage);
3411 0           return depth;
3412             }
3413              
3414             /*
3415             ** This routine does a complete check of the given BTree file. aRoot[] is
3416             ** an array of pages numbers were each page number is the root page of
3417             ** a table. nRoot is the number of entries in aRoot.
3418             **
3419             ** If everything checks out, this routine returns NULL. If something is
3420             ** amiss, an error message is written into memory obtained from malloc()
3421             ** and a pointer to that error message is returned. The calling function
3422             ** is responsible for freeing the error message when it is done.
3423             */
3424 0           char *fileBtreeIntegrityCheck(Btree *pBt, int *aRoot, int nRoot){
3425             int i;
3426             int nRef;
3427             IntegrityCk sCheck;
3428              
3429 0           nRef = *sqlitepager_stats(pBt->pPager);
3430 0 0         if( lockBtree(pBt)!=SQLITE_OK ){
3431 0           return sqliteStrDup("Unable to acquire a read lock on the database");
3432             }
3433 0           sCheck.pBt = pBt;
3434 0           sCheck.pPager = pBt->pPager;
3435 0           sCheck.nPage = sqlitepager_pagecount(sCheck.pPager);
3436 0 0         if( sCheck.nPage==0 ){
3437 0           unlockBtreeIfUnused(pBt);
3438 0           return 0;
3439             }
3440 0           sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
3441 0           sCheck.anRef[1] = 1;
3442 0 0         for(i=2; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
3443 0           sCheck.zErrMsg = 0;
3444              
3445             /* Check the integrity of the freelist
3446             */
3447 0 0         checkList(&sCheck, 1, SWAB32(pBt, pBt->page1->freeList),
    0          
3448 0           SWAB32(pBt, pBt->page1->nFree), "Main freelist: ");
3449              
3450             /* Check all the tables.
3451             */
3452 0 0         for(i=0; i
3453 0 0         if( aRoot[i]==0 ) continue;
3454 0           checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ", 0,0,0,0);
3455             }
3456              
3457             /* Make sure every page in the file is referenced
3458             */
3459 0 0         for(i=1; i<=sCheck.nPage; i++){
3460 0 0         if( sCheck.anRef[i]==0 ){
3461             char zBuf[100];
3462 0           sprintf(zBuf, "Page %d is never used", i);
3463 0           checkAppendMsg(&sCheck, zBuf, 0);
3464             }
3465             }
3466              
3467             /* Make sure this analysis did not leave any unref() pages
3468             */
3469 0           unlockBtreeIfUnused(pBt);
3470 0 0         if( nRef != *sqlitepager_stats(pBt->pPager) ){
3471             char zBuf[100];
3472 0           sprintf(zBuf,
3473             "Outstanding page count goes from %d to %d during this analysis",
3474 0           nRef, *sqlitepager_stats(pBt->pPager)
3475             );
3476 0           checkAppendMsg(&sCheck, zBuf, 0);
3477             }
3478              
3479             /* Clean up and report errors.
3480             */
3481 0           sqliteFree(sCheck.anRef);
3482 0           return sCheck.zErrMsg;
3483             }
3484              
3485             /*
3486             ** Return the full pathname of the underlying database file.
3487             */
3488 0           static const char *fileBtreeGetFilename(Btree *pBt){
3489             assert( pBt->pPager!=0 );
3490 0           return sqlitepager_filename(pBt->pPager);
3491             }
3492              
3493             /*
3494             ** Copy the complete content of pBtFrom into pBtTo. A transaction
3495             ** must be active for both files.
3496             **
3497             ** The size of file pBtFrom may be reduced by this operation.
3498             ** If anything goes wrong, the transaction on pBtFrom is rolled back.
3499             */
3500 0           static int fileBtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){
3501 0           int rc = SQLITE_OK;
3502             Pgno i, nPage, nToPage;
3503              
3504 0 0         if( !pBtTo->inTrans || !pBtFrom->inTrans ) return SQLITE_ERROR;
    0          
3505 0 0         if( pBtTo->needSwab!=pBtFrom->needSwab ) return SQLITE_ERROR;
3506 0 0         if( pBtTo->pCursor ) return SQLITE_BUSY;
3507 0           memcpy(pBtTo->page1, pBtFrom->page1, SQLITE_USABLE_SIZE);
3508 0           rc = sqlitepager_overwrite(pBtTo->pPager, 1, pBtFrom->page1);
3509 0           nToPage = sqlitepager_pagecount(pBtTo->pPager);
3510 0           nPage = sqlitepager_pagecount(pBtFrom->pPager);
3511 0 0         for(i=2; rc==SQLITE_OK && i<=nPage; i++){
    0          
3512             void *pPage;
3513 0           rc = sqlitepager_get(pBtFrom->pPager, i, &pPage);
3514 0 0         if( rc ) break;
3515 0           rc = sqlitepager_overwrite(pBtTo->pPager, i, pPage);
3516 0 0         if( rc ) break;
3517 0           sqlitepager_unref(pPage);
3518             }
3519 0 0         for(i=nPage+1; rc==SQLITE_OK && i<=nToPage; i++){
    0          
3520             void *pPage;
3521 0           rc = sqlitepager_get(pBtTo->pPager, i, &pPage);
3522 0 0         if( rc ) break;
3523 0           rc = sqlitepager_write(pPage);
3524 0           sqlitepager_unref(pPage);
3525 0           sqlitepager_dont_write(pBtTo->pPager, i);
3526             }
3527 0 0         if( !rc && nPage
    0          
3528 0           rc = sqlitepager_truncate(pBtTo->pPager, nPage);
3529             }
3530 0 0         if( rc ){
3531 0           fileBtreeRollback(pBtTo);
3532             }
3533 0           return rc;
3534             }
3535              
3536             /*
3537             ** The following tables contain pointers to all of the interface
3538             ** routines for this implementation of the B*Tree backend. To
3539             ** substitute a different implemention of the backend, one has merely
3540             ** to provide pointers to alternative functions in similar tables.
3541             */
3542             static BtOps sqliteBtreeOps = {
3543             fileBtreeClose,
3544             fileBtreeSetCacheSize,
3545             fileBtreeSetSafetyLevel,
3546             fileBtreeBeginTrans,
3547             fileBtreeCommit,
3548             fileBtreeRollback,
3549             fileBtreeBeginCkpt,
3550             fileBtreeCommitCkpt,
3551             fileBtreeRollbackCkpt,
3552             fileBtreeCreateTable,
3553             fileBtreeCreateTable, /* Really sqliteBtreeCreateIndex() */
3554             fileBtreeDropTable,
3555             fileBtreeClearTable,
3556             fileBtreeCursor,
3557             fileBtreeGetMeta,
3558             fileBtreeUpdateMeta,
3559             fileBtreeIntegrityCheck,
3560             fileBtreeGetFilename,
3561             fileBtreeCopyFile,
3562             fileBtreePager,
3563             #ifdef SQLITE_TEST
3564             fileBtreePageDump,
3565             #endif
3566             };
3567             static BtCursorOps sqliteBtreeCursorOps = {
3568             fileBtreeMoveto,
3569             fileBtreeDelete,
3570             fileBtreeInsert,
3571             fileBtreeFirst,
3572             fileBtreeLast,
3573             fileBtreeNext,
3574             fileBtreePrevious,
3575             fileBtreeKeySize,
3576             fileBtreeKey,
3577             fileBtreeKeyCompare,
3578             fileBtreeDataSize,
3579             fileBtreeData,
3580             fileBtreeCloseCursor,
3581             #ifdef SQLITE_TEST
3582             fileBtreeCursorDump,
3583             #endif
3584             };