Bug fixes for GiST crash recovery.
author Teodor Sigaev
Thu, 30 Jun 2005 17:52:14 +0000 (17:52 +0000)
committer Teodor Sigaev
Thu, 30 Jun 2005 17:52:14 +0000 (17:52 +0000)
- add forgotten check of LSN for insert completion
- remove the page level field: it is hard to check in recovery
- some cleanups

src/backend/access/gist/gist.c
src/backend/access/gist/gistutil.c
src/backend/access/gist/gistvacuum.c
src/backend/access/gist/gistxlog.c
src/include/access/gist.h
src/include/access/gist_private.h

index 2e752252537d05b8c119568e0deba1b753749950..5ce3fceba6b8c135fe63d46656ad85fd83245c54 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.124 2005/06/29 14:06:14 teodor Exp $
+ *   $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.125 2005/06/30 17:52:13 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -587,7 +587,7 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
  * Should have the same interface as XLogReadBuffer
  */
 static Buffer
-gistReadAndLockBuffer( bool unused, Relation r, BlockNumber blkno ) {
+gistReadAndLockBuffer( Relation r, BlockNumber blkno ) {
    Buffer  buffer = ReadBuffer( r, blkno );
    LockBuffer( buffer, GIST_SHARE );
    return buffer;  
@@ -601,7 +601,7 @@ gistReadAndLockBuffer( bool unused, Relation r, BlockNumber blkno ) {
  * returns from the beginning of closest parent; 
  */
 GISTInsertStack*
-gistFindPath( Relation r, BlockNumber child, Buffer  (*myReadBuffer)(bool, Relation, BlockNumber) ) {
+gistFindPath( Relation r, BlockNumber child, Buffer  (*myReadBuffer)(Relation, BlockNumber) ) {
    Page    page;
    Buffer  buffer;
    OffsetNumber i, maxoff;
@@ -614,9 +614,15 @@ gistFindPath( Relation r, BlockNumber child, Buffer  (*myReadBuffer)(bool, Relat
    top->blkno = GIST_ROOT_BLKNO;
 
    while( top && top->blkno != child ) {
-       buffer = myReadBuffer(false, r, top->blkno); /* buffer locked */
+       buffer = myReadBuffer(r, top->blkno); /* buffer locked */
        page = (Page)BufferGetPage( buffer );
-       Assert( !GistPageIsLeaf(page) );    
+
+       if ( GistPageIsLeaf(page) ) {
+           /* we can safely stop here: only leaf pages follow */
+           LockBuffer( buffer, GIST_UNLOCK );
+           ReleaseBuffer( buffer );
+           return NULL;
+       }
 
        top->lsn = PageGetLSN(page);    
 
@@ -662,7 +668,7 @@ gistFindPath( Relation r, BlockNumber child, Buffer  (*myReadBuffer)(bool, Relat
                LockBuffer( buffer, GIST_UNLOCK );
                ReleaseBuffer( buffer );
                return top;
-           } else if ( GistPageGetOpaque(page)->level> 0 ) {
+           } else  {
                /* Install next inner page to the end of stack */
                ptr = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) ); 
                ptr->blkno = blkno;
@@ -855,11 +861,9 @@ gistSplit(Relation r,
    OffsetNumber    *realoffset;
    IndexTuple  *cleaneditup = itup;
    int lencleaneditup = *len;
-   int level;
 
    p = (Page) BufferGetPage(buffer);
    opaque = GistPageGetOpaque(p);
-   level = opaque->level;
 
    /*
     * The root of the tree is the first block in the relation.  If we're
@@ -872,7 +876,6 @@ gistSplit(Relation r,
        GISTInitBuffer(leftbuf, opaque->flags&F_LEAF);
        lbknum = BufferGetBlockNumber(leftbuf);
        left = (Page) BufferGetPage(leftbuf);
-       GistPageGetOpaque(left)->level = level;
    }
    else
    {
@@ -886,7 +889,6 @@ gistSplit(Relation r,
    GISTInitBuffer(rightbuf, opaque->flags&F_LEAF);
    rbknum = BufferGetBlockNumber(rightbuf);
    right = (Page) BufferGetPage(rightbuf);
-   GistPageGetOpaque(right)->level = level;
 
    /* generate the item array */
    realoffset = palloc((*len + 1) * sizeof(OffsetNumber));
@@ -1068,13 +1070,10 @@ void
 gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer key)
 {
    Page        page;
-   int     level;
 
    Assert( BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO );
    page = BufferGetPage(buffer);
-   level = GistPageGetOpaque(page)->level;
    GISTInitBuffer(buffer, 0);
-   GistPageGetOpaque(page)->level = level+1;
 
    gistfillbuffer(r, page, itup, len, FirstOffsetNumber);
    if ( !r->rd_istemp ) {
index 031914a37c4778a951c6f322f22d44e784a1f934..5b6d13a7a37d292ac17fb2baa7ea9ae145c7f877 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *          $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.4 2005/06/28 15:51:00 teodor Exp $
+ *          $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.5 2005/06/30 17:52:14 teodor Exp $
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -809,8 +809,6 @@ GISTInitBuffer(Buffer b, uint32 f)
 
    opaque = GistPageGetOpaque(page);
    opaque->flags = f;
-   opaque->nsplited = 0;
-   opaque->level = 0;
    opaque->rightlink = InvalidBlockNumber;
    memset( &(opaque->nsn), 0, sizeof(GistNSN) );
 }
index cf6d89d27b123c0369f853d1c7fb5181ba911c79..381cf98559005c0a814c87f4e2e67b05aa789dad 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.5 2005/06/29 14:06:14 teodor Exp $
+ *   $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.6 2005/06/30 17:52:14 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -60,7 +60,6 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
    page = (Page) BufferGetPage(buffer);
    maxoff = PageGetMaxOffsetNumber(page);
 
-
    if ( GistPageIsLeaf(page) ) {
        if ( GistTuplesDeleted(page) ) {
            needunion = needwrite = true;
index 30fd5b71eebee7c77e7ff93cf3f733e979771849..15acb18c80d9a83ce745bfca3f56ced6bfc05cde 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *           $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.5 2005/06/28 15:51:00 teodor Exp $
+ *           $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.6 2005/06/30 17:52:14 teodor Exp $
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -44,6 +44,7 @@ typedef struct {
 
 typedef struct gistIncompleteInsert {
    RelFileNode node;
+   BlockNumber origblkno; /* for splits */
    ItemPointerData key;
    int     lenblk;
    BlockNumber *blkno;
@@ -79,6 +80,7 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
        ninsert->lenblk = lenblk;
        ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk );
        memcpy(ninsert->blkno, blkno, sizeof(BlockNumber)*ninsert->lenblk);
+       ninsert->origblkno = *blkno;
    } else {
        int i;
 
@@ -87,6 +89,7 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
        ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk );
        for(i=0;i<ninsert->lenblk;i++)
            ninsert->blkno[i] = xlinfo->page[i].header->blkno;
+       ninsert->origblkno = xlinfo->data->origblkno;
    }
    Assert( ninsert->lenblk>0 );
    
@@ -209,6 +212,7 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) {
 
    PageSetLSN(page, lsn);
    PageSetTLI(page, ThisTimeLineID);
+   GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    WriteBuffer(buffer);
 
@@ -466,81 +470,98 @@ gist_form_invalid_tuple(BlockNumber blkno) {
    return tuple;
 }
 
+static Buffer
+gistXLogReadAndLockBuffer( Relation r, BlockNumber blkno ) {
+   Buffer  buffer = XLogReadBuffer( false, r, blkno );
+   if (!BufferIsValid(buffer))
+       elog(PANIC, "gistXLogReadAndLockBuffer: block %u unfound", blkno);
+   if ( PageIsNew( (PageHeader)(BufferGetPage(buffer)) ) )
+       elog(PANIC, "gistXLogReadAndLockBuffer: uninitialized page %u", blkno);
+   
+   return buffer;
+}
+
+
 static void
 gixtxlogFindPath( Relation index, gistIncompleteInsert *insert ) {
-   int i;
    GISTInsertStack *top;
    
    insert->pathlen = 0;
    insert->path = NULL;
 
-   for(i=0;insert->lenblk;i++) {
-       if ( (top=gistFindPath(index, insert->blkno[i], XLogReadBuffer)) != NULL ) {
-           GISTInsertStack *ptr=top;
-           while(ptr) {
-               insert->pathlen++;
-               ptr = ptr->parent;
-           }
+   if ( (top=gistFindPath(index, insert->origblkno, gistXLogReadAndLockBuffer)) != NULL ) {
+       int i;
+       GISTInsertStack *ptr=top;
+       while(ptr) {
+           insert->pathlen++;
+           ptr = ptr->parent;
+       }
 
-           insert->path=(BlockNumber*)palloc( sizeof(BlockNumber) * insert->pathlen );
+       insert->path=(BlockNumber*)palloc( sizeof(BlockNumber) * insert->pathlen );
 
-           i=0;
-           ptr = top;
-           while(ptr) {
-               insert->path[i] = ptr->blkno;
-               i++;
-               ptr = ptr->parent;
-           }
-           break;
+       i=0;
+       ptr = top;
+       while(ptr) {
+           insert->path[i] = ptr->blkno;
+           i++;
+           ptr = ptr->parent;
        }
-   }
+   } else
+       elog(LOG, "gixtxlogFindPath: lost parent for block %u", insert->origblkno);
 }
 
 static void
 gistContinueInsert(gistIncompleteInsert *insert) {
    IndexTuple   *itup;
    int i, lenitup;
-   MemoryContext oldCxt;
    Relation index;
 
-   oldCxt = MemoryContextSwitchTo(opCtx);
-   
    index = XLogOpenRelation(insert->node);
-   if (!RelationIsValid(index))
+   if (!RelationIsValid(index)) 
        return;
 
-   elog(LOG,"Detected incomplete insert into GiST index %u/%u/%u; It's desirable to vacuum or reindex index",
-        insert->node.spcNode, insert->node.dbNode, insert->node.relNode);
-
    /* needed vector itup never will be more than initial lenblkno+2, 
            because during this processing Indextuple can be only smaller */ 
    lenitup = insert->lenblk;   
    itup = (IndexTuple*)palloc(sizeof(IndexTuple)*(lenitup+2 /*guarantee root split*/));
 
-   for(i=0;i<insert->lenblk;i++) 
+   for(i=0;i<insert->lenblk;i++)
        itup[i] = gist_form_invalid_tuple( insert->blkno[i] );
 
-   /* construct path */
-   gixtxlogFindPath( index, insert );
-
-   if ( insert->pathlen==0 ) {
-       /*it  was split root, so we should only make new root*/
+   if ( insert->origblkno==GIST_ROOT_BLKNO ) {
+       /* it was a root split, so we only need to make a new root.
+         it can't be a simple insert into the root; see the call to
+         pushIncompleteInsert in gistRedoPageSplitRecord */
            Buffer buffer = XLogReadBuffer(true, index, GIST_ROOT_BLKNO);
            Page   page;
 
        if (!BufferIsValid(buffer))
            elog(PANIC, "gistContinueInsert: root block unfound");
 
+           page = BufferGetPage(buffer);
+       if (XLByteLE(insert->lsn, PageGetLSN(page))) {
+           LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+           ReleaseBuffer(buffer);
+           return;
+       }
+
            GISTInitBuffer(buffer, 0);
            page = BufferGetPage(buffer);
            gistfillbuffer(index, page, itup, lenitup, FirstOffsetNumber);
+       PageSetLSN(page, insert->lsn);
+       PageSetTLI(page, ThisTimeLineID);
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
            WriteBuffer(buffer);
    } else {
        Buffer  *buffers;
        Page    *pages;
        int numbuffer;
-       
+
+       /* construct path */
+       gixtxlogFindPath( index, insert );
+
+       Assert( insert->pathlen > 0 );
+
        buffers= (Buffer*) palloc( sizeof(Buffer) * (insert->lenblk+2/*guarantee root split*/) );
        pages  = (Page*)   palloc( sizeof(Page  ) * (insert->lenblk+2/*guarantee root split*/) );
 
@@ -555,6 +576,12 @@ gistContinueInsert(gistIncompleteInsert *insert) {
            if ( PageIsNew((PageHeader)(pages[numbuffer-1])) )
                elog(PANIC, "gistContinueInsert: uninitialized page");
 
+           if (XLByteLE(insert->lsn, PageGetLSN(pages[numbuffer-1]))) {
+               LockBuffer(buffers[numbuffer-1], BUFFER_LOCK_UNLOCK);
+               ReleaseBuffer(buffers[numbuffer-1]);
+               return;
+           }
+
            pituplen = PageGetMaxOffsetNumber(pages[numbuffer-1]);
            
            /* remove old IndexTuples */
@@ -587,9 +614,10 @@ gistContinueInsert(gistIncompleteInsert *insert) {
                if ( BufferGetBlockNumber( buffers[0] ) == GIST_ROOT_BLKNO ) {
                    IndexTuple *parentitup;
 
+                   /* we split root, just copy tuples from old root to new page */
                    parentitup = gistextractbuffer(buffers[numbuffer-1], &pituplen);
 
-                   /* we split root, just copy tuples from old root to new page */
+                   /* sanity check */
                    if ( i+1 != insert->pathlen )
                        elog(PANIC,"gistContinueInsert: can't restore index '%s'",
                            RelationGetRelationName( index ));
@@ -624,14 +652,15 @@ gistContinueInsert(gistIncompleteInsert *insert) {
                itup[j]=gist_form_invalid_tuple( BufferGetBlockNumber( buffers[j] ) );
                PageSetLSN(pages[j], insert->lsn);
                PageSetTLI(pages[j], ThisTimeLineID);
+               GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber;
                LockBuffer(buffers[j], BUFFER_LOCK_UNLOCK);
                WriteBuffer( buffers[j] );
            }
        }
    }
 
-   MemoryContextSwitchTo(oldCxt);
-   MemoryContextReset(opCtx);
+   elog(LOG,"Detected incomplete insert into GiST index %u/%u/%u; It's desirable to vacuum or reindex index",
+        insert->node.spcNode, insert->node.dbNode, insert->node.relNode);
 }
 
 void
@@ -648,11 +677,22 @@ gist_xlog_startup(void) {
 void
 gist_xlog_cleanup(void) {
    ListCell   *l;
+   List *reverse=NIL;
+   MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx);
 
-   foreach(l, incomplete_inserts) {
+   /* we should call gistContinueInsert in reverse order */
+
+   foreach(l, incomplete_inserts) 
+       reverse = lappend(reverse, lfirst(l));
+
+   MemoryContextSwitchTo(opCtx);
+   foreach(l, reverse) {
        gistIncompleteInsert    *insert = (gistIncompleteInsert*) lfirst(l);
        gistContinueInsert(insert);
+       MemoryContextReset(opCtx);
    }
+   MemoryContextSwitchTo(oldCxt);
+
    MemoryContextDelete(opCtx);
    MemoryContextDelete(insertCtx); 
 }
index ee060e83c2bfb7f636338987dd04252fe17f2f20..44fe84ee38b996c9e14c71c6d30fb64d75c165d8 100644 (file)
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.48 2005/06/27 12:45:22 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.49 2005/06/30 17:52:14 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,13 +45,7 @@ typedef XLogRecPtr GistNSN;
 
 typedef struct GISTPageOpaqueData
 {
-   uint8       flags;
-
-   /* number page to which current one is splitted in last split */
-   uint8       nsplited;
-
-   /* level of page, 0 - leaf */
-   uint16      level;
+   uint32      flags; /* 29 bits are unused for now */ 
    BlockNumber rightlink;
 
    /* the only meaning - change this value if
index 6ea4dccb688ac100cebe6b4d6ee61cc9b2a5268c..a14df2e37774153f04e061d2288e17c5fb115b0c 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.6 2005/06/27 12:45:22 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.7 2005/06/30 17:52:14 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -234,7 +234,7 @@ extern IndexTuple * gistSplit(Relation r, Buffer buffer, IndexTuple *itup,
                   int *len, SplitedPageLayout    **dist, GISTSTATE *giststate);
 
 extern GISTInsertStack* gistFindPath( Relation r, BlockNumber child, 
-   Buffer  (*myReadBuffer)(bool, Relation, BlockNumber) );
+   Buffer  (*myReadBuffer)(Relation, BlockNumber) );
 /* gistxlog.c */
 extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
 extern void gist_desc(char *buf, uint8 xl_info, char *rec);