A couple of additional functions to fix the tree at runtime.
authorVadim B. Mikheev
Wed, 31 Jan 2001 01:08:36 +0000 (01:08 +0000)
committerVadim B. Mikheev
Wed, 31 Jan 2001 01:08:36 +0000 (01:08 +0000)
One more function is still needed to handle the "my bits moved..."
case. FixBTree is still FALSE.

src/backend/access/nbtree/nbtinsert.c

index 76d2d9ff86921ef80c481db7841a1f33dedf3a2e..b44b7839dea98c05ee2065e6fbab756e6a6aa749 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.78 2001/01/29 07:28:16 vadim Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.79 2001/01/31 01:08:36 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,7 +37,9 @@ typedef struct
 extern bool FixBTree;
 
 Buffer _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release);
-static void _bt_fixtree(Relation rel, BlockNumber blkno, BTStack stack);
+static void _bt_fixtree(Relation rel, BlockNumber blkno);
+static BlockNumber _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit);
+static OffsetNumber _bt_getoff(Page page, BlockNumber blkno);
 
 static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
 
@@ -512,7 +514,7 @@ _bt_insertonpg(Relation rel,
                        {
                            blkno = lpageop->btpo_parent;
                            _bt_relbuf(rel, buf, BT_WRITE);
-                           _bt_fixtree(rel, blkno, NULL);
+                           _bt_fixtree(rel, blkno);
                            goto formres;
                        }
                    }
@@ -561,6 +563,10 @@ _bt_insertonpg(Relation rel,
 
            pbuf = _bt_getstackbuf(rel, stack);
 
+           if (pbuf == InvalidBuffer)
+               elog(ERROR, "_bt_getstackbuf: my bits moved right off the end of the world!"
+                    "\n\tRecreate index %s.", RelationGetRelationName(rel));
+
            /* Now we can write and unlock the children */
            _bt_wrtbuf(rel, rbuf);
            _bt_wrtbuf(rel, buf);
@@ -1172,8 +1178,10 @@ _bt_getstackbuf(Relation rel, BTStack stack)
        }
        /* by here, the item we're looking for moved right at least one page */
        if (P_RIGHTMOST(opaque))
-           elog(FATAL, "_bt_getstackbuf: my bits moved right off the end of the world!"
-                "\n\tRecreate index %s.", RelationGetRelationName(rel));
+       {
+           _bt_relbuf(rel, buf, BT_WRITE);
+           return(InvalidBuffer);
+       }
 
        blkno = opaque->btpo_next;
        _bt_relbuf(rel, buf, BT_WRITE);
@@ -1450,6 +1458,7 @@ _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release)
 
        /* give up left buffer */
        _bt_relbuf(rel, leftbuf, BT_WRITE);
+       pfree(btitem);
        leftbuf = rightbuf;
        leftpage = rightpage;
        leftopaque = rightopaque;
@@ -1477,10 +1486,318 @@ _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release)
    return(rootbuf);
 }
 
+/*
+ * Given blkno of the leftmost page on a non-leaf level, check/fix each
+ * level from there up to the root, climbing via btpo_parent each pass.
+ */
 static void
-_bt_fixtree(Relation rel, BlockNumber blkno, BTStack stack)
+_bt_fixtree(Relation rel, BlockNumber blkno)
+{
+   Buffer          buf;
+   Page            page;
+   BTPageOpaque    opaque;
+   BlockNumber     pblkno;
+
+   elog(ERROR, "bt_fixtree: unimplemented , yet (need to recreate index)");
+   /* NOTE(review): elog(ERROR) presumably does not return - code below is dead for now */
+   for ( ; ; )
+   {
+       buf = _bt_getbuf(rel, blkno, BT_READ);
+       page = BufferGetPage(buf);
+       opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+       if (! P_LEFTMOST(opaque) || P_ISLEAF(opaque))
+           elog(ERROR, "bt_fixtree: invalid start page (need to recreate index)");
+       pblkno = opaque->btpo_parent;
+
+       /* check/fix entire level */
+       _bt_fixlevel(rel, buf, InvalidBlockNumber);
+
+       /*
+        * No pins/locks are held here. Re-read start page if its
+        * btpo_parent pointed to meta page; in any case go up one level.
+        */
+       if (pblkno == BTREE_METAPAGE)
+       {
+           buf = _bt_getbuf(rel, blkno, BT_WRITE);
+           page = BufferGetPage(buf);
+           opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+           if (P_ISROOT(opaque))
+           {
+               /* Tree is Ok now */
+               _bt_relbuf(rel, buf, BT_WRITE);
+               return;
+           }
+           pblkno = opaque->btpo_parent;
+           /* Call _bt_fixroot() if there is no upper level */
+           if (pblkno == BTREE_METAPAGE)
+           {
+               buf = _bt_fixroot(rel, buf, true);
+               _bt_relbuf(rel, buf, BT_WRITE);
+               return;
+           }
+           /* Parent appeared meanwhile - release page before climbing */
+           _bt_relbuf(rel, buf, BT_WRITE);
+       }
+       blkno = pblkno;     /* go up one level (else we would loop here forever) */
+   }
+
+}
+
+/*
+ * Check/fix one level, starting from the page in buffer buf, up to block
+ * limit on the *child* level (or till the rightmost child page if limit
+ * is InvalidBlockNumber). Start buffer must be read locked.
+ * No pins/locks are held on exit. Returns block number of the last
+ * visited/pointing-to-limit page on the *check/fix* level.
+ */
+static BlockNumber
+_bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit)
+{
+   BlockNumber     blkno = BufferGetBlockNumber(buf);
+   BlockNumber     pblkno = blkno;
+   Page            page;
+   BTPageOpaque    opaque;
+   BlockNumber     cblkno[3];
+   OffsetNumber    coff[3];
+   Buffer          cbuf[3];
+   Page            cpage[3];
+   BTPageOpaque    copaque[3];
+   BTItem          btitem;
+   int             cidx, i;
+   bool            goodbye = false;
+   char            tbuf[BLCKSZ];
+
+   page = BufferGetPage(buf);
+   /* copy page to temp storage so the buffer lock can be dropped */
+   memmove(tbuf, page, PageGetPageSize(page));
+   _bt_relbuf(rel, buf, BT_READ);
+
+   page = (Page)tbuf;
+   opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+   /* Initialize first child data */
+   coff[0] = P_FIRSTDATAKEY(opaque);
+   if (coff[0] > PageGetMaxOffsetNumber(page))
+       elog(ERROR, "bt_fixlevel: invalid maxoff on start page (need to recreate index)");
+   btitem = (BTItem) PageGetItem(page, PageGetItemId(page, coff[0]));
+   cblkno[0] = ItemPointerGetBlockNumber(&(btitem->bti_itup.t_tid));
+   cbuf[0] = _bt_getbuf(rel, cblkno[0], BT_READ);
+   cpage[0] = BufferGetPage(cbuf[0]);
+   copaque[0] = (BTPageOpaque) PageGetSpecialPointer(cpage[0]);
+   if (P_LEFTMOST(opaque) && ! P_LEFTMOST(copaque[0]))
+       elog(ERROR, "bt_fixlevel: non-leftmost child page of leftmost parent (need to recreate index)");
+   /* caller should take care and avoid this */
+   if (P_RIGHTMOST(copaque[0]))
+       elog(ERROR, "bt_fixlevel: invalid start child (need to recreate index)");
+
+   for ( ; ; )
+   {
+       /*
+        * Read up to 2 more child pages and look for pointers
+        * to them in *saved* parent page
+        */
+       coff[1] = coff[2] = InvalidOffsetNumber;
+       for (cidx = 0; cidx < 2; )
+       {
+           cidx++;
+           cblkno[cidx] = (copaque[cidx - 1])->btpo_next;
+           cbuf[cidx] = _bt_getbuf(rel, cblkno[cidx], BT_READ);
+           cpage[cidx] = BufferGetPage(cbuf[cidx]);
+           copaque[cidx] = (BTPageOpaque) PageGetSpecialPointer(cpage[cidx]);
+           coff[cidx] = _bt_getoff(page, cblkno[cidx]);
+
+           /* sanity check: pointer must directly follow nearest known one */
+           if (coff[cidx] != InvalidOffsetNumber)
+           {
+               for (i = cidx - 1; i >= 0; i--)
+               {
+                   if (coff[i] == InvalidOffsetNumber)
+                       continue;
+                   if (coff[cidx] != coff[i] + 1)
+                       elog(ERROR, "bt_fixlevel: invalid item order(1) (need to recreate index)");
+                   break;
+               }
+           }
+
+           if (P_RIGHTMOST(copaque[cidx]))
+               break;
+       }
+
+       /*
+        * Read parent page and insert missing pointers.
+        */
+       if (coff[1] == InvalidOffsetNumber ||
+           (cidx == 2 && coff[2] == InvalidOffsetNumber))
+       {
+           Buffer          newbuf;
+           Page            newpage;
+           BTPageOpaque    newopaque;
+           BTItem          ritem;
+           Size            itemsz;
+           OffsetNumber    newitemoff;
+           BlockNumber     parblk[3];
+           BTStackData     stack;
+
+           stack.bts_parent = NULL;
+           stack.bts_blkno = pblkno;
+           stack.bts_offset = InvalidOffsetNumber;
+           ItemPointerSet(&(stack.bts_btitem.bti_itup.t_tid),
+                           cblkno[0], P_HIKEY);
+
+           buf = _bt_getstackbuf(rel, &stack);
+           if (buf == InvalidBuffer)
+               elog(ERROR, "bt_fixlevel: pointer disappeared (need to recreate index)");
+
+           page = BufferGetPage(buf);
+           opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+           coff[0] = stack.bts_offset;
+           pblkno = BufferGetBlockNumber(buf);
+           parblk[0] = pblkno;
+           if (cblkno[0] == limit)
+               blkno = pblkno;     /* where we have seen pointer to limit */
+
+           /* Check/insert missing pointers */
+           for (i = 1; i <= cidx; i++)
+           {
+               coff[i] = _bt_getoff(page, cblkno[i]);
+
+               /* sanity check */
+               parblk[i] = BufferGetBlockNumber(buf);
+               if (coff[i] != InvalidOffsetNumber)
+               {
+                   if (parblk[i] == parblk[i - 1] &&
+                               coff[i] != coff[i - 1] + 1)
+                       elog(ERROR, "bt_fixlevel: invalid item order(2) (need to recreate index)");
+                   if (cblkno[i] == limit)
+                       blkno = parblk[i];
+                   continue;
+               }
+               /* Have to check next page ? */
+               if ((! P_RIGHTMOST(opaque)) &&
+                   coff[i - 1] == PageGetMaxOffsetNumber(page))    /* yes */
+               {
+                   newbuf = _bt_getbuf(rel, opaque->btpo_next, BT_WRITE);
+                   newpage = BufferGetPage(newbuf);
+                   newopaque = (BTPageOpaque) PageGetSpecialPointer(newpage);
+                   coff[i] = _bt_getoff(newpage, cblkno[i]);
+                   if (coff[i] != InvalidOffsetNumber) /* found ! */
+                   {
+                       if (coff[i] != P_FIRSTDATAKEY(newopaque))
+                           elog(ERROR, "bt_fixlevel: invalid item order(3) (need to recreate index)");
+                       _bt_relbuf(rel, buf, BT_WRITE);
+                       buf = newbuf;
+                       page = newpage;
+                       opaque = newopaque;
+                       pblkno = BufferGetBlockNumber(buf);
+                       parblk[i] = pblkno;
+                       if (cblkno[i] == limit)
+                           blkno = pblkno;
+                       continue;
+                   }
+                   /* not found - need to insert on current page */
+                   _bt_relbuf(rel, newbuf, BT_WRITE);
+               }
+               /* insert pointer, keyed by high key of the left sibling child */
+               ritem = (BTItem) PageGetItem(cpage[i - 1],
+                                   PageGetItemId(cpage[i - 1], P_HIKEY));
+               btitem = _bt_formitem(&(ritem->bti_itup));
+               ItemPointerSet(&(btitem->bti_itup.t_tid), cblkno[i], P_HIKEY);
+               itemsz = IndexTupleDSize(btitem->bti_itup)
+                   + (sizeof(BTItemData) - sizeof(IndexTupleData));
+               itemsz = MAXALIGN(itemsz);
+
+               newitemoff = coff[i - 1] + 1;
+
+               if (PageGetFreeSpace(page) < itemsz)
+               {
+                   OffsetNumber    firstright;
+                   OffsetNumber    itup_off;
+                   BlockNumber     itup_blkno;
+                   bool            newitemonleft;
+
+                   firstright = _bt_findsplitloc(rel, page,
+                                   newitemoff, itemsz, &newitemonleft);
+                   newbuf = _bt_split(rel, buf, firstright,
+                               newitemoff, itemsz, btitem, newitemonleft,
+                               &itup_off, &itup_blkno);
+                   /* keep whichever half received the new item */
+                   if (newitemonleft)
+                       _bt_relbuf(rel, newbuf, BT_WRITE);
+                   else
+                   {
+                       _bt_relbuf(rel, buf, BT_WRITE);
+                       buf = newbuf;
+                       page = BufferGetPage(buf);
+                       opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+                   }
+                   pblkno = BufferGetBlockNumber(buf);
+                   coff[i] = itup_off;
+               }
+               else
+               {
+                   _bt_insertuple(rel, buf, itemsz, btitem, newitemoff);
+                   coff[i] = newitemoff;
+               }
+
+               pfree(btitem);
+               parblk[i] = pblkno;
+               if (cblkno[i] == limit)
+                   blkno = pblkno;
+           }
+
+           /* copy page with pointer to cblkno[cidx] to temp storage */
+           memmove(tbuf, page, PageGetPageSize(page));
+           _bt_relbuf(rel, buf, BT_WRITE);
+           page = (Page)tbuf;
+           opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+           if (limit == InvalidBlockNumber)
+               blkno = pblkno;     /* last visited page */
+       }
+
+       /* Continue if current check/fix level page is rightmost */
+       if (P_RIGHTMOST(opaque))
+           goodbye = false;
+
+       /* Pointers to child pages are Ok - right end of child level ? */
+       _bt_relbuf(rel, cbuf[0], BT_READ);
+       _bt_relbuf(rel, cbuf[1], BT_READ);
+       if (cidx == 1 ||
+           (cidx == 2 && (P_RIGHTMOST(copaque[2]) || goodbye)))
+       {
+           if (cidx == 2)
+               _bt_relbuf(rel, cbuf[2], BT_READ);
+           return(blkno);
+       }
+       if (cblkno[0] == limit || cblkno[1] == limit)
+           goodbye = true;
+       cblkno[0] = cblkno[2];
+       cbuf[0] = cbuf[2];
+       cpage[0] = cpage[2];
+       copaque[0] = copaque[2];
+       coff[0] = coff[2];
+   }
+}
+
+/*
+ * Scan the data items of page (from P_FIRSTDATAKEY on) for one whose
+ * t_tid block number is blkno; returns its offset or InvalidOffsetNumber.
+ */
+static OffsetNumber
+_bt_getoff(Page page, BlockNumber blkno)
 {
-   elog(ERROR, "bt_fixtree: unimplemented , yet");
+   BTPageOpaque    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+   OffsetNumber    last = PageGetMaxOffsetNumber(page);
+   OffsetNumber    off;
+
+   for (off = P_FIRSTDATAKEY(opaque); off <= last; off++)
+   {
+       BTItem  item = (BTItem) PageGetItem(page, PageGetItemId(page, off));
+
+       if (ItemPointerGetBlockNumber(&(item->bti_itup.t_tid)) == blkno)
+           return(off);
+   }
+
+   return(InvalidOffsetNumber);
 }
 
 /*