#include "access/gin_private.h"
#include "miscadmin.h"
+#include "utils/memutils.h"
#include "utils/rel.h"
static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
* Insert a new item to a page.
*
* Returns true if the insertion was finished. On false, the page was split and
- * the parent needs to be updated. (a root split returns true as it doesn't
- * need any further action by the caller to complete)
+ * the parent needs to be updated. (A root split returns true as it doesn't
+ * need any further action by the caller to complete.)
*
- * When inserting a downlink to a internal page, 'childbuf' contains the
+ * When inserting a downlink to an internal page, 'childbuf' contains the
* child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
- * atomically with the insert. Also, the existing item at the given location
- * is updated to point to 'updateblkno'.
+ * atomically with the insert. Also, the existing item at offset stack->off
+ * in the target page is updated to point to updateblkno.
*
* stack->buffer is locked on entry, and is kept locked.
+ * Likewise for childbuf, if given.
*/
static bool
ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
Buffer childbuf, GinStatsData *buildStats)
{
Page page = BufferGetPage(stack->buffer);
- XLogRecData *payloadrdata;
+ bool result;
GinPlaceToPageRC rc;
uint16 xlflags = 0;
Page childpage = NULL;
Page newlpage = NULL,
newrpage = NULL;
+ void *ptp_workspace = NULL;
+ XLogRecData payloadrdata[10];
+ MemoryContext tmpCxt;
+ MemoryContext oldCxt;
+
+ /*
+ * We do all the work of this function and its subfunctions in a temporary
+ * memory context. This avoids memory leaks and simplifies the APIs,
+ * since some subfunctions allocate storage that has to survive until
+ * we've finished the WAL insertion.
+ */
+ tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
+ "ginPlaceToPage temporary context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ oldCxt = MemoryContextSwitchTo(tmpCxt);
if (GinPageIsData(page))
xlflags |= GIN_INSERT_ISDATA;
}
/*
- * Try to put the incoming tuple on the page. placeToPage will decide if
- * the page needs to be split.
+ * See if the incoming tuple will fit on the page. beginPlaceToPage will
+ * decide if the page needs to be split, and will compute the split
+ * contents if so. See comments for beginPlaceToPage and execPlaceToPage
+ * functions for more details of the API here.
*/
- rc = btree->placeToPage(btree, stack->buffer, stack,
- insertdata, updateblkno,
- &payloadrdata, &newlpage, &newrpage);
- if (rc == UNMODIFIED)
- return true;
- else if (rc == INSERTED)
+ rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
+ insertdata, updateblkno,
+ &ptp_workspace,
+ &newlpage, &newrpage,
+ payloadrdata);
+
+ if (rc == GPTP_NO_WORK)
{
- /* placeToPage did START_CRIT_SECTION() */
+ /* Nothing to do */
+ result = true;
+ }
+ else if (rc == GPTP_INSERT)
+ {
+ /* It will fit, perform the insertion */
+ START_CRIT_SECTION();
+
+ /* Perform the page update, and set up WAL data about it */
+ btree->execPlaceToPage(btree, stack->buffer, stack,
+ insertdata, updateblkno,
+ ptp_workspace, payloadrdata);
+
MarkBufferDirty(stack->buffer);
/* An insert to an internal page finishes the split of the child. */
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
{
GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
MarkBufferDirty(childbuf);
* Log information about child if this was an insertion of a
* downlink.
*/
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
{
rdata[0].next = &rdata[1];
rdata[1].next = &rdata[2];
rdata[2].buffer = childbuf;
- rdata[2].buffer_std = false;
+ rdata[2].buffer_std = true;
rdata[2].data = NULL;
rdata[2].len = 0;
rdata[2].next = payloadrdata;
rdata[0].next = payloadrdata;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
+
PageSetLSN(page, recptr);
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
PageSetLSN(childpage, recptr);
}
END_CRIT_SECTION();
- return true;
+ /* Insertion is complete. */
+ result = true;
}
- else if (rc == SPLIT)
+ else if (rc == GPTP_SPLIT)
{
- /* Didn't fit, have to split */
+ /*
+ * Didn't fit, need to split. The split has been computed in newlpage
+ * and newrpage, which are pointers to palloc'd pages, not associated
+ * with buffers. stack->buffer is not touched yet.
+ */
Buffer rbuffer;
BlockNumber savedRightLink;
- XLogRecData rdata[2];
ginxlogSplit data;
Buffer lbuffer = InvalidBuffer;
Page newrootpg = NULL;
+ /* Get a new index page to become the right page */
rbuffer = GinNewBuffer(btree->index);
/* During index build, count the new page */
savedRightLink = GinPageGetOpaque(page)->rightlink;
- /*
- * newlpage and newrpage are pointers to memory pages, not associated
- * with buffers. stack->buffer is not touched yet.
- */
-
+ /* Begin setting up WAL record (which we might not use) */
data.node = btree->index->rd_node;
data.rblkno = BufferGetBlockNumber(rbuffer);
data.flags = xlflags;
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
{
- Page childpage = BufferGetPage(childbuf);
-
- GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
-
data.leftChildBlkno = BufferGetBlockNumber(childbuf);
data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
}
else
data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplit);
-
- if (childbuf != InvalidBuffer)
- {
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = childbuf;
- rdata[1].buffer_std = false;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = payloadrdata;
- }
- else
- rdata[0].next = payloadrdata;
-
if (stack->parent == NULL)
{
/*
- * split root, so we need to allocate new left page and place
- * pointer on root to left and right page
+ * We're splitting the root, so we need to allocate a new left page and
+ * place pointers to the left and right pages on the root page.
*/
lbuffer = GinNewBuffer(btree->index);
- /* During index build, count the newly-added root page */
+ /* During index build, count the new left page */
if (buildStats)
{
if (btree->isData)
/*
* Construct a new root page containing downlinks to the new left
- * and right pages. (do this in a temporary copy first rather than
- * overwriting the original page directly, so that we can still
- * abort gracefully if this fails.)
+ * and right pages. (Do this in a temporary copy rather than
+ * overwriting the original page directly, since we're not in the
+ * critical section yet.)
*/
newrootpg = PageGetTempPage(newrpage);
GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
}
else
{
- /* split non-root page */
+ /* splitting a non-root page */
data.rrlink = savedRightLink;
data.lblkno = BufferGetBlockNumber(stack->buffer);
}
/*
- * Ok, we have the new contents of the left page in a temporary copy
- * now (newlpage), and the newly-allocated right block has been filled
- * in. The original page is still unchanged.
+ * OK, we have the new contents of the left page in a temporary copy
+ * now (newlpage), and likewise for the new contents of the
+ * newly-allocated right block. The original page is still unchanged.
*
* If this is a root split, we also have a temporary page containing
- * the new contents of the root. Copy the new left page to a
- * newly-allocated block, and initialize the (original) root page the
- * new copy. Otherwise, copy over the temporary copy of the new left
- * page over the old left page.
+ * the new contents of the root.
*/
START_CRIT_SECTION();
MarkBufferDirty(rbuffer);
MarkBufferDirty(stack->buffer);
- if (BufferIsValid(childbuf))
- MarkBufferDirty(childbuf);
/*
- * Restore the temporary copies over the real buffers. But don't free
- * the temporary copies yet, WAL record data points to them.
+ * Restore the temporary copies over the real buffers.
*/
if (stack->parent == NULL)
{
+ /* Splitting the root, three pages to update */
MarkBufferDirty(lbuffer);
- memcpy(BufferGetPage(stack->buffer), newrootpg, BLCKSZ);
+ memcpy(page, newrootpg, BLCKSZ);
memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
}
else
{
- memcpy(BufferGetPage(stack->buffer), newlpage, BLCKSZ);
+ /* Normal split, only two pages to update */
+ memcpy(page, newlpage, BLCKSZ);
memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
}
+ /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
+ if (BufferIsValid(childbuf))
+ {
+ GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
+ MarkBufferDirty(childbuf);
+ }
+
/* write WAL record */
if (RelationNeedsWAL(btree->index))
{
+ XLogRecData rdata[2];
XLogRecPtr recptr;
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogSplit);
+
+ if (BufferIsValid(childbuf))
+ {
+ rdata[0].next = &rdata[1];
+
+ rdata[1].buffer = childbuf;
+ rdata[1].buffer_std = true;
+ rdata[1].data = NULL;
+ rdata[1].len = 0;
+ rdata[1].next = payloadrdata;
+ }
+ else
+ rdata[0].next = payloadrdata;
+
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
- PageSetLSN(BufferGetPage(stack->buffer), recptr);
+
+ PageSetLSN(page, recptr);
PageSetLSN(BufferGetPage(rbuffer), recptr);
if (stack->parent == NULL)
PageSetLSN(BufferGetPage(lbuffer), recptr);
END_CRIT_SECTION();
/*
- * We can release the lock on the right page now, but keep the
- * original buffer locked.
+ * We can release the locks/pins on the new pages now, but keep
+ * stack->buffer locked. childbuf doesn't get unlocked either.
*/
UnlockReleaseBuffer(rbuffer);
if (stack->parent == NULL)
UnlockReleaseBuffer(lbuffer);
- pfree(newlpage);
- pfree(newrpage);
- if (newrootpg)
- pfree(newrootpg);
-
/*
* If we split the root, we're done. Otherwise the split is not
* complete until the downlink for the new page has been inserted to
* the parent.
*/
- if (stack->parent == NULL)
- return true;
- else
- return false;
+ result = (stack->parent == NULL);
}
else
{
- elog(ERROR, "unknown return code from GIN placeToPage method: %d", rc);
- return false; /* keep compiler quiet */
+ elog(ERROR, "invalid return code from GIN placeToPage method: %d", rc);
+ result = false; /* keep compiler quiet */
}
+
+ /* Clean up temp context */
+ MemoryContextSwitchTo(oldCxt);
+ MemoryContextDelete(tmpCxt);
+
+ return result;
}
/*
#include "access/heapam_xlog.h"
#include "lib/ilist.h"
#include "miscadmin.h"
-#include "utils/memutils.h"
#include "utils/rel.h"
/*
int rsize; /* total size on right page */
bool oldformat; /* page is in pre-9.4 format on disk */
+
+ /*
+ * If we need WAL data representing the reconstructed leaf page, it's
+ * stored here by computeLeafRecompressWALData.
+ */
+ char *walinfo; /* buffer start */
+ int walinfolen; /* and length */
} disassembledLeaf;
typedef struct
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage);
+ Page *newlpage, Page *newrpage, XLogRecData *rdata);
static disassembledLeaf *disassembleLeaf(Page page);
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
int nNewItems);
-static XLogRecData *constructLeafRecompressWALData(Buffer buf,
- disassembledLeaf *leaf);
+static void computeLeafRecompressWALData(disassembledLeaf *leaf);
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
-static void dataPlaceToPageLeafSplit(Buffer buf,
- disassembledLeaf *leaf,
+static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage);
+ Page lpage, Page rpage, XLogRecData *rdata);
/*
* Read TIDs from leaf data page to single uncompressed array. The TIDs are
}
/*
- * Places keys to leaf data page and fills WAL record.
+ * Prepare to insert data on a leaf data page.
+ *
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
+ *
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
+ *
+ * In neither case should the given page buffer be modified here.
*/
static GinPlaceToPageRC
-dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
GinBtreeDataLeafInsertData *items = insertdata;
ItemPointer newItems = &items->items[items->curitem];
bool append;
int segsize;
Size freespace;
- MemoryContext tmpCxt;
- MemoryContext oldCxt;
disassembledLeaf *leaf;
leafSegmentInfo *lastleftinfo;
ItemPointerData maxOldItem;
ItemPointerData remaining;
- Assert(GinPageIsData(page));
-
rbound = *GinDataPageGetRightBound(page);
/*
maxitems = i;
}
- /*
- * The following operations do quite a lot of small memory allocations,
- * create a temporary memory context so that we don't need to keep track
- * of them individually.
- */
- tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
- "Gin split temporary context",
- ALLOCSET_DEFAULT_MINSIZE,
- ALLOCSET_DEFAULT_INITSIZE,
- ALLOCSET_DEFAULT_MAXSIZE);
- oldCxt = MemoryContextSwitchTo(tmpCxt);
-
+ /* Disassemble the data on the page */
leaf = disassembleLeaf(page);
/*
maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
}
- /* Add the new items to the segments */
+ /* Add the new items to the segment list */
if (!addItemsToLeaf(leaf, newItems, maxitems))
{
/* all items were duplicates, we have nothing to do */
items->curitem += maxitems;
- MemoryContextSwitchTo(oldCxt);
- MemoryContextDelete(tmpCxt);
-
- return UNMODIFIED;
+ return GPTP_NO_WORK;
}
/*
if (!needsplit)
{
/*
- * Great, all the items fit on a single page. Construct a WAL record
- * describing the changes we made, and write the segments back to the
- * page.
- *
- * Once we start modifying the page, there's no turning back. The
- * caller is responsible for calling END_CRIT_SECTION() after writing
- * the WAL record.
+ * Great, all the items fit on a single page. If needed, prepare data
+ * for a WAL record describing the changes we'll make.
*/
- MemoryContextSwitchTo(oldCxt);
if (RelationNeedsWAL(btree->index))
- *prdata = constructLeafRecompressWALData(buf, leaf);
- else
- *prdata = NULL;
- START_CRIT_SECTION();
- dataPlaceToPageLeafRecompress(buf, leaf);
+ computeLeafRecompressWALData(leaf);
+
+ /*
+ * We're ready to enter the critical section, but
+ * dataExecPlaceToPageLeaf will need access to the "leaf" data.
+ */
+ *ptp_workspace = leaf;
if (append)
elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
else
{
/*
- * Had to split.
+ * Have to split.
*
* leafRepackItems already divided the segments between the left and
* the right page. It filled the left page as full as possible, and
* until they're balanced.
*
* As a further heuristic, when appending items to the end of the
- * page, try make the left page 75% full, one the assumption that
+ * page, try to make the left page 75% full, on the assumption that
* subsequent insertions will probably also go to the end. This packs
* the index somewhat tighter when appending to a table, which is very
* common.
&lastleftinfo->nitems);
lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
- *newlpage = MemoryContextAlloc(oldCxt, BLCKSZ);
- *newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
+ /*
+ * Now allocate a couple of temporary page images, and fill them.
+ */
+ *newlpage = palloc(BLCKSZ);
+ *newrpage = palloc(BLCKSZ);
- dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
- prdata, *newlpage, *newrpage);
+ dataPlaceToPageLeafSplit(leaf, lbound, rbound,
+ *newlpage, *newrpage, rdata);
Assert(GinPageRightMost(page) ||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
items->nitem - items->curitem - maxitems);
}
- MemoryContextSwitchTo(oldCxt);
- MemoryContextDelete(tmpCxt);
-
items->curitem += maxitems;
- return needsplit ? SPLIT : INSERTED;
+ return needsplit ? GPTP_SPLIT : GPTP_INSERT;
+}
+
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ *
+ * ptp_workspace is the disassembledLeaf that dataBeginPlaceToPageLeaf
+ * handed back through *ptp_workspace; it carries the reassembled segments
+ * and, when WAL is needed, the walinfo/walinfolen buffer filled in by
+ * computeLeafRecompressWALData.
+ *
+ * The stack and insertdata arguments are not referenced in this function.
+ */
+static void
+dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
+
+ /* Apply changes to page, using the segments held in *leaf */
+ dataPlaceToPageLeafRecompress(buf, leaf);
+
+ /*
+ * If needed, register WAL data built by computeLeafRecompressWALData.
+ * Only rdata[0] is filled in, and its next pointer is set to NULL; for
+ * non-WAL-logged relations rdata[] is left untouched.
+ */
+ if (RelationNeedsWAL(btree->index))
+ {
+ rdata[0].buffer = buf;
+ rdata[0].buffer_std = true;
+ rdata[0].data = leaf->walinfo;
+ rdata[0].len = leaf->walinfolen;
+ rdata[0].next = NULL;
+ }
}
/*
*/
if (removedsomething)
{
- XLogRecData *payloadrdata = NULL;
bool modified;
/*
}
if (RelationNeedsWAL(indexrel))
- payloadrdata = constructLeafRecompressWALData(buffer, leaf);
+ computeLeafRecompressWALData(leaf);
+
+ /* Apply changes to page */
START_CRIT_SECTION();
+
dataPlaceToPageLeafRecompress(buffer, leaf);
MarkBufferDirty(buffer);
if (RelationNeedsWAL(indexrel))
{
XLogRecPtr recptr;
- XLogRecData rdata;
+ XLogRecData rdata[2];
ginxlogVacuumDataLeafPage xlrec;
xlrec.node = indexrel->rd_node;
xlrec.blkno = BufferGetBlockNumber(buffer);
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
- rdata.next = payloadrdata;
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &xlrec;
+ rdata[0].len = offsetof(ginxlogVacuumDataLeafPage, data);
+ rdata[0].next = &rdata[1];
+
+ rdata[1].buffer = buffer;
+ rdata[1].buffer_std = true;
+ rdata[1].data = leaf->walinfo;
+ rdata[1].len = leaf->walinfolen;
+ rdata[1].next = NULL;
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, rdata);
PageSetLSN(page, recptr);
}
/*
* Construct a ginxlogRecompressDataLeaf record representing the changes
- * in *leaf.
+ * in *leaf. (Because this requires a palloc, we have to do it before
+ * we enter the critical section that actually updates the page.)
*/
-static XLogRecData *
-constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
+static void
+computeLeafRecompressWALData(disassembledLeaf *leaf)
{
int nmodified = 0;
char *walbufbegin;
char *walbufend;
- XLogRecData *rdata;
dlist_iter iter;
int segno;
ginxlogRecompressDataLeaf *recompress_xlog;
nmodified++;
}
- walbufbegin = palloc(
- sizeof(ginxlogRecompressDataLeaf) +
- BLCKSZ + /* max size needed to hold the segment
- * data */
- nmodified * 2 + /* (segno + action) per action */
- sizeof(XLogRecData));
+ walbufbegin =
+ palloc(sizeof(ginxlogRecompressDataLeaf) +
+ BLCKSZ + /* max size needed to hold the segment data */
+ nmodified * 2 /* (segno + action) per action */
+ );
walbufend = walbufbegin;
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
segno++;
}
- rdata = (XLogRecData *) MAXALIGN(walbufend);
- rdata->buffer = buf;
- rdata->buffer_std = TRUE;
- rdata->data = walbufbegin;
- rdata->len = walbufend - walbufbegin;
- rdata->next = NULL;
-
- return rdata;
+ /* Pass back the constructed info via *leaf */
+ leaf->walinfo = walbufbegin;
+ leaf->walinfolen = walbufend - walbufbegin;
}
/*
* Assemble a disassembled posting tree leaf page back to a buffer.
*
- * *prdata is filled with WAL information about this operation. The caller
- * is responsible for inserting to the WAL, along with any other information
- * about the operation that triggered this recompression.
+ * This just updates the target buffer; WAL stuff is caller's responsibility.
*
* NOTE: The segment pointers must not point directly to the same buffer,
* except for segments that have not been modified and whose preceding
* segments to two pages instead of one.
*
* This is different from the non-split cases in that this does not modify
- * the original page directly, but to temporary in-memory copies of the new
- * left and right pages.
+ * the original page directly, but writes to temporary in-memory copies of
+ * the new left and right pages. Also, we prepare rdata[] entries for the
+ * data that must be appended to the WAL record.
*/
static void
-dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
+dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage)
+ Page lpage, Page rpage, XLogRecData *rdata)
{
char *ptr;
int segsize;
dlist_node *firstright;
leafSegmentInfo *seginfo;
- /* these must be static so they can be returned to caller */
+ /* this must be static so it can be returned to caller */
static ginxlogSplitDataLeaf split_xlog;
- static XLogRecData rdata[3];
/* Initialize temporary pages to hold the new left and right pages */
GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
rdata[2].len = rsize;
rdata[2].next = NULL;
-
- *prdata = rdata;
}
/*
- * Place a PostingItem to page, and fill a WAL record.
+ * Prepare to insert data on an internal data page.
+ *
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
*
- * If the item doesn't fit, returns false without modifying the page.
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
*
- * In addition to inserting the given item, the downlink of the existing item
- * at 'off' is updated to point to 'updateblkno'.
+ * In neither case should the given page buffer be modified here.
+ *
+ * Note: on insertion to an internal node, in addition to inserting the given
+ * item, the downlink of the existing item at stack->off will be updated to
+ * point to updateblkno.
*/
static GinPlaceToPageRC
-dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
Page page = BufferGetPage(buf);
- OffsetNumber off = stack->off;
- PostingItem *pitem;
-
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata;
- static ginxlogInsertDataInternal data;
- /* split if we have to */
+ /* If it doesn't fit, deal with split case */
if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
{
dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
- prdata, newlpage, newrpage);
- return SPLIT;
+ newlpage, newrpage, rdata);
+ return GPTP_SPLIT;
}
- *prdata = &rdata;
- Assert(GinPageIsData(page));
+ /* Else, we're ready to proceed with insertion */
+ return GPTP_INSERT;
+}
- START_CRIT_SECTION();
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ */
+static void
+dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ Page page = BufferGetPage(buf);
+ OffsetNumber off = stack->off;
+ PostingItem *pitem;
/* Update existing downlink to point to next page (on internal page) */
pitem = GinDataPageGetPostingItem(page, off);
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- data.offset = off;
- data.newitem = *pitem;
+ if (RelationNeedsWAL(btree->index))
+ {
+ /*
+ * This must be static, because it has to survive until XLogInsert,
+ * and we can't palloc here. Ugly, but the XLogInsert infrastructure
+ * isn't reentrant anyway.
+ */
+ static ginxlogInsertDataInternal data;
- rdata.buffer = buf;
- rdata.buffer_std = TRUE;
- rdata.data = (char *) &data;
- rdata.len = sizeof(ginxlogInsertDataInternal);
- rdata.next = NULL;
+ data.offset = off;
+ data.newitem = *pitem;
- return INSERTED;
+ rdata[0].buffer = buf;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogInsertDataInternal);
+ rdata[0].next = NULL;
+ }
}
/*
- * Places an item (or items) to a posting tree. Calls relevant function of
- * internal of leaf page because they are handled very differently.
+ * Prepare to insert data on a posting-tree data page.
+ *
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
+ *
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
+ *
+ * In neither case should the given page buffer be modified here.
+ *
+ * Note: on insertion to an internal node, in addition to inserting the given
+ * item, the downlink of the existing item at stack->off will be updated to
+ * point to updateblkno.
+ *
+ * Calls relevant function for internal or leaf page because they are handled
+ * very differently.
*/
static GinPlaceToPageRC
-dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
Page page = BufferGetPage(buf);
Assert(GinPageIsData(page));
if (GinPageIsLeaf(page))
- return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
- prdata, newlpage, newrpage);
+ return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
+ ptp_workspace,
+ newlpage, newrpage, rdata);
+ else
+ return dataBeginPlaceToPageInternal(btree, buf, stack,
+ insertdata, updateblkno,
+ ptp_workspace,
+ newlpage, newrpage, rdata);
+}
+
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ *
+ * Calls relevant function for internal or leaf page because they are handled
+ * very differently.
+ */
+static void
+dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ Page page = BufferGetPage(buf);
+
+ if (GinPageIsLeaf(page))
+ dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
+ ptp_workspace, rdata);
else
- return dataPlaceToPageInternal(btree, buf, stack,
- insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
+ updateblkno, ptp_workspace, rdata);
}
/*
- * Split page and fill WAL record. Returns a new temp buffer filled with data
- * that should go to the left page. The original buffer is left untouched.
+ * Split internal page and insert new data.
+ *
+ * Returns new temp pages to *newlpage and *newrpage.
+ * The original buffer is left untouched.
*/
static void
dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage, XLogRecData *rdata)
{
Page oldpage = BufferGetPage(origbuf);
OffsetNumber off = stack->off;
/* these must be static so they can be returned to caller */
static ginxlogSplitDataInternal data;
- static XLogRecData rdata[4];
static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
lpage = PageGetTempPage(oldpage);
GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
- *prdata = rdata;
-
/*
* First construct a new list of PostingItems, which includes all the old
* items, and the new item.
/* set up right bound for right page */
*GinDataPageGetRightBound(rpage) = oldbound;
+ /* Set up WAL data */
data.separator = separator;
data.nitem = nitems;
data.rightbound = oldbound;
rdata[1].len = nitems * sizeof(PostingItem);
rdata[1].next = NULL;
+ /* return temp pages to caller */
*newlpage = lpage;
*newrpage = rpage;
}
btree->isMoveRight = dataIsMoveRight;
btree->findItem = NULL;
btree->findChildPtr = dataFindChildPtr;
- btree->placeToPage = dataPlaceToPage;
+ btree->beginPlaceToPage = dataBeginPlaceToPage;
+ btree->execPlaceToPage = dataExecPlaceToPage;
btree->fillRoot = ginDataFillRoot;
btree->prepareDownlink = dataPrepareDownlink;
static void entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
- void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
- Page *newlpage, Page *newrpage);
+ GinBtreeEntryInsertData *insertData,
+ BlockNumber updateblkno,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata);
/*
* Form a tuple for entry tree.
}
/*
- * Place tuple on page and fills WAL record
+ * Prepare to insert data on an entry page.
*
- * If the tuple doesn't fit, returns false without modifying the page.
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
*
- * On insertion to an internal node, in addition to inserting the given item,
- * the downlink of the existing item at 'off' is updated to point to
- * 'updateblkno'.
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
+ *
+ * In neither case should the given page buffer be modified here.
+ *
+ * Note: on insertion to an internal node, in addition to inserting the given
+ * item, the downlink of the existing item at stack->off will be updated to
+ * point to updateblkno.
+ *
+ * The fit test is made by entryIsEnoughSpace. In the split case all the
+ * work, including filling rdata[], is delegated to entrySplitPage. This
+ * implementation never stores anything into *ptp_workspace, and in the
+ * GPTP_INSERT case it returns without touching rdata[], newlpage or
+ * newrpage.
*/
static GinPlaceToPageRC
-entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertPayload, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertPayload, BlockNumber updateblkno,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
GinBtreeEntryInsertData *insertData = insertPayload;
- Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
- OffsetNumber placed;
- int cnt = 0;
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata[3];
- static ginxlogInsertEntry data;
-
- /* quick exit if it doesn't fit */
+ /* If it doesn't fit, deal with split case */
if (!entryIsEnoughSpace(btree, buf, off, insertData))
{
- entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
- prdata, newlpage, newrpage);
- return SPLIT;
+ entrySplitPage(btree, buf, stack, insertData, updateblkno,
+ newlpage, newrpage, rdata);
+ return GPTP_SPLIT;
}
- START_CRIT_SECTION();
+ /* Else, we're ready to proceed with insertion */
+ return GPTP_INSERT;
+}
+
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ *
+ * The page is first adjusted by entryPreparePage, which receives the
+ * insertion data and updateblkno, and then PageAddItem is called.
+ *
+ * rdata[] is filled in only when the index needs WAL. In that case two
+ * entries are used: rdata[0] covers the part of a ginxlogInsertEntry that
+ * precedes its tuple field, and rdata[1] covers insertData->entry itself.
+ *
+ * NOTE(review): ptp_workspace does not appear to be referenced in this
+ * function (entryBeginPlaceToPage never sets it) -- confirm against the
+ * complete function body.
+ */
+static void
+entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertPayload, BlockNumber updateblkno,
+ void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ GinBtreeEntryInsertData *insertData = insertPayload;
+ Page page = BufferGetPage(buf);
+ OffsetNumber off = stack->off;
+ OffsetNumber placed;
- *prdata = rdata;
entryPreparePage(btree, page, off, insertData, updateblkno);
placed = PageAddItem(page,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- data.isDelete = insertData->isDelete;
- data.offset = off;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) &data;
- rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
- rdata[cnt].next = &rdata[cnt + 1];
- cnt++;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) insertData->entry;
- rdata[cnt].len = IndexTupleSize(insertData->entry);
- rdata[cnt].next = NULL;
-
- return INSERTED;
+ if (RelationNeedsWAL(btree->index))
+ {
+ /*
+ * This must be static, because it has to survive until XLogInsert,
+ * and we can't palloc here. Ugly, but the XLogInsert infrastructure
+ * isn't reentrant anyway.
+ */
+ static ginxlogInsertEntry data;
+
+ data.isDelete = insertData->isDelete;
+ data.offset = off;
+
+ rdata[0].buffer = buf;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = offsetof(ginxlogInsertEntry, tuple);
+ rdata[0].next = &rdata[1];
+
+ rdata[1].buffer = buf;
+ rdata[1].buffer_std = true;
+ rdata[1].data = (char *) insertData->entry;
+ rdata[1].len = IndexTupleSize(insertData->entry);
+ rdata[1].next = NULL;
+ }
}
/*
- * Place tuple and split page, original buffer(lbuf) leaves untouched,
- * returns shadow pages filled with new data.
- * Tuples are distributed between pages by equal size on its, not
- * an equal number!
+ * Split entry page and insert new data.
+ *
+ * Returns new temp pages to *newlpage and *newrpage.
+ * The original buffer is left untouched.
+ * Also, set up rdata[] entries describing data to be appended to WAL record.
+ *
+ * The split point is chosen so as to balance the two pages' total data
+ * size, rather than their tuple counts.
+ *
+ * NOTE(review): 'data' and 'tupstore' below are static; presumably the
+ * rdata[] entries point at them, so they must stay valid after this
+ * function returns -- confirm against the complete function body.
*/
static void
entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
- void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+ GinBtreeEntryInsertData *insertData,
+ BlockNumber updateblkno,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
- GinBtreeEntryInsertData *insertData = insertPayload;
OffsetNumber off = stack->off;
OffsetNumber i,
maxoff,
Size pageSize = PageGetPageSize(lpage);
/* these must be static so they can be returned to caller */
- static XLogRecData rdata[2];
static ginxlogSplitEntry data;
static char tupstore[2 * BLCKSZ];
- *prdata = rdata;
entryPreparePage(btree, lpage, off, insertData, updateblkno);
/*
{
itup = (IndexTuple) ptr;
+ /*
+ * Decide where to split. We try to equalize the pages' total data
+ * size, not number of tuples.
+ */
if (lsize > totalsize / 2)
{
if (separator == InvalidOffsetNumber)
rdata[1].len = tupstoresize;
rdata[1].next = NULL;
+ /* return temp pages to caller */
*newlpage = lpage;
*newrpage = rpage;
}
btree->isMoveRight = entryIsMoveRight;
btree->findItem = entryLocateLeafEntry;
btree->findChildPtr = entryFindChildPtr;
- btree->placeToPage = entryPlaceToPage;
+ btree->beginPlaceToPage = entryBeginPlaceToPage;
+ btree->execPlaceToPage = entryExecPlaceToPage;
btree->fillRoot = ginEntryFillRoot;
btree->prepareDownlink = entryPrepareDownlink;