+ {
+ page = BufferGetPage( buffers[ i ] );
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+ }
+
+ for(i=0; i < data.ndeleted; i++)
+ UnlockReleaseBuffer( buffers[ i ] );
+
+ END_CRIT_SECTION();
+ } while( blknoToDelete != newHead );
+
+ return false;
+}
+
+/* Add datum to DatumArray, resizing if needed */
+static void
+addDatum(DatumArray *datums, Datum datum)
+{
+ if ( datums->nvalues >= datums->maxvalues)
+ {
+ datums->maxvalues *= 2;
+ datums->values = (Datum*)repalloc(datums->values,
+ sizeof(Datum)*datums->maxvalues);
+ }
+
+ datums->values[ datums->nvalues++ ] = datum;
+}
+
+/*
+ * Go through all tuples >= startoff on page and collect values in memory
+ *
+ * Note that da is just workspace --- it does not carry any state across
+ * calls.
+ */
+static void
+processPendingPage(BuildAccumulator *accum, DatumArray *da,
+ Page page, OffsetNumber startoff)
+{
+ ItemPointerData heapptr;
+ OffsetNumber i,maxoff;
+ OffsetNumber attrnum, curattnum;
+
+ /* reset *da to empty */
+ da->nvalues = 0;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ Assert( maxoff >= FirstOffsetNumber );
+ ItemPointerSetInvalid(&heapptr);
+ attrnum = 0;
+
+ for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+
+ curattnum = gintuple_get_attrnum(accum->ginstate, itup);
+
+ if ( !ItemPointerIsValid(&heapptr) )
+ {
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ else if ( !(ItemPointerEquals(&heapptr, &itup->t_tid) &&
+ curattnum == attrnum) )
+ {
+ /*
+ * We can insert several datums per call, but only for one heap
+ * tuple and one column.
+ */
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+ da->nvalues = 0;
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ addDatum(da, gin_index_getattr(accum->ginstate, itup));
+ }
+
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+}
+
+/*
+ * Move tuples from pending pages into regular GIN structure.
+ *
+ * This can be called concurrently by multiple backends, so it must cope.
+ * At first glance it looks neither concurrent-safe nor crash-safe.
+ * The reason it's okay is that multiple insertion of the same entry
+ * is detected and treated as a no-op by gininsert.c. If we crash after
+ * posting entries to the main index and before removing them from the
+ * pending list, it's okay because when we redo the posting later on, nothing
+ * bad will happen. Likewise, if two backends simultaneously try to post
+ * a pending entry into the main index, one will succeed and one will do
+ * nothing. We try to notice when someone else is a little bit ahead of
+ * us in the process, but that's just to avoid wasting cycles. Only the
+ * action of removing a page from the pending list really needs exclusive
+ * lock.
+ *
+ * vac_delay indicates that ginInsertCleanup was called from the vacuum
+ * process, so vacuum_delay_point() should be called periodically.
+ * If stats isn't null, we add the number of deleted pending pages to its counts.
+ */
+void
+ginInsertCleanup(Relation index, GinState *ginstate,
+ bool vac_delay, IndexBulkDeleteResult *stats)
+{
+ Buffer metabuffer, buffer;
+ Page metapage, page;
+ GinMetaPageData *metadata;
+ MemoryContext opCtx, oldCtx;
+ BuildAccumulator accum;
+ DatumArray datums;
+ BlockNumber blkno;
+
+ metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+ LockBuffer(metabuffer, GIN_SHARE);
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber )
+ {
+ /* Nothing to do */
+ UnlockReleaseBuffer(metabuffer);
+ return;
+ }
+
+ /*
+ * Read and lock head of pending list
+ */
+ blkno = metadata->head;
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ /*
+ * Initialize. All temporary space will be in opCtx
+ */
+ opCtx = AllocSetContextCreate(CurrentMemoryContext,
+ "GIN insert cleanup temporary context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ oldCtx = MemoryContextSwitchTo(opCtx);
+
+ datums.maxvalues=128;
+ datums.nvalues = 0;
+ datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+
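+ /* accum collects entry datums and their heap TIDs in memory until flushed */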
+ ginInitBA(&accum);
+ accum.ginstate = ginstate;
+
+ /*
+ * At the top of this loop, we have pin and lock on the current page
+ * of the pending list. However, we'll release that before exiting
+ * the loop. Note we also have pin but not lock on the metapage.
+ */
+ for(;;)
+ {
+ if ( GinPageIsDeleted(page) )
+ {
+ /* another cleanup process is running concurrently */
+ UnlockReleaseBuffer( buffer );
+ break;
+ }
+
+ /*
+ * read page's datums into memory
+ */
+ processPendingPage(&accum, &datums, page, FirstOffsetNumber);
+
+ if (vac_delay)
+ vacuum_delay_point();
+
+ /*
+ * Is it time to flush memory to disk? Flush if we are at the end
+ * of the pending list, or if we have a full row and memory is
+ * getting full.
+ *
+ * XXX using up maintenance_work_mem here is probably unreasonably
+ * much, since vacuum might already be using that much.
+ */
+ if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+ ( GinPageHasFullRow(page) &&
+ accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+ {
+ ItemPointerData *list;
+ uint32 nlist;
+ Datum entry;
+ OffsetNumber maxoff, attnum;
+
+ /*
+ * Unlock the current page while flushing, to improve concurrency.
+ * Any additions made to the page in the meantime will be detected
+ * later by comparing its maxoff once the memory flush is complete.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ LockBuffer(buffer, GIN_UNLOCK);
+
+ /*
+ * Moving the collected data into the regular structure can take a
+ * significant amount of time, so do it without holding a lock on the
+ * pending list.
+ */
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ {
+ ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+ if (vac_delay)
+ vacuum_delay_point();
+ }
+
+ /*
+ * Lock the whole list to remove pages
+ */
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ LockBuffer(buffer, GIN_SHARE);
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* another cleanup process is running concurrently */
+ UnlockReleaseBuffer(buffer);
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ /*
+ * While we left the page unlocked, more stuff might have gotten
+ * added to it. If so, process those entries immediately. There
+ * shouldn't be very many, so we don't worry about the fact that
+ * we're doing this with exclusive lock. The insertion algorithm
+ * guarantees that inserted row(s) will not continue onto the next page.
+ * NOTE: intentionally no vacuum_delay_point in this loop.
+ */
+ if ( PageGetMaxOffsetNumber(page) != maxoff )
+ {
+ ginInitBA(&accum);
+ processPendingPage(&accum, &datums, page, maxoff+1);
+
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+ }
+
+ /*
+ * Remember next page - it will become the new list head
+ */
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */
+
+ /*
+ * Remove the pages we have read from the pending list; at this point
+ * all of their content is in the regular structure.
+ */
+ if ( shiftList(index, metabuffer, blkno, stats) )
+ {
+ /* another cleanup process is running concurrently */
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ Assert( blkno == metadata->head );
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ /*
+ * if we removed the whole pending list just exit
+ */
+ if ( blkno == InvalidBlockNumber )
+ break;
+
+ /*
+ * release memory used so far and reinit state
+ */
+ MemoryContextReset(opCtx);
+ ginInitBA(&accum);
+ datums.nvalues = 0;
+ datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+ }
+ else
+ {
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer);
+ }
+
+ /*
+ * Read next page in pending list
+ */
+ CHECK_FOR_INTERRUPTS();
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+ }
+
+ ReleaseBuffer(metabuffer);
+
+ /* Clean up temporary space */
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextDelete(opCtx);
+}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.22 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.23 2009/03/24 20:17:10 tgl Exp $
*-------------------------------------------------------------------------
*/
#include "utils/memutils.h"
+typedef struct pendingPosition
+{
+ Buffer pendingBuffer;
+ OffsetNumber firstOffset;
+ OffsetNumber lastOffset;
+ ItemPointerData item;
+} pendingPosition;
+
+
/*
* Tries to refind previously taken ItemPointer on page.
*/
}
/*
- * Start* functions setup begining state of searches: finds correct buffer and pins it.
+ * Start* functions setup beginning state of searches: finds correct buffer and pins it.
*/
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
Page page;
bool needUnlock = TRUE;
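+ /* initialize the scan entry's state fields to empty defaults */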
+ entry->buffer = InvalidBuffer;
+ entry->offset = InvalidOffsetNumber;
+ entry->list = NULL;
+ entry->nlist = 0;
+ entry->partialMatch = NULL;
+ entry->partialMatchResult = NULL;
+ entry->reduceResult = FALSE;
+ entry->predictNumberResult = 0;
+
if (entry->master != NULL)
{
entry->isFinished = entry->master->isFinished;
page = BufferGetPage(stackEntry->buffer);
entry->isFinished = TRUE;
- entry->buffer = InvalidBuffer;
- entry->offset = InvalidOffsetNumber;
- entry->list = NULL;
- entry->nlist = 0;
- entry->partialMatch = NULL;
- entry->partialMatchIterator = NULL;
- entry->partialMatchResult = NULL;
- entry->reduceResult = FALSE;
- entry->predictNumberResult = 0;
if ( entry->isPartialMatch )
{
entry->buffer = scanBeginPostingTree(gdi);
/*
- * We keep buffer pinned because we need to prevent deletition
+ * We keep buffer pinned because we need to prevent deletion of
* page during scan. See GIN's vacuum implementation. RefCount
- * is increased to keep buffer pinned after freeGinBtreeStack() call.
+ * is increased to keep buffer pinned after freeGinBtreeStack()
+ * call.
*/
IncrBufferRefCount(entry->buffer);
{
do
{
- if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples )
+ if (entry->partialMatchResult == NULL ||
+ entry->offset >= entry->partialMatchResult->ntuples)
{
- entry->partialMatchResult = tbm_iterate( entry->partialMatchIterator );
+ entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator);
if ( entry->partialMatchResult == NULL )
{
entry->isFinished = TRUE;
break;
}
- else if ( entry->partialMatchResult->ntuples < 0 )
- {
- /* bitmap became lossy */
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("not enough memory to store result of partial match operator" ),
- errhint("Increase the \"work_mem\" parameter.")));
- }
+
+ /*
+ * reset counter to the beginning of entry->partialMatchResult.
+ * Note: entry->offset is still greater than
+ * partialMatchResult->ntuples if partialMatchResult is
+ * lossy. So on the next call we will fetch the next result
+ * from the TIDBitmap.
+ */
entry->offset = 0;
}
- ItemPointerSet(&entry->curItem,
- entry->partialMatchResult->blockno,
- entry->partialMatchResult->offsets[ entry->offset ]);
- entry->offset ++;
+ if ( entry->partialMatchResult->ntuples < 0 )
+ {
+ /*
+ * lossy result, so we need to check the whole page
+ */
+ ItemPointerSetLossyPage(&entry->curItem,
+ entry->partialMatchResult->blockno);
+ /*
+ * We might as well fall out of the loop; we cannot estimate the
+ * number of results on this page, so correct result reduction is
+ * impossible even if it's enabled.
+ */
+ break;
+ }
- } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
+ ItemPointerSet(&entry->curItem,
+ entry->partialMatchResult->blockno,
+ entry->partialMatchResult->offsets[entry->offset]);
+ entry->offset++;
+ } while (entry->reduceResult == TRUE && dropItem(entry));
}
else if (!BufferIsValid(entry->buffer))
{
if (key->entryRes[i])
{
+ /*
+ * Advance only the entries that were the smallest
+ * on the previous call.
+ */
if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE)
{
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
*/
*keyrecheck = true;
+ /*
+ * If one of the entry's scans returned a lossy result, return it without
+ * checking; we can't pass anything helpful to the consistentFn.
+ */
+ if (ItemPointerIsLossyPage(&key->curItem))
+ return FALSE;
+
oldCtx = MemoryContextSwitchTo(tempCtx);
res = DatumGetBool(FunctionCall4(&ginstate->consistentFn[key->attnum-1],
PointerGetDatum(key->entryRes),
return FALSE;
}
+
+/*
+ * Get ItemPointer of next heap row to be checked from pending list.
+ * Returns false if there are no more.
+ *
+ * The pendingBuffer is presumed pinned and share-locked on entry, and is
+ * pinned and share-locked on success exit. On failure exit it's released.
+ */
+static bool
+scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
+{
+ OffsetNumber maxoff;
+ Page page;
+ IndexTuple itup;
+
+ ItemPointerSetInvalid( &pos->item );
+ for(;;)
+ {
+ page = BufferGetPage(pos->pendingBuffer);
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ if ( pos->firstOffset > maxoff )
+ {
+ BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+ if ( blkno == InvalidBlockNumber )
+ {
+ UnlockReleaseBuffer(pos->pendingBuffer);
+ pos->pendingBuffer=InvalidBuffer;
+
+ return false;
+ }
+ else
+ {
+ /*
+ * Here we must prevent deletion of the next page by the
+ * insert-cleanup process, which may be waiting to obtain
+ * exclusive lock on the current page. So we lock the next
+ * page before releasing the current one.
+ */
+ Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
+
+ LockBuffer(tmpbuf, GIN_SHARE);
+ UnlockReleaseBuffer(pos->pendingBuffer);
+
+ pos->pendingBuffer = tmpbuf;
+ pos->firstOffset = FirstOffsetNumber;
+ }
+ }
+ else
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
+ pos->item = itup->t_tid;
+ if ( GinPageHasFullRow(page) )
+ {
+ /*
+ * find the item pointer of the next heap row
+ */
+ for(pos->lastOffset = pos->firstOffset+1; pos->lastOffset<=maxoff; pos->lastOffset++)
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
+ if (!ItemPointerEquals(&pos->item, &itup->t_tid))
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * All itempointers are the same on this page
+ */
+ pos->lastOffset = maxoff + 1;
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
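+/*
+ * Check whether any entry at offsets [off, maxoff) on the pending-list page,
+ * belonging to the same index attribute, satisfies the partial-match
+ * comparison against value. Returns true as soon as a match is found;
+ * gives up when the attribute changes or comparePartialFn reports that
+ * no further matches are possible.
+ */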
+static bool
+matchPartialInPendingList(GinState *ginstate, Page page,
+ OffsetNumber off, OffsetNumber maxoff,
+ Datum value, OffsetNumber attrnum,
+ Datum *datum, bool *datumExtracted,
+ StrategyNumber strategy)
+{
+ IndexTuple itup;
+ int res;
+
+ while ( off < maxoff )
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+ if ( attrnum != gintuple_get_attrnum(ginstate, itup) )
+ return false;
+
+ if (datumExtracted[ off-1 ] == false)
+ {
+ datum[ off-1 ] = gin_index_getattr(ginstate, itup);
+ datumExtracted[ off-1 ] = true;
+ }
+
+ res = DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[attrnum],
+ value,
+ datum[ off-1 ],
+ UInt16GetDatum(strategy)));
+ if ( res == 0 )
+ return true;
+ else if (res > 0)
+ return false;
+
+ off++;
+ }
+
+ return false;
+}
+
+/*
+ * Set the entryRes array for each key by examining every entry of the
+ * indexed value (heap row) in the pending list.
+ * Returns true if at least one datum was matched by some key's entry.
+ *
+ * The pendingBuffer is presumed pinned and share-locked on entry.
+ */
+static bool
+collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
+{
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ OffsetNumber attrnum;
+ Page page;
+ IndexTuple itup;
+ int i, j;
+ bool hasMatch = false;
+
+ /*
+ * Resets entryRes
+ */
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+ memset( key->entryRes, FALSE, key->nentries );
+ }
+
+ for(;;)
+ {
+ Datum datum[ BLCKSZ/sizeof(IndexTupleData) ];
+ bool datumExtracted[ BLCKSZ/sizeof(IndexTupleData) ];
+
+ Assert( pos->lastOffset > pos->firstOffset );
+ memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset ));
+
+ page = BufferGetPage(pos->pendingBuffer);
+
+ for(i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ for(j=0; j < key->nentries; j++)
+ {
+ OffsetNumber StopLow = pos->firstOffset,
+ StopHigh = pos->lastOffset,
+ StopMiddle;
+ GinScanEntry entry = key->scanEntry + j;
+
+ if ( key->entryRes[j] )
+ continue;
+
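+ /*
+ * Binary-search this heap row's entries (stored in attribute-number
+ * and entry-datum order) for the scan entry's value.
+ */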
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
+ attrnum = gintuple_get_attrnum(&so->ginstate, itup);
+
+ if (key->attnum < attrnum)
+ StopHigh = StopMiddle;
+ else if (key->attnum > attrnum)
+ StopLow = StopMiddle + 1;
+ else
+ {
+ int res;
+
+ if (datumExtracted[ StopMiddle-1 ] == false)
+ {
+ datum[ StopMiddle-1 ] = gin_index_getattr(&so->ginstate, itup);
+ datumExtracted[ StopMiddle-1 ] = true;
+ }
+ res = compareEntries(&so->ginstate,
+ entry->attnum,
+ entry->entry,
+ datum[ StopMiddle-1 ]);
+
+ if ( res == 0 )
+ {
+ if ( entry->isPartialMatch )
+ key->entryRes[j] =
+ matchPartialInPendingList(&so->ginstate,
+ page, StopMiddle,
+ pos->lastOffset,
+ entry->entry,
+ entry->attnum,
+ datum,
+ datumExtracted,
+ entry->strategy);
+ else
+ key->entryRes[j] = true;
+ break;
+ }
+ else if ( res < 0 )
+ StopHigh = StopMiddle;
+ else
+ StopLow = StopMiddle + 1;
+ }
+ }
+
+ if ( StopLow>=StopHigh && entry->isPartialMatch )
+ key->entryRes[j] =
+ matchPartialInPendingList(&so->ginstate,
+ page, StopHigh,
+ pos->lastOffset,
+ entry->entry,
+ entry->attnum,
+ datum,
+ datumExtracted,
+ entry->strategy);
+
+ hasMatch |= key->entryRes[j];
+ }
+ }
+
+ pos->firstOffset = pos->lastOffset;
+
+ if ( GinPageHasFullRow(page) )
+ {
+ /*
+ * We have scanned all values of this heap tuple; go to the next one.
+ */
+
+ return hasMatch;
+ }
+ else
+ {
+ ItemPointerData item = pos->item;
+
+ if ( scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item) )
+ elog(ERROR,"Could not process tuple"); /* XXX should not be here ! */
+ }
+ }
+
+ return hasMatch;
+}
+
+/*
+ * Collect all matching heap rows from the pending list into the bitmap.
+ */
+static void
+scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
+{
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ MemoryContext oldCtx;
+ bool recheck, keyrecheck, match;
+ int i;
+ pendingPosition pos;
+ Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
+ BlockNumber blkno;
+
+ *ntids = 0;
+
+ LockBuffer(metabuffer, GIN_SHARE);
+ blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
+
+ /*
+ * Fetch the head of the list before unlocking the metapage.
+ * The head page must be pinned to prevent deletion by the vacuum process.
+ */
+ if ( blkno == InvalidBlockNumber )
+ {
+ /* No pending list, so proceed with normal scan */
+ UnlockReleaseBuffer( metabuffer );
+ return;
+ }
+
+ pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
+ LockBuffer(pos.pendingBuffer, GIN_SHARE);
+ pos.firstOffset = FirstOffsetNumber;
+ UnlockReleaseBuffer( metabuffer );
+
+ /*
+ * loop for each heap row
+ */
+ while( scanGetCandidate(scan, &pos) )
+ {
+
+ /*
+ * Check the row's entries and set up the entryRes array.
+ */
+ if (!collectDatumForItem(scan, &pos))
+ continue;
+
+ /*
+ * Check whether each key's consistentFn accepts the collected
+ * entryRes; the row matches only if all keys are satisfied.
+ */
+ oldCtx = MemoryContextSwitchTo(so->tempCtx);
+ recheck = false;
+ match = true;
+
+ for (i = 0; match && i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ keyrecheck = true;
+
+ if ( DatumGetBool(FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ],
+ PointerGetDatum(key->entryRes),
+ UInt16GetDatum(key->strategy),
+ key->query,
+ PointerGetDatum(&keyrecheck))) == false )
+ {
+ match = false;
+ }
+
+ recheck |= keyrecheck;
+ }
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(so->tempCtx);
+
+ if ( match )
+ {
+ tbm_add_tuples(tbm, &pos.item, 1, recheck);
+ (*ntids)++;
+ }
+ }
+}
+
/*
* Get heap item pointer from scan
* returns true if found
{
int cmp = compareItemPointers(item, &key->curItem);
+ if ( cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem)) )
+ {
+ /*
+ * If one of the ItemPointers is a lossy whole-page pointer,
+ * compare only the block numbers.
+ */
+ if ( ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem) )
+ cmp = 0;
+ else
+ cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 1 : -1;
+ }
+
if (cmp == 0)
break;
else if (cmp > 0)
if (GinIsVoidRes(scan))
PG_RETURN_INT64(0);
+ ntids = 0;
+
+ /*
+ * First, scan the pending list and collect any matching entries into
+ * the bitmap. After we scan a pending item, some other backend could
+ * post it into the main index, and so we might visit it a second time
+ * during the main scan. This is okay because we'll just re-set the
+ * same bit in the bitmap. (The possibility of duplicate visits is a
+ * major reason why GIN can't support the amgettuple API, however.)
+ * Note that it would not do to scan the main index before the pending
+ * list, since concurrent cleanup could then make us miss entries
+ * entirely.
+ */
+ scanPendingInsert(scan, tbm, &ntids);
+
+ /*
+ * Now scan the main index.
+ */
startScan(scan);
- ntids = 0;
for (;;)
{
ItemPointerData iptr;
if (!scanGetItem(scan, &iptr, &recheck))
break;
- tbm_add_tuples(tbm, &iptr, 1, recheck);
+ if ( ItemPointerIsLossyPage(&iptr) )
+ tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
+ else
+ tbm_add_tuples(tbm, &iptr, 1, recheck);
ntids++;
}
PG_RETURN_INT64(ntids);
}
-
-Datum
-gingettuple(PG_FUNCTION_ARGS)
-{
- IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
- ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
- bool res;
-
- if (dir != ForwardScanDirection)
- elog(ERROR, "GIN doesn't support other scan directions than forward");
-
- if (GinIsNewKey(scan))
- newScanKey(scan);
-
- if (GinIsVoidRes(scan))
- PG_RETURN_BOOL(false);
-
- startScan(scan);
- res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck);
-
- PG_RETURN_BOOL(res);
-}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.18 2009/01/01 17:23:34 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.19 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*/
-static void
-ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
- ItemPointerData *items, uint32 nitem, bool isBuild)
+void
+ginEntryInsert(Relation index, GinState *ginstate,
+ OffsetNumber attnum, Datum value,
+ ItemPointerData *items, uint32 nitem,
+ bool isBuild)
{
GinBtreeData btree;
GinBtreeStack *stack;
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
- Buffer buffer;
+ Buffer RootBuffer, MetaBuffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
initGinState(&buildstate.ginstate, index);
+ /* initialize the meta page */
+ MetaBuffer = GinNewBuffer(index);
+
/* initialize the root page */
- buffer = GinNewBuffer(index);
+ RootBuffer = GinNewBuffer(index);
+
START_CRIT_SECTION();
- GinInitBuffer(buffer, GIN_LEAF);
- MarkBufferDirty(buffer);
+ GinInitMetabuffer(MetaBuffer);
+ MarkBufferDirty(MetaBuffer);
+ GinInitBuffer(RootBuffer, GIN_LEAF);
+ MarkBufferDirty(RootBuffer);
if (!index->rd_istemp)
{
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
- page = BufferGetPage(buffer);
-
-
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+
+ page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
+ page = BufferGetPage(MetaBuffer);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
}
- UnlockReleaseBuffer(buffer);
+ UnlockReleaseBuffer(MetaBuffer);
+ UnlockReleaseBuffer(RootBuffer);
END_CRIT_SECTION();
/* build the index */
initGinState(&ginstate, index);
- for(i=0; i < ginstate.origTupdesc->natts; i++)
- if ( !isnull[i] )
- res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid);
+ if ( GinGetUseFastUpdate(index) )
+ {
+ GinTupleCollector collector;
+
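+ /*
+ * With fastupdate enabled, collect all of this heap tuple's entries
+ * into a local collector and append them to the pending list in one
+ * operation, instead of inserting each entry into the main tree.
+ */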
+ memset(&collector, 0, sizeof(GinTupleCollector));
+ for(i=0; i < ginstate.origTupdesc->natts; i++)
+ if ( !isnull[i] )
+ res += ginHeapTupleFastCollect(index, &ginstate, &collector,
+ (OffsetNumber)(i+1), values[i], ht_ctid);
+
+ ginHeapTupleFastInsert(index, &ginstate, &collector);
+ }
+ else
+ {
+ for(i=0; i < ginstate.origTupdesc->natts; i++)
+ if ( !isnull[i] )
+ res += ginHeapTupleInsert(index, &ginstate,
+ (OffsetNumber)(i+1), values[i], ht_ctid);
+
+ }
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.20 2009/01/05 17:14:28 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.21 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
CurrentMemoryContext);
/*
- * Check opclass capability to do partial match.
+ * Check opclass capability to do partial match.
*/
if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
{
bool isnull;
/*
- * First attribute is always int16, so we can safely use any
+ * First attribute is always int16, so we can safely use any
* tuple descriptor to obtain first attribute of tuple
*/
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
}
+void
+GinInitMetabuffer(Buffer b)
+{
+ GinMetaPageData *metadata;
+ Page page = BufferGetPage(b);
+
+ GinInitPage(page, GIN_META, BufferGetPageSize(b));
+
+ metadata = GinPageGetMeta(page);
+
+ metadata->head = metadata->tail = InvalidBlockNumber;
+ metadata->tailFreeSize = 0;
+ metadata->nPendingPages = 0;
+ metadata->nPendingHeapTuples = 0;
+}
+
int
compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b)
{
{
Datum reloptions = PG_GETARG_DATUM(0);
bool validate = PG_GETARG_BOOL(1);
- bytea *result;
+ relopt_value *options;
+ GinOptions *rdopts;
+ int numoptions;
+ static const relopt_parse_elt tab[] = {
+ {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
+ };
+
+ options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
+ &numoptions);
+
+ /* if none set, we're done */
+ if (numoptions == 0)
+ PG_RETURN_NULL();
+
+ rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
+
+ fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
+ validate, tab, lengthof(tab));
+
+ pfree(options);
- result = default_reloptions(reloptions, validate, RELOPT_KIND_GIN);
- if (result)
- PG_RETURN_BYTEA_P(result);
- PG_RETURN_NULL();
+ PG_RETURN_BYTEA_P(rdopts);
}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.27 2009/01/01 17:23:34 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.28 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
#include "catalog/storage.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
+#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
-#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
uint32 nRoot;
+ gvs.index = index;
+ gvs.callback = callback;
+ gvs.callback_state = callback_state;
+ gvs.strategy = info->strategy;
+ initGinState(&gvs.ginstate, index);
+
/* first time through? */
if (stats == NULL)
+ {
+ /* Yes, so initialize stats to zeroes */
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ /* and cleanup any pending inserts */
+ ginInsertCleanup(index, &gvs.ginstate, true, stats);
+ }
+
/* we'll re-count the tuples each time */
stats->num_index_tuples = 0;
-
- gvs.index = index;
gvs.result = stats;
- gvs.callback = callback;
- gvs.callback_state = callback_state;
- gvs.strategy = info->strategy;
- initGinState(&gvs.ginstate, index);
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
RBM_NORMAL, info->strategy);
BlockNumber totFreePages;
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
+ GinState ginstate;
- /* Set up all-zero stats if ginbulkdelete wasn't called */
+ /*
+ * In an autovacuum analyze, we want to clean up pending insertions.
+ * Otherwise, an ANALYZE-only call is a no-op.
+ */
+ if (info->analyze_only)
+ {
+ if (IsAutoVacuumWorkerProcess())
+ {
+ initGinState(&ginstate, index);
+ ginInsertCleanup(index, &ginstate, true, stats);
+ }
+ PG_RETURN_POINTER(stats);
+ }
+
+ /*
+ * Set up all-zero stats and clean up pending inserts
+ * if ginbulkdelete wasn't called.
+ */
if (stats == NULL)
+ {
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ initGinState(&ginstate, index);
+ ginInsertCleanup(index, &ginstate, true, stats);
+ }
/*
* XXX we always report the heap tuple count as the number of index
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.17 2009/01/20 18:59:36 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.18 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
- Buffer buffer;
+ Buffer RootBuffer, MetaBuffer;
Page page;
- buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
- Assert(BufferIsValid(buffer));
- page = (Page) BufferGetPage(buffer);
+ MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
+ Assert(BufferIsValid(MetaBuffer));
+ GinInitMetabuffer(MetaBuffer);
+
+ page = (Page) BufferGetPage(MetaBuffer);
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
- GinInitBuffer(buffer, GIN_LEAF);
+ RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
+ Assert(BufferIsValid(RootBuffer));
+ page = (Page) BufferGetPage(RootBuffer);
+
+ GinInitBuffer(RootBuffer, GIN_LEAF);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
+ MarkBufferDirty(MetaBuffer);
+ UnlockReleaseBuffer(MetaBuffer);
+ MarkBufferDirty(RootBuffer);
+ UnlockReleaseBuffer(RootBuffer);
}
static void
}
}
+static void
+ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+{
+ ginxlogUpdateMeta *data = (ginxlogUpdateMeta*) XLogRecGetData(record);
+ Buffer metabuffer;
+ Page metapage;
+
+ metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+ metapage = BufferGetPage(metabuffer);
+
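+ /* apply the new metapage contents unless the page is already up to date */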
+ if (!XLByteLE(lsn, PageGetLSN(metapage)))
+ {
+ memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+ PageSetLSN(metapage, lsn);
+ PageSetTLI(metapage, ThisTimeLineID);
+ MarkBufferDirty(metabuffer);
+ }
+
+ if ( data->ntuples > 0 )
+ {
+ /*
+ * insert into tail page
+ */
+ if (!(record->xl_info & XLR_BKP_BLOCK_1))
+ {
+ Buffer buffer = XLogReadBuffer(data->node, data->metadata.tail, false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ OffsetNumber l, off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+ OffsetNumberNext(PageGetMaxOffsetNumber(page));
+ int i, tupsize;
+ IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+
+ for(i=0; i < data->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(tuples);
+
+ l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page");
+
+ tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+ }
+
+ /*
+ * Increase counter of heap tuples
+ */
+ GinPageGetOpaque(page)->maxoff++;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+ }
+ else if ( data->prevTail != InvalidBlockNumber )
+ {
+ /*
+ * New tail
+ */
+
+ Buffer buffer = XLogReadBuffer(data->node, data->prevTail, false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ GinPageGetOpaque(page)->rightlink = data->newRightlink;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+
+ UnlockReleaseBuffer(metabuffer);
+}
+
+static void
+ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+{
+ ginxlogInsertListPage *data = (ginxlogInsertListPage*) XLogRecGetData(record);
+ Buffer buffer;
+ Page page;
+ OffsetNumber l, off = FirstOffsetNumber;
+ int i, tupsize;
+ IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
+
+ if (record->xl_info & XLR_BKP_BLOCK_1)
+ return;
+
+ buffer = XLogReadBuffer(data->node, data->blkno, true);
+ page = BufferGetPage(buffer);
+
+ GinInitBuffer(buffer, GIN_LIST);
+ GinPageGetOpaque(page)->rightlink = data->rightlink;
+ if ( data->rightlink == InvalidBlockNumber )
+ {
+ /* tail of sublist */
+ GinPageSetFullRow(page);
+ GinPageGetOpaque(page)->maxoff = 1;
+ }
+ else
+ {
+ GinPageGetOpaque(page)->maxoff = 0;
+ }
+
+ for(i=0; i < data->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(tuples);
+
+ l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page");
+
+ tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+ }
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+
+ UnlockReleaseBuffer(buffer);
+}
+
+static void
+ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+{
+ ginxlogDeleteListPages *data = (ginxlogDeleteListPages*) XLogRecGetData(record);
+ Buffer metabuffer;
+ Page metapage;
+ int i;
+
+ metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+ metapage = BufferGetPage(metabuffer);
+
+ if (!XLByteLE(lsn, PageGetLSN(metapage)))
+ {
+ memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+ PageSetLSN(metapage, lsn);
+ PageSetTLI(metapage, ThisTimeLineID);
+ MarkBufferDirty(metabuffer);
+ }
+
+ for(i=0; i < data->ndeleted; i++)
+ {
+ Buffer buffer = XLogReadBuffer(data->node,data->toDelete[i],false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ GinPageGetOpaque(page)->flags = GIN_DELETED;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+
+ UnlockReleaseBuffer(buffer);
+ }
+ UnlockReleaseBuffer(metabuffer);
+}
+
void
gin_redo(XLogRecPtr lsn, XLogRecord *record)
{
case XLOG_GIN_DELETE_PAGE:
ginRedoDeletePage(lsn, record);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ ginRedoUpdateMetapage(lsn, record);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ ginRedoInsertListPage(lsn, record);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ ginRedoDeleteListPages(lsn, record);
+ break;
default:
elog(PANIC, "gin_redo: unknown op code %u", info);
}
appendStringInfo(buf, "Delete page, ");
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ appendStringInfo(buf, "Update metapage, ");
+ desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ appendStringInfo(buf, "Insert new list page, ");
+ desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ appendStringInfo(buf, "Delete list pages (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted);
+ desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head);
+ break;
default:
elog(PANIC, "gin_desc: unknown op code %u", info);
}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.42 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.43 2009/03/24 20:17:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
lastFilledBlock = GIST_ROOT_BLKNO;
bool needLock;
+ /* No-op in ANALYZE ONLY mode */
+ if (info->analyze_only)
+ PG_RETURN_POINTER(stats);
+
/* Set up all-zero stats if gistbulkdelete wasn't called */
if (stats == NULL)
{
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.108 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.109 2009/03/24 20:17:11 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
BlockNumber num_pages;
/* If hashbulkdelete wasn't called, return NULL signifying no change */
+ /* Note: this covers the analyze_only case too */
if (stats == NULL)
PG_RETURN_POINTER(NULL);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.112 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.113 2009/03/24 20:17:12 tgl Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relation OID
* item slot could have been replaced by a newer tuple by the time we get
* to it.
*
- * Returns the number of matching tuples found.
+ * Returns the number of matching tuples found. (Note: this might be only
+ * approximate, so it should only be used for statistical purposes.)
* ----------------
*/
int64
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.167 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.168 2009/03/24 20:17:12 tgl Exp $
*
*-------------------------------------------------------------------------
*/
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+ /* No-op in ANALYZE ONLY mode */
+ if (info->analyze_only)
+ PG_RETURN_POINTER(stats);
+
/*
* If btbulkdelete was called, we need not do anything, just return the
* stats from the latest btbulkdelete call. If it wasn't called, we must
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.312 2009/01/22 20:16:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.313 2009/03/24 20:17:12 tgl Exp $
*
*
* INTERFACE ROUTINES
*/
ivinfo.index = indexRelation;
ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = false;
ivinfo.message_level = DEBUG2;
ivinfo.num_heap_tuples = -1;
ivinfo.strategy = NULL;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.133 2009/01/22 20:16:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.134 2009/03/24 20:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* We skip to here if there were no analyzable columns */
cleanup:
+ /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
+ if (!vacstmt->vacuum)
+ {
+ for (ind = 0; ind < nindexes; ind++)
+ {
+ IndexBulkDeleteResult *stats;
+ IndexVacuumInfo ivinfo;
+
+ ivinfo.index = Irel[ind];
+ ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = true;
+ ivinfo.message_level = elevel;
+ ivinfo.num_heap_tuples = -1; /* not known for sure */
+ ivinfo.strategy = vac_strategy;
+
+ stats = index_vacuum_cleanup(&ivinfo, NULL);
+
+ if (stats)
+ pfree(stats);
+ }
+ }
+
/* Done with indexes */
vac_close_indexes(nindexes, Irel, NoLock);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.385 2009/01/16 13:27:23 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.386 2009/03/24 20:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
ivinfo.index = indrel;
ivinfo.vacuum_full = true;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = num_tuples;
ivinfo.strategy = vac_strategy;
ivinfo.index = indrel;
ivinfo.vacuum_full = true;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
ivinfo.strategy = vac_strategy;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.118 2009/01/22 19:25:00 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.119 2009/03/24 20:17:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
ivinfo.index = indrel;
ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
/* We don't yet know rel_tuples, so pass -1 */
ivinfo.num_heap_tuples = -1;
ivinfo.index = indrel;
ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
ivinfo.strategy = vac_strategy;
* Copyright (c) 2003-2009, PostgreSQL Global Development Group
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.17 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.18 2009/03/24 20:17:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
}
}
+/*
+ * tbm_add_page - add a whole page to a TIDBitmap
+ *
+ * This causes the whole page to be reported (with the recheck flag)
+ * when the TIDBitmap is scanned.
+ */
+void
+tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
+{
+ /* Enter the page in the bitmap, or mark it lossy if already present */
+ tbm_mark_page_lossy(tbm, pageno);
+ /* If we went over the memory limit, lossify some more pages */
+ if (tbm->nentries > tbm->maxentries)
+ tbm_lossify(tbm);
+}
+
/*
* tbm_union - set union
*
{
/*
* Some of the tuples in 'a' might not satisfy the quals for 'b',
- * but because the page 'b' is lossy, we don't know which ones.
+ * but because the page 'b' is lossy, we don't know which ones.
* Therefore we mark 'a' as requiring rechecks, to indicate that
* at most those tuples set in 'a' are matches.
*/
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.75 2009/01/01 17:23:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.76 2009/03/24 20:17:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
{
Relation index; /* the index being vacuumed */
bool vacuum_full; /* VACUUM FULL (we have exclusive lock) */
+ bool analyze_only; /* ANALYZE (without any actual vacuum) */
int message_level; /* ereport level for progress messages */
double num_heap_tuples; /* tuples remaining in heap */
BufferAccessStrategy strategy; /* access strategy for reads */
*
* Copyright (c) 2006-2009, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.28 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.29 2009/03/24 20:17:14 tgl Exp $
*--------------------------------------------------------------------------
*/
-
-
#ifndef GIN_H
#define GIN_H
#include "access/itup.h"
#include "access/xlog.h"
#include "fmgr.h"
-#include "nodes/tidbitmap.h"
-#include "storage/block.h"
-#include "storage/buf.h"
-#include "storage/off.h"
-#include "storage/relfilenode.h"
/*
typedef struct GinPageOpaqueData
{
BlockNumber rightlink; /* next page if any */
- OffsetNumber maxoff; /* number entries on GIN_DATA page: number of
+ OffsetNumber maxoff; /* number entries on GIN_DATA page; number of
* heap ItemPointer on GIN_DATA|GIN_LEAF page
* and number of records on GIN_DATA &
- * ~GIN_LEAF page */
+ * ~GIN_LEAF page. On GIN_LIST page, number of
+ * heap tuples. */
uint16 flags; /* see bit definitions below */
} GinPageOpaqueData;
typedef GinPageOpaqueData *GinPageOpaque;
-#define GIN_ROOT_BLKNO (0)
-
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
+#define GIN_META (1 << 3)
+#define GIN_LIST (1 << 4)
+#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
+
+/* Page numbers of fixed-location pages */
+#define GIN_METAPAGE_BLKNO (0)
+#define GIN_ROOT_BLKNO (1)
+
+typedef struct GinMetaPageData
+{
+ /*
+ * Pointers to head and tail of pending list, which consists of GIN_LIST
+ * pages. These store fast-inserted entries that haven't yet been moved
+ * into the regular GIN structure.
+ */
+ BlockNumber head;
+ BlockNumber tail;
+
+ /*
+ * Free space in bytes in the pending list's tail page.
+ */
+ uint32 tailFreeSize;
+
+ /*
+ * We store both number of pages and number of heap tuples
+ * that are in the pending list.
+ */
+ BlockNumber nPendingPages;
+ int64 nPendingHeapTuples;
+} GinMetaPageData;
+
+#define GinPageGetMeta(p) \
+ ((GinMetaPageData *) PageGetContents(p))
/*
* Works on page
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
#define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA )
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+#define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
/*
- * Define our ItemPointerGet(BlockNumber|GetOffsetNumber)
- * to prevent asserts
+ * We use our own ItemPointerGet(BlockNumber|GetOffsetNumber)
+ * to avoid Asserts, since sometimes the ip_posid isn't "valid"
*/
#define GinItemPointerGetBlockNumber(pointer) \
#define GinItemPointerGetOffsetNumber(pointer) \
((pointer)->ip_posid)
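+
+/*
+ * Special-purpose ItemPointer values used within GIN scans: "min" and "max"
+ * sort before and after any regular heap TID, while a "lossy page" pointer
+ * (offset 0xffff with a valid block number) stands for all tuples on that
+ * heap page.
+ */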
+#define ItemPointerSetMin(p) \
+ ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
+#define ItemPointerIsMin(p) \
+ (ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
+ ItemPointerGetBlockNumber(p) == (BlockNumber)0)
+#define ItemPointerSetMax(p) \
+ ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
+#define ItemPointerIsMax(p) \
+ (ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
+ ItemPointerGetBlockNumber(p) == InvalidBlockNumber)
+#define ItemPointerSetLossyPage(p, b) \
+ ItemPointerSet((p), (b), (OffsetNumber)0xffff)
+#define ItemPointerIsLossyPage(p) \
+ (ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
+ ItemPointerGetBlockNumber(p) != InvalidBlockNumber)
+
typedef struct
{
BlockIdData child_blkno; /* use it instead of BlockNumber to save space
- GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
+/*
+ * List pages
+ */
+#define GinListPageSize \
+ ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+/*
+ * Storage type for GIN's reloptions
+ */
+typedef struct GinOptions
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ bool useFastUpdate; /* use fast updates? */
+} GinOptions;
+
+#define GIN_DEFAULT_USE_FASTUPDATE true
+#define GinGetUseFastUpdate(relation) \
+ ((relation)->rd_options ? \
+ ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
+
#define GIN_UNLOCK BUFFER_LOCK_UNLOCK
#define GIN_SHARE BUFFER_LOCK_SHARE
BlockNumber rightLink;
} ginxlogDeletePage;
+#define XLOG_GIN_UPDATE_META_PAGE 0x60
+
+typedef struct ginxlogUpdateMeta
+{
+ RelFileNode node;
+ GinMetaPageData metadata;
+ BlockNumber prevTail;
+ BlockNumber newRightlink;
+ int32 ntuples; /* if ntuples > 0 then metadata.tail was updated
+ * with that many tuples; else a new sublist was
+ * inserted */
+ /* array of inserted tuples follows */
+} ginxlogUpdateMeta;
+
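+/* WAL record for writing out a brand-new pending-list page */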
+#define XLOG_GIN_INSERT_LISTPAGE 0x70
+
+typedef struct ginxlogInsertListPage
+{
+ RelFileNode node;
+ BlockNumber blkno;
+ BlockNumber rightlink;
+ int32 ntuples;
+ /* array of inserted tuples follows */
+} ginxlogInsertListPage;
+
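+/*
+ * WAL record for removing up to GIN_NDELETE_AT_ONCE pages from the head of
+ * the pending list, together with the corresponding metapage update.
+ */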
+#define XLOG_GIN_DELETE_LISTPAGE 0x80
+
+#define GIN_NDELETE_AT_ONCE 16
+typedef struct ginxlogDeleteListPages
+{
+ RelFileNode node;
+ GinMetaPageData metadata;
+ int32 ndeleted;
+ BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
+} ginxlogDeleteListPages;
+
+
/* ginutil.c */
extern Datum ginoptions(PG_FUNCTION_ARGS);
extern void initGinState(GinState *state, Relation index);
extern Buffer GinNewBuffer(Relation index);
extern void GinInitBuffer(Buffer b, uint32 f);
extern void GinInitPage(Page page, uint32 f, Size pageSize);
+extern void GinInitMetabuffer(Buffer b);
extern int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b);
-extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
+extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
OffsetNumber attnum_b, Datum b);
extern Datum *extractEntriesS(GinState *ginstate, OffsetNumber attnum, Datum value,
int32 *nentries, bool *needUnique);
extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple);
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
+
/* gininsert.c */
extern Datum ginbuild(PG_FUNCTION_ARGS);
extern Datum gininsert(PG_FUNCTION_ARGS);
+extern void ginEntryInsert(Relation index, GinState *ginstate,
+ OffsetNumber attnum, Datum value,
+ ItemPointerData *items, uint32 nitem,
+ bool isBuild);
/* ginxlog.c */
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno);
/* ginentrypage.c */
-extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
+extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
ItemPointerData *ipd, uint32 nipd);
extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum,
Datum value, GinState *ginstate);
/* ginget.c */
extern PGDLLIMPORT int GinFuzzySearchLimit;
-#define ItemPointerSetMax(p) ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff )
-#define ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff )
-#define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0)
-#define ItemPointerIsMin(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0 && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 )
-
extern Datum gingetbitmap(PG_FUNCTION_ARGS);
-extern Datum gingettuple(PG_FUNCTION_ARGS);
/* ginvacuum.c */
extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
extern void ginInitBA(BuildAccumulator *accum);
extern void ginInsertRecordBA(BuildAccumulator *accum,
- ItemPointer heapptr,
+ ItemPointer heapptr,
OffsetNumber attnum, Datum *entries, int32 nentry);
extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
-#endif
+/* ginfast.c */
+
+typedef struct GinTupleCollector
+{
+ IndexTuple *tuples;
+ uint32 ntuples;
+ uint32 lentuples;
+ uint32 sumsize;
+} GinTupleCollector;
+
+extern void ginHeapTupleFastInsert(Relation index, GinState *ginstate,
+ GinTupleCollector *collector);
+extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+ GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item);
+extern void ginInsertCleanup(Relation index, GinState *ginstate,
+ bool vac_delay, IndexBulkDeleteResult *stats);
+
+#endif /* GIN_H */
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.524 2009/02/24 10:06:34 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.525 2009/03/24 20:17:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200902242
+#define CATALOG_VERSION_NO 200903241
#endif
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.61 2009/03/05 23:06:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.62 2009/03/24 20:17:15 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
DATA(insert OID = 783 ( gist 0 7 f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
-DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
+DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.537 2009/02/24 10:06:34 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.538 2009/03/24 20:17:15 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
DESCR("GiST support");
/* GIN */
-DATA(insert OID = 2730 ( gingettuple PGNSP PGUID 12 1 0 0 f f f t f v 2 0 16 "2281 2281" _null_ _null_ _null_ _null_ gingettuple _null_ _null_ _null_ ));
-DESCR("gin(internal)");
DATA(insert OID = 2731 ( gingetbitmap PGNSP PGUID 12 1 0 0 f f f t f v 2 0 20 "2281 2281" _null_ _null_ _null_ _null_ gingetbitmap _null_ _null_ _null_ ));
DESCR("gin(internal)");
DATA(insert OID = 2732 ( gininsert PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gininsert _null_ _null_ _null_ ));
*
* Copyright (c) 2003-2009, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.9 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.10 2009/03/24 20:17:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void tbm_add_tuples(TIDBitmap *tbm,
const ItemPointer tids, int ntids,
bool recheck);
+extern void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno);
extern void tbm_union(TIDBitmap *a, const TIDBitmap *b);
extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b);