Remove retry loop in heap_page_prune().
authorRobert Haas
Mon, 2 Oct 2023 15:40:07 +0000 (11:40 -0400)
committerRobert Haas
Mon, 2 Oct 2023 15:40:07 +0000 (11:40 -0400)
The retry loop is needed because heap_page_prune() calls
HeapTupleSatisfiesVacuum() and then lazy_scan_prune() does the same
thing again, and they might get different answers due to concurrent
clog updates.  But this patch makes heap_page_prune() return the
HeapTupleSatisfiesVacuum() results that it computed back to the
caller, which allows lazy_scan_prune() to avoid needing to recompute
those values in the first place. That's nice both because it eliminates
the need for a retry loop and also because it's cheaper.

Melanie Plageman, reviewed by David Geier, Andres Freund, and me.

Discussion: https://postgr.es/m/CAAKRu_br124qsGJieuYA0nGjywEukhK1dKBfRdby_4yY3E9SXA%40mail.gmail.com

src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

index d5892a2db46b3e66d7d8d0e646e3327b9e0f885c..c5f1abd95a9a087901d49507a5dd26d294944b34 100644 (file)
@@ -53,16 +53,6 @@ typedef struct
     * 1. Otherwise every access would need to subtract 1.
     */
    bool        marked[MaxHeapTuplesPerPage + 1];
-
-   /*
-    * Tuple visibility is only computed once for each tuple, for correctness
-    * and efficiency reasons; see comment in heap_page_prune() for details.
-    * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
-    * indicate no visibility has been computed, e.g. for LP_DEAD items.
-    *
-    * Same indexing as ->marked.
-    */
-   int8        htsv[MaxHeapTuplesPerPage + 1];
 } PruneState;
 
 /* Local functions */
@@ -71,6 +61,7 @@ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
                                               Buffer buffer);
 static int heap_prune_chain(Buffer buffer,
                             OffsetNumber rootoffnum,
+                            int8 *htsv,
                             PruneState *prstate);
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
 static void heap_prune_record_redirect(PruneState *prstate,
@@ -240,6 +231,10 @@ heap_page_prune(Relation relation, Buffer buffer,
    prstate.nredirected = prstate.ndead = prstate.nunused = 0;
    memset(prstate.marked, 0, sizeof(prstate.marked));
 
+   /*
+    * presult->htsv is not initialized here because all ntuple spots in the
+    * array will be set either to a valid HTSV_Result value or -1.
+    */
    presult->ndeleted = 0;
    presult->nnewlpdead = 0;
 
@@ -276,7 +271,7 @@ heap_page_prune(Relation relation, Buffer buffer,
        /* Nothing to do if slot doesn't contain a tuple */
        if (!ItemIdIsNormal(itemid))
        {
-           prstate.htsv[offnum] = -1;
+           presult->htsv[offnum] = -1;
            continue;
        }
 
@@ -292,8 +287,8 @@ heap_page_prune(Relation relation, Buffer buffer,
        if (off_loc)
            *off_loc = offnum;
 
-       prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-                                                          buffer);
+       presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+                                                           buffer);
    }
 
    /* Scan the page */
@@ -317,7 +312,8 @@ heap_page_prune(Relation relation, Buffer buffer,
            continue;
 
        /* Process this item or chain of items */
-       presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
+       presult->ndeleted += heap_prune_chain(buffer, offnum,
+                                             presult->htsv, &prstate);
    }
 
    /* Clear the offset information once we have processed the given page. */
@@ -446,6 +442,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 /*
  * Prune specified line pointer or a HOT chain originating at line pointer.
  *
+ * Tuple visibility information is provided in htsv.
+ *
  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
@@ -473,7 +471,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
  * Returns the number of tuples (to be) deleted from the page.
  */
 static int
-heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
+heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
+                int8 *htsv, PruneState *prstate)
 {
    int         ndeleted = 0;
    Page        dp = (Page) BufferGetPage(buffer);
@@ -494,7 +493,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
     */
    if (ItemIdIsNormal(rootlp))
    {
-       Assert(prstate->htsv[rootoffnum] != -1);
+       Assert(htsv[rootoffnum] != -1);
        htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 
        if (HeapTupleHeaderIsHeapOnly(htup))
@@ -517,7 +516,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
             * either here or while following a chain below.  Whichever path
             * gets there first will mark the tuple unused.
             */
-           if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+           if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
                !HeapTupleHeaderIsHotUpdated(htup))
            {
                heap_prune_record_unused(prstate, rootoffnum);
@@ -585,7 +584,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
            break;
 
        Assert(ItemIdIsNormal(lp));
-       Assert(prstate->htsv[offnum] != -1);
        htup = (HeapTupleHeader) PageGetItem(dp, lp);
 
        /*
@@ -605,7 +603,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
         */
        tupdead = recent_dead = false;
 
-       switch ((HTSV_Result) prstate->htsv[offnum])
+       switch (htsv_get_valid_status(htsv[offnum]))
        {
            case HEAPTUPLE_DEAD:
                tupdead = true;
index fa77ef7f4ad88059255169fc4f25e29955b89dbc..42e49bc159b680dbdb637571dbda212525b549a1 100644 (file)
@@ -1524,12 +1524,13 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
  * but nevertheless were left with storage after pruning.
  *
- * The approach we take now is to restart pruning when the race condition is
- * detected.  This allows heap_page_prune() to prune the tuples inserted by
- * the now-aborted transaction.  This is a little crude, but it guarantees
- * that any items that make it into the dead_items array are simple LP_DEAD
- * line pointers, and that every remaining item with tuple storage is
- * considered as a candidate for freezing.
+ * As of Postgres 17, we circumvent this problem altogether by reusing the
+ * result of heap_page_prune()'s visibility check. Without the second call to
+ * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
+ * disagreement. We'll just handle such tuples as if they had become fully dead
+ * right after this operation completes instead of in the middle of it. Note that
+ * any tuple that becomes dead after the call to heap_page_prune() can't need to
+ * be frozen, because it was visible to another session when vacuum started.
  */
 static void
 lazy_scan_prune(LVRelState *vacrel,
@@ -1542,8 +1543,6 @@ lazy_scan_prune(LVRelState *vacrel,
    OffsetNumber offnum,
                maxoff;
    ItemId      itemid;
-   HeapTupleData tuple;
-   HTSV_Result res;
    PruneResult presult;
    int         tuples_frozen,
                lpdead_items,
@@ -1563,8 +1562,6 @@ lazy_scan_prune(LVRelState *vacrel,
     */
    maxoff = PageGetMaxOffsetNumber(page);
 
-retry:
-
    /* Initialize (or reset) page-level state */
    pagefrz.freeze_required = false;
    pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
@@ -1600,6 +1597,7 @@ retry:
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
+       HeapTupleHeader htup;
        bool        totally_frozen;
 
        /*
@@ -1642,22 +1640,7 @@ retry:
 
        Assert(ItemIdIsNormal(itemid));
 
-       ItemPointerSet(&(tuple.t_self), blkno, offnum);
-       tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-       tuple.t_len = ItemIdGetLength(itemid);
-       tuple.t_tableOid = RelationGetRelid(rel);
-
-       /*
-        * DEAD tuples are almost always pruned into LP_DEAD line pointers by
-        * heap_page_prune(), but it's possible that the tuple state changed
-        * since heap_page_prune() looked.  Handle that here by restarting.
-        * (See comments at the top of function for a full explanation.)
-        */
-       res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
-                                      buf);
-
-       if (unlikely(res == HEAPTUPLE_DEAD))
-           goto retry;
+       htup = (HeapTupleHeader) PageGetItem(page, itemid);
 
        /*
         * The criteria for counting a tuple as live in this block need to
@@ -1678,7 +1661,7 @@ retry:
         * (Cases where we bypass index vacuuming will violate this optimistic
         * assumption, but the overall impact of that should be negligible.)
         */
-       switch (res)
+       switch (htsv_get_valid_status(presult.htsv[offnum]))
        {
            case HEAPTUPLE_LIVE:
 
@@ -1700,7 +1683,7 @@ retry:
                {
                    TransactionId xmin;
 
-                   if (!HeapTupleHeaderXminCommitted(tuple.t_data))
+                   if (!HeapTupleHeaderXminCommitted(htup))
                    {
                        prunestate->all_visible = false;
                        break;
@@ -1710,7 +1693,7 @@ retry:
                     * The inserter definitely committed. But is it old enough
                     * that everyone sees it as committed?
                     */
-                   xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+                   xmin = HeapTupleHeaderGetXmin(htup);
                    if (!TransactionIdPrecedes(xmin,
                                               vacrel->cutoffs.OldestXmin))
                    {
@@ -1764,7 +1747,7 @@ retry:
        prunestate->hastup = true;  /* page makes rel truncation unsafe */
 
        /* Tuple with storage -- consider need to freeze */
-       if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
+       if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
                                      &frozen[tuples_frozen], &totally_frozen))
        {
            /* Save prepared freeze plan for later */
index 2d3f149e4f0babf3301219cabbcda73f6f6005f3..62fac1d5d29b0d626d454ae5f3e218952713aab7 100644 (file)
@@ -198,8 +198,33 @@ typedef struct PruneResult
 {
    int         ndeleted;       /* Number of tuples deleted from the page */
    int         nnewlpdead;     /* Number of newly LP_DEAD items */
+
+   /*
+    * Tuple visibility is only computed once for each tuple, for correctness
+    * and efficiency reasons; see comment in heap_page_prune() for details.
+    * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
+    * indicate no visibility has been computed, e.g. for LP_DEAD items.
+    *
+    * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+    * 1. Otherwise every access would need to subtract 1.
+    */
+   int8        htsv[MaxHeapTuplesPerPage + 1];
 } PruneResult;
 
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneResult.htsv for details. This helper function is meant to
+ * guard against examining visibility status array members which have not yet
+ * been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+   Assert(status >= HEAPTUPLE_DEAD &&
+          status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+   return (HTSV_Result) status;
+}
+
 /* ----------------
  *     function prototypes for heap access method
  *