Ensure vacuum removes all visibly dead tuples older than OldestXmin
author: Melanie Plageman
Fri, 19 Jul 2024 15:06:03 +0000 (11:06 -0400)
committer: Melanie Plageman
Fri, 19 Jul 2024 16:05:51 +0000 (12:05 -0400)
If vacuum fails to remove a tuple with xmax older than
VacuumCutoffs->OldestXmin and younger than GlobalVisState->maybe_needed,
it will loop infinitely in lazy_scan_prune(), which compares tuples'
visibility information to OldestXmin.

Starting in version 14, which uses GlobalVisState for visibility testing
during pruning, it is possible for GlobalVisState->maybe_needed to
precede OldestXmin if maybe_needed is forced to go backward while vacuum
is running. This can happen if a disconnected standby with a running
transaction older than VacuumCutoffs->OldestXmin reconnects to the
primary after vacuum initially calculates GlobalVisState and OldestXmin.

Fix this by having vacuum always remove tuples older than OldestXmin
during pruning. This is okay because the standby won't replay the tuple
removal until the tuple is removable. Thus, the worst that can happen is
a recovery conflict.

Fixes BUG #17257

Back-patched in versions 14-17

Author: Melanie Plageman
Reviewed-by: Noah Misch, Peter Geoghegan, Robert Haas, Andres Freund, and Heikki Linnakangas
Discussion: https://postgr.es/m/CAAKRu_Y_NJzF4-8gzTTeaOuUL3CcGoXPjXcAHbTTygT8AyVqag%40mail.gmail.com

src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

index 9f43bbe25f5bd3c894c71e810a3477e8be5b9600..72ce130346cb075d27c0eeba48b3ac47a4786183 100644 (file)
@@ -33,7 +33,8 @@ typedef struct
 {
    Relation    rel;
 
-   /* tuple visibility test, initialized for the relation */
+   /* State used to test tuple visibility; Initialized for the relation */
+   TransactionId oldest_xmin;
    GlobalVisState *vistest;
 
    /*
@@ -206,7 +207,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
            int         ndeleted,
                        nnewlpdead;
 
-           ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
+           ndeleted = heap_page_prune(relation, buffer, InvalidTransactionId,
+                                      vistest, limited_xmin,
                                       limited_ts, &nnewlpdead, NULL);
 
            /*
@@ -248,11 +250,14 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  * also need to account for a reduction in the length of the line pointer
  * array following array truncation by us.
  *
- * vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
- * (see heap_prune_satisfies_vacuum and
- * HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to
- * either have been set by TransactionIdLimitedForOldSnapshots, or
- * InvalidTransactionId/0 respectively.
+ * vistest and oldest_xmin are used to distinguish whether tuples are DEAD or
+ * RECENTLY_DEAD (see heap_prune_satisfies_vacuum and
+ * HeapTupleSatisfiesVacuum). If oldest_xmin is provided by the caller, it is
+ * used before consulting GlobalVisState.
+ *
+ * old_snap_xmin / old_snap_ts need to either have been set by
+ * TransactionIdLimitedForOldSnapshots, or InvalidTransactionId/0
+ * respectively.
  *
  * Sets *nnewlpdead for caller, indicating the number of items that were
  * newly set LP_DEAD during prune operation.
@@ -264,6 +269,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  */
 int
 heap_page_prune(Relation relation, Buffer buffer,
+               TransactionId oldest_xmin,
                GlobalVisState *vistest,
                TransactionId old_snap_xmin,
                TimestampTz old_snap_ts,
@@ -291,6 +297,7 @@ heap_page_prune(Relation relation, Buffer buffer,
     */
    prstate.new_prune_xid = InvalidTransactionId;
    prstate.rel = relation;
+   prstate.oldest_xmin = oldest_xmin;
    prstate.vistest = vistest;
    prstate.old_snap_xmin = old_snap_xmin;
    prstate.old_snap_ts = old_snap_ts;
@@ -520,13 +527,31 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
    }
 
    /*
-    * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
-    * row dead. If not, and old_snapshot_threshold is enabled, try to use the
-    * lowered horizon.
+    * For VACUUM, we must be sure to prune tuples with xmax older than
+    * oldest_xmin -- a visibility cutoff determined at the beginning of
+    * vacuuming the relation. oldest_xmin is used for freezing determination
+    * and we cannot freeze dead tuples' xmaxes.
+    */
+   if (TransactionIdIsValid(prstate->oldest_xmin) &&
+       NormalTransactionIdPrecedes(dead_after, prstate->oldest_xmin))
+       return HEAPTUPLE_DEAD;
+
+   /*
+    * Determine whether or not the tuple is considered dead when compared
+    * with the provided GlobalVisState. On-access pruning does not provide
+    * oldest_xmin. And for vacuum, even if the tuple's xmax is not older than
+    * oldest_xmin, GlobalVisTestIsRemovableXid() could find the row dead if
+    * the GlobalVisState has been updated since the beginning of vacuuming
+    * the relation.
     */
    if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
-       res = HEAPTUPLE_DEAD;
-   else if (OldSnapshotThresholdActive())
+       return HEAPTUPLE_DEAD;
+
+   /*
+    * If GlobalVisTestIsRemovableXid() is not sufficient to find the row dead
+    * and old_snapshot_threshold is enabled, try to use the lowered horizon.
+    */
+   if (OldSnapshotThresholdActive())
    {
        /* haven't determined limited horizon yet, requests */
        if (!TransactionIdIsValid(prstate->old_snap_xmin))
index 2e61e2f20b427a90d84a1282c0c9ccff99f1c3e5..7537be5c1f65059350160749a3b44ce5d4d79f26 100644 (file)
@@ -1585,7 +1585,8 @@ retry:
     * lpdead_items's final value can be thought of as the number of tuples
     * that were deleted from indexes.
     */
-   tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
+   tuples_deleted = heap_page_prune(rel, buf, vacrel->OldestXmin,
+                                    vacrel->vistest,
                                     InvalidTransactionId, 0, &nnewlpdead,
                                     &vacrel->offnum);
 
index abf62d9df79140b1e557f9c59e0b1fa8f73ca9d6..50b9e729e634e4b2ffd35cb18860213b183ad35b 100644 (file)
@@ -185,6 +185,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 struct GlobalVisState;
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
 extern int heap_page_prune(Relation relation, Buffer buffer,
+                           TransactionId oldest_xmin,
                            struct GlobalVisState *vistest,
                            TransactionId old_snap_xmin,
                            TimestampTz old_snap_ts_ts,