Adjust pg_wal_replay_wait() procedure behavior on promoted standby
author: Alexander Korotkov
Sat, 10 Aug 2024 18:43:02 +0000 (21:43 +0300)
committer: Alexander Korotkov
Sat, 10 Aug 2024 18:43:02 +0000 (21:43 +0300)
pg_wal_replay_wait() is intended to be called on standby.  However, standby
can be promoted to primary at any moment, even concurrently with the
pg_wal_replay_wait() call.  If recovery is not currently in progress, that
doesn't mean the wait was unsuccessful.  Thus, we always need to recheck
if the target LSN is replayed.

Reported-by: Kevin Hale Boyes
Discussion: https://postgr.es/m/CAPpHfdu5QN%2BZGACS%2B7foxmr8_nekgA2PA%2B-G3BuOUrdBLBFb6Q%40mail.gmail.com
Author: Alexander Korotkov

doc/src/sgml/func.sgml
src/backend/commands/waitlsn.c
src/test/recovery/t/043_wal_replay_wait.pl

index 0f7154b76ab7683e308c01f15a519f4757e1a013..968a998552735537cc922ba92d12e040cffad64c 100644 (file)
@@ -28969,6 +28969,15 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
     connection pooler side.
    
 
+   
+    pg_wal_replay_wait should be called on a standby.
+    If a user calls pg_wal_replay_wait on the primary, it
+    will error out.  However, if pg_wal_replay_wait is
+    called on a primary promoted from a standby and target_lsn
+    was already replayed, then pg_wal_replay_wait just
+    exits immediately.
+   
+
    
     You can use pg_wal_replay_wait to wait for
     the pg_lsn value.  For example, an application could update
index 3170f0792a54c2e4c320391726181bd36e590561..deefbd458c081ca826b966101d4982a4af728182 100644 (file)
@@ -230,14 +230,27 @@ WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
    Assert(MyProcNumber >= 0 && MyProcNumber < MaxBackends);
 
    if (!RecoveryInProgress())
+   {
+       /*
+        * Recovery is not in progress.  Given that we detected this in the
+        * very first check, this procedure was mistakenly called on primary.
+        * However, it's possible that standby was promoted concurrently with
+        * the procedure call, while target LSN is already replayed.  So, we
+        * still check the last replay LSN before reporting an error.
+        */
+       if (targetLSN <= GetXLogReplayRecPtr(NULL))
+           return;
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("recovery is not in progress"),
                 errhint("Waiting for LSN can only be executed during recovery.")));
-
-   /* If target LSN is already replayed, exit immediately */
-   if (targetLSN <= GetXLogReplayRecPtr(NULL))
-       return;
+   }
+   else
+   {
+       /* If target LSN is already replayed, exit immediately */
+       if (targetLSN <= GetXLogReplayRecPtr(NULL))
+           return;
+   }
 
    if (timeout > 0)
    {
@@ -257,19 +270,30 @@ WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
        int         rc;
        long        delay_ms = 0;
 
-       /* Check if the waited LSN has been replayed */
-       currentLSN = GetXLogReplayRecPtr(NULL);
-       if (targetLSN <= currentLSN)
-           break;
-
        /* Recheck that recovery is still in-progress */
        if (!RecoveryInProgress())
+       {
+           /*
+            * Recovery has ended, but recheck whether the target LSN was
+            * already replayed.
+            */
+           currentLSN = GetXLogReplayRecPtr(NULL);
+           if (targetLSN <= currentLSN)
+               return;
            ereport(ERROR,
                    (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                     errmsg("recovery is not in progress"),
                     errdetail("Recovery ended before replaying target LSN %X/%X; last replay LSN %X/%X.",
                               LSN_FORMAT_ARGS(targetLSN),
                               LSN_FORMAT_ARGS(currentLSN))));
+       }
+       else
+       {
+           /* Check if the waited LSN has been replayed */
+           currentLSN = GetXLogReplayRecPtr(NULL);
+           if (targetLSN <= currentLSN)
+               break;
+       }
 
        /*
         * If the timeout value is specified, calculate the number of
index e4842730b050df25565673655c31cf4d38764876..aaa21c408674f1750892ddb1c63e70b9850f6a9b 100644 (file)
@@ -126,12 +126,18 @@ ok(1, 'multiple LSN waiters reported consistent data');
 
 # 5. Check that the standby promotion terminates the wait on LSN.  Start
 # waiting for an unreachable LSN then promote.  Check the log for the relevant
-# error message.
+# error message.  Also, check that waiting for already replayed LSN doesn't
+# cause an error even after promotion.
+my $lsn4 =
+  $node_primary->safe_psql('postgres',
+   "SELECT pg_current_wal_insert_lsn() + 10000000000");
+my $lsn5 =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()");
 my $psql_session = $node_standby1->background_psql('postgres');
 $psql_session->query_until(
    qr/start/, qq[
    \\echo start
-   CALL pg_wal_replay_wait('${lsn3}');
+   CALL pg_wal_replay_wait('${lsn4}');
 ]);
 
 $log_offset = -s $node_standby1->logfile;
@@ -140,6 +146,11 @@ $node_standby1->wait_for_log('recovery is not in progress', $log_offset);
 
 ok(1, 'got error after standby promote');
 
+$node_standby1->safe_psql('postgres', "CALL pg_wal_replay_wait('${lsn5}');");
+
+ok(1,
+   'wait for already replayed LSN exits immediately even after promotion');
+
 $node_standby1->stop;
 $node_primary->stop;