Fix waiting in RegisterSyncRequest().
author Thomas Munro
Wed, 16 Mar 2022 02:35:42 +0000 (15:35 +1300)
committer Thomas Munro
Wed, 16 Mar 2022 02:35:42 +0000 (15:35 +1300)
If we run out of space in the checkpointer sync request queue (which is
hopefully rare on real systems, but common with a very small buffer pool),
we wait for it to drain.  While waiting, we should report that as a wait
event so that users know what is going on, and also handle postmaster
death, since otherwise the loop might never terminate if the
checkpointer has exited.

Back-patch to 12.  Although the problem exists in earlier releases too,
the code is structured differently before 12 so I haven't gone any
further for now, in the absence of field complaints.

Reported-by: Andres Freund
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/20220226213942.nb7uvb2pamyu26dj%40alap3.anarazel.de

doc/src/sgml/monitoring.sgml
src/backend/storage/sync/sync.c
src/backend/utils/activity/wait_event.c
src/include/utils/wait_event.h

index b8ffc210a4a4b11a786187873a54c8dad3a903e2..cfddd33da1d1419e1591b2b13ae53de4bc2b5a69 100644 (file)
@@ -2242,6 +2242,11 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       Waiting during recovery when WAL data is not available from any
        source (pg_wal, archive or stream).
      
+     
+      RegisterSyncRequest
+      Waiting while sending synchronization requests to the
+       checkpointer, because the request queue is full.
+     
      
       VacuumDelay
       Waiting in a cost-based vacuum delay point.
index a12b35727598355944e3442f0e7aa1431098d827..ea076a4106a7824ed09e630910c26753e575a62d 100644 (file)
@@ -30,6 +30,7 @@
 #include "postmaster/bgwriter.h"
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
+#include "storage/latch.h"
 #include "storage/md.h"
 #include "utils/hsearch.h"
 #include "utils/inval.h"
@@ -606,7 +607,8 @@ RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
        if (ret || (!ret && !retryOnError))
            break;
 
-       pg_usleep(10000L);
+       WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
+                 WAIT_EVENT_REGISTER_SYNC_REQUEST);
    }
 
    return ret;
index affbcf25db60d282c51a9b638695239e10c6a24f..1a30faf8ad45cc611f226046d470ce4867a5a039 100644 (file)
@@ -485,6 +485,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
        case WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL:
            event_name = "RecoveryRetrieveRetryInterval";
            break;
+       case WAIT_EVENT_REGISTER_SYNC_REQUEST:
+           event_name = "RegisterSyncRequest";
+           break;
        case WAIT_EVENT_VACUUM_DELAY:
            event_name = "VacuumDelay";
            break;
index 1fb6f640138609fad88ba8a14d5b136b346ad4c9..4b1cea659385c479817357e22934293a14ee54a9 100644 (file)
@@ -141,7 +141,8 @@ typedef enum
    WAIT_EVENT_RECOVERY_APPLY_DELAY,
    WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
    WAIT_EVENT_VACUUM_DELAY,
-   WAIT_EVENT_CHECKPOINT_WRITE_DELAY
+   WAIT_EVENT_CHECKPOINT_WRITE_DELAY,
+   WAIT_EVENT_REGISTER_SYNC_REQUEST
 } WaitEventTimeout;
 
 /* ----------