Restart bgworkers immediately after a crash-and-restart cycle.
author: Robert Haas
Wed, 7 May 2014 20:04:47 +0000 (16:04 -0400)
committer: Robert Haas
Wed, 7 May 2014 20:19:35 +0000 (16:19 -0400)
Just as we would start bgworkers immediately after an initial startup
of the server, we should restart them immediately when reinitializing.

Petr Jelinek and Robert Haas

src/backend/postmaster/bgworker.c
src/backend/postmaster/postmaster.c
src/include/postmaster/bgworker_internals.h

index 8078a38ed725f8d8ec12c5aa7c1d9cc37f7eee86..64c97229931912383d70b24f0d2597fa039f3166 100644 (file)
@@ -394,6 +394,27 @@ BackgroundWorkerStopNotifications(pid_t pid)
    }
 }
 
+/*
+ * Reset background worker crash state.
+ *
+ * Clears rw_crashed_at for every entry in BackgroundWorkerList so that,
+ * after a crash-and-restart cycle, background workers are restarted
+ * immediately instead of waiting for bgw_restart_time to elapse.
+ */
+void
+ResetBackgroundWorkerCrashTimes(void)
+{
+   slist_mutable_iter  iter;
+
+   slist_foreach_modify(iter, &BackgroundWorkerList)
+   {
+       RegisteredBgWorker *rw;
+
+       rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+       rw->rw_crashed_at = 0;  /* forget any recorded crash time */
+   }
+}
+
 #ifdef EXEC_BACKEND
 /*
  * In EXEC_BACKEND mode, workers use this to retrieve their details from
@@ -478,13 +499,14 @@ bgworker_quickdie(SIGNAL_ARGS)
    on_exit_reset();
 
    /*
-    * Note we do exit(0) here, not exit(2) like quickdie.  The reason is that
-    * we don't want to be seen this worker as independently crashed, because
-    * then postmaster would delay restarting it again afterwards.  If some
-    * idiot DBA manually sends SIGQUIT to a random bgworker, the "dead man
-    * switch" will ensure that postmaster sees this as a crash.
+    * Note we do exit(2) not exit(0).  This is to force the postmaster into a
+    * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
+    * backend.  This is necessary precisely because we don't clean up our
+    * shared memory state.  (The "dead man switch" mechanism in pmsignal.c
+    * should ensure the postmaster sees this as a crash, too, but no harm in
+    * being doubly sure.)
     */
-   exit(0);
+   exit(2);
 }
 
 /*
index 0c6a4271a603cabf56489df1ca3baf31dcdb559d..ec1a59d061a50ffb209bbd5cb8103697a7c00d88 100644 (file)
@@ -2616,7 +2616,7 @@ reaper(SIGNAL_ARGS)
            if (PgStatPID == 0)
                PgStatPID = pgstat_start();
 
-           /* some workers may be scheduled to start now */
+           /* workers may be scheduled to start now */
            maybe_start_bgworker();
 
            /* at this point we are really open for business */
@@ -2860,7 +2860,6 @@ CleanupBackgroundWorker(int pid,
        {
            if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
            {
-               rw->rw_crashed_at = GetCurrentTimestamp();
                HandleChildCrash(pid, exitstatus, namebuf);
                return true;
            }
@@ -2871,7 +2870,6 @@ CleanupBackgroundWorker(int pid,
                 * Uh-oh, the child failed to clean itself up.  Treat as a
                 * crash after all.
                 */
-               rw->rw_crashed_at = GetCurrentTimestamp();
                HandleChildCrash(pid, exitstatus, namebuf);
                return true;
            }
@@ -3546,6 +3544,9 @@ PostmasterStateMachine(void)
        ereport(LOG,
                (errmsg("all server processes terminated; reinitializing")));
 
+       /* allow background workers to immediately restart */
+       ResetBackgroundWorkerCrashTimes();
+
        shmem_exit(1);
        reset_shared(PostPortNumber);
 
index 55401860d891e19b6bb5f2298e6897227e8a1c24..c2518a6c8d7c32da8bb033e190ba522ffa13e931 100644 (file)
@@ -43,6 +43,7 @@ extern void BackgroundWorkerStateChange(void);
 extern void ForgetBackgroundWorker(slist_mutable_iter *cur);
 extern void ReportBackgroundWorkerPID(RegisteredBgWorker *);
 extern void BackgroundWorkerStopNotifications(pid_t pid);
+extern void ResetBackgroundWorkerCrashTimes(void);
 
 /* Function to start a background worker, called from postmaster.c */
 extern void StartBackgroundWorker(void);