Change the signaling of end-of-recovery. Startup process now indicates end
author Heikki Linnakangas
Mon, 23 Feb 2009 09:28:50 +0000 (09:28 +0000)
committer Heikki Linnakangas
Mon, 23 Feb 2009 09:28:50 +0000 (09:28 +0000)
of recovery by exiting with exit code 0, like in previous releases. Per
Tom's suggestion.

src/backend/access/transam/xlog.c
src/backend/postmaster/postmaster.c
src/include/storage/pmsignal.h
src/include/storage/proc.h

index 4bc7e776b0be0bed4f46f3c7fe3c57134fc83e28..de5c352e16535b921bff9e5dcfa391cfda01c4bf 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.331 2009/02/18 15:58:40 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.332 2009/02/23 09:28:49 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -432,7 +432,7 @@ static bool InRedo = false;
 static volatile sig_atomic_t shutdown_requested = false;
 /*
  * Flag set when executing a restore command, to tell SIGTERM signal handler
- * that it's safe to just proc_exit(0).
+ * that it's safe to just proc_exit.
  */
 static volatile sig_atomic_t in_restore_command = false;
 
@@ -2752,7 +2752,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
     */
    in_restore_command = true;
    if (shutdown_requested)
-       proc_exit(0);
+       proc_exit(1);
 
    /*
     * Copy xlog from archival storage to XLOGDIR
@@ -2818,7 +2818,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
     * On SIGTERM, assume we have received a fast shutdown request, and exit
     * cleanly. It's pure chance whether we receive the SIGTERM first, or the
     * child process. If we receive it first, the signal handler will call
-    * proc_exit(0), otherwise we do it here. If we or the child process
+    * proc_exit, otherwise we do it here. If we or the child process
     * received SIGTERM for any other reason than a fast shutdown request,
     * postmaster will perform an immediate shutdown when it sees us exiting
     * unexpectedly.
@@ -2829,7 +2829,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
     * too.
     */
    if (WTERMSIG(rc) == SIGTERM)
-       proc_exit(0);
+       proc_exit(1);
 
    signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
 
@@ -5367,7 +5367,7 @@ StartupXLOG(void)
                 * recovery.
                 */
                if (shutdown_requested)
-                   proc_exit(0);
+                   proc_exit(1);
 
                /*
                 * Have we reached our safe starting point? If so, we can
@@ -7646,7 +7646,7 @@ static void
 StartupProcShutdownHandler(SIGNAL_ARGS)
 {
    if (in_restore_command)
-       proc_exit(0);
+       proc_exit(1);
    else
        shutdown_requested = true;
 }
@@ -7694,9 +7694,9 @@ StartupProcessMain(void)
 
    BuildFlatFiles(false);
 
-   /* Let postmaster know that startup is finished */
-   SendPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED);
-
-   /* exit normally */
+   /*
+    * Exit normally. Exit code 0 tells postmaster that we completed
+    * recovery successfully.
+    */
    proc_exit(0);
 }
index 97eedac506a0c9ff1ff039f49385021d060eeb68..68505edc5688e50e2691f08137ce8a2d7e2b06ab 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.572 2009/02/19 16:43:13 heikki Exp $
+ *   $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.573 2009/02/23 09:28:50 heikki Exp $
  *
  * NOTES
  *
@@ -225,15 +225,7 @@ static pid_t StartupPID = 0,
 static int Shutdown = NoShutdown;
 
 static bool FatalError = false; /* T if recovering from backend crash */
-static bool RecoveryError = false; /* T if recovery failed */
-
-/* State of WAL redo */
-#define            NoRecovery          0
-#define            RecoveryStarted     1
-#define            RecoveryConsistent  2
-#define            RecoveryCompleted   3
-
-static int RecoveryStatus = NoRecovery;
+static bool RecoveryError = false; /* T if WAL recovery failed */
 
 /*
  * We use a simple state machine to control startup, shutdown, and
@@ -252,8 +244,8 @@ static int  RecoveryStatus = NoRecovery;
  * could start accepting connections to perform read-only queries at this
  * point, if we had the infrastructure to do that.
  *
- * When the WAL redo is finished, the startup process signals us the third
- * time, and we switch to PM_RUN state. The startup process can also skip the
+ * When WAL redo is finished, the startup process exits with exit code 0
+ * and we switch to PM_RUN state. The startup process can also skip the
  * recovery and consistent recovery phases altogether, as it will during
  * normal startup when there's no recovery to be done, for example.
  *
@@ -338,7 +330,6 @@ static void pmdie(SIGNAL_ARGS);
 static void reaper(SIGNAL_ARGS);
 static void sigusr1_handler(SIGNAL_ARGS);
 static void dummy_handler(SIGNAL_ARGS);
-static void CheckRecoverySignals(void);
 static void CleanupBackend(int pid, int exitstatus);
 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
 static void LogChildExit(int lev, const char *procname,
@@ -2019,7 +2010,8 @@ pmdie(SIGNAL_ARGS)
            ereport(LOG,
                    (errmsg("received smart shutdown request")));
 
-           if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT)
+           if (pmState == PM_RUN || pmState == PM_RECOVERY ||
+               pmState == PM_RECOVERY_CONSISTENT)
            {
                /* autovacuum workers are told to shut down immediately */
                SignalAutovacWorkers(SIGTERM);
@@ -2161,21 +2153,12 @@ reaper(SIGNAL_ARGS)
        {
            StartupPID = 0;
 
-           /*
-            * Check if we've received a signal from the startup process
-            * first. This can change pmState. If the startup process sends
-            * a signal and exits immediately after that, we might not have
-            * processed the signal yet. We need to know if it completed
-            * recovery before it exited.
-            */
-           CheckRecoverySignals();
-
            /*
             * Unexpected exit of startup process (including FATAL exit)
             * during PM_STARTUP is treated as catastrophic. There is no
-            * other processes running yet.
+            * other processes running yet, so we can just exit.
             */
-           if (pmState == PM_STARTUP)
+           if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
            {
                LogChildExit(LOG, _("startup process"),
                             pid, exitstatus);
@@ -2183,6 +2166,17 @@ reaper(SIGNAL_ARGS)
                (errmsg("aborting startup due to startup process failure")));
                ExitPostmaster(1);
            }
+           /*
+            * Startup process exited in response to a shutdown request (or
+            * it completed normally regardless of the shutdown request).
+            */
+           if (Shutdown > NoShutdown &&
+               (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
+           {
+               pmState = PM_WAIT_BACKENDS;
+               /* PostmasterStateMachine logic does the rest */
+               continue;
+           }
            /*
             * Any unexpected exit (including FATAL exit) of the startup
             * process is treated as a crash, except that we don't want
@@ -2195,18 +2189,44 @@ reaper(SIGNAL_ARGS)
                                 _("startup process"));
                continue;
            }
+
            /*
-            * Startup process exited normally, but didn't finish recovery.
-            * This can happen if someone else than postmaster kills the
-            * startup process with SIGTERM. Treat it like a crash.
+            * Startup succeeded, commence normal operations
             */
-           if (pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT)
-           {
-               RecoveryError = true;
-               HandleChildCrash(pid, exitstatus,
-                                _("startup process"));
-               continue;
-           }
+           FatalError = false;
+           pmState = PM_RUN;
+
+           /*
+            * Load the flat authorization file into postmaster's cache. The
+            * startup process has recomputed this from the database contents,
+            * so we wait till it finishes before loading it.
+            */
+           load_role();
+
+           /*
+            * Crank up the background writer, if we didn't do that already
+            * when we entered consistent recovery phase.  It doesn't matter
+            * if this fails, we'll just try again later.
+            */
+           if (BgWriterPID == 0)
+               BgWriterPID = StartBackgroundWriter();
+
+           /*
+            * Likewise, start other special children as needed.  In a restart
+            * situation, some of them may be alive already.
+            */
+           if (WalWriterPID == 0)
+               WalWriterPID = StartWalWriter();
+           if (AutoVacuumingActive() && AutoVacPID == 0)
+               AutoVacPID = StartAutoVacLauncher();
+           if (XLogArchivingActive() && PgArchPID == 0)
+               PgArchPID = pgarch_start();
+           if (PgStatPID == 0)
+               PgStatPID = pgstat_start();
+
+           /* at this point we are really open for business */
+           ereport(LOG,
+               (errmsg("database system is ready to accept connections")));
        }
 
        /*
@@ -2622,124 +2642,6 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 static void
 PostmasterStateMachine(void)
 {
-   /* Startup states */
-
-   if (pmState == PM_STARTUP && RecoveryStatus > NoRecovery)
-   {
-       /* WAL redo has started. We're out of reinitialization. */
-       FatalError = false;
-
-       /*
-        * Go to shutdown mode if a shutdown request was pending.
-        */
-       if (Shutdown > NoShutdown)
-       {
-           pmState = PM_WAIT_BACKENDS;
-           /* PostmasterStateMachine logic does the rest */
-       }
-       else
-       {
-           /*
-            * Crank up the background writer.  It doesn't matter if this
-            * fails, we'll just try again later.
-            */
-           Assert(BgWriterPID == 0);
-           BgWriterPID = StartBackgroundWriter();
-
-           pmState = PM_RECOVERY;
-       }
-   }
-   if (pmState == PM_RECOVERY && RecoveryStatus >= RecoveryConsistent)
-   {
-       /*
-        * Recovery has reached a consistent recovery point. Go to shutdown
-        * mode if a shutdown request was pending.
-        */
-       if (Shutdown > NoShutdown)
-       {
-           pmState = PM_WAIT_BACKENDS;
-           /* PostmasterStateMachine logic does the rest */
-       }
-       else
-       {
-           pmState = PM_RECOVERY_CONSISTENT;
-
-           /*
-            * Load the flat authorization file into postmaster's cache. The
-            * startup process won't have recomputed this from the database yet,
-            * so we it may change following recovery. 
-            */
-           load_role();
-
-           /*
-            * Likewise, start other special children as needed.
-            */
-           Assert(PgStatPID == 0);
-           PgStatPID = pgstat_start();
-
-           /* XXX at this point we could accept read-only connections */
-           ereport(DEBUG1,
-                (errmsg("database system is in consistent recovery mode")));
-       }
-   }
-   if ((pmState == PM_RECOVERY || 
-        pmState == PM_RECOVERY_CONSISTENT ||
-        pmState == PM_STARTUP) &&
-       RecoveryStatus == RecoveryCompleted)
-   {
-       /*
-        * Startup succeeded.
-        *
-        * Go to shutdown mode if a shutdown request was pending.
-        */
-       if (Shutdown > NoShutdown)
-       {
-           pmState = PM_WAIT_BACKENDS;
-           /* PostmasterStateMachine logic does the rest */
-       }
-       else
-       {
-           /*
-            * Otherwise, commence normal operations.
-            */
-           pmState = PM_RUN;
-
-           /*
-            * Load the flat authorization file into postmaster's cache. The
-            * startup process has recomputed this from the database contents,
-            * so we wait till it finishes before loading it.
-            */
-           load_role();
-
-           /*
-            * Crank up the background writer, if we didn't do that already
-            * when we entered consistent recovery phase.  It doesn't matter
-            * if this fails, we'll just try again later.
-            */
-           if (BgWriterPID == 0)
-               BgWriterPID = StartBackgroundWriter();
-
-           /*
-            * Likewise, start other special children as needed.  In a restart
-            * situation, some of them may be alive already.
-            */
-           if (WalWriterPID == 0)
-               WalWriterPID = StartWalWriter();
-           if (AutoVacuumingActive() && AutoVacPID == 0)
-               AutoVacPID = StartAutoVacLauncher();
-           if (XLogArchivingActive() && PgArchPID == 0)
-               PgArchPID = pgarch_start();
-           if (PgStatPID == 0)
-               PgStatPID = pgstat_start();
-
-           /* at this point we are really open for business */
-           ereport(LOG,
-               (errmsg("database system is ready to accept connections")));
-       }
-   }
-
-   /* Shutdown states */
-
    if (pmState == PM_WAIT_BACKUP)
    {
        /*
@@ -2901,8 +2803,6 @@ PostmasterStateMachine(void)
        shmem_exit(1);
        reset_shared(PostPortNumber);
 
-       RecoveryStatus = NoRecovery;
-
        StartupPID = StartupDataBase();
        Assert(StartupPID != 0);
        pmState = PM_STARTUP;
@@ -4007,47 +3907,58 @@ ExitPostmaster(int status)
 }
 
 /*
- * common code used in sigusr1_handler() and reaper() to handle
- * recovery-related signals from startup process
+ * sigusr1_handler - handle signal conditions from child processes
  */
 static void
-CheckRecoverySignals(void)
+sigusr1_handler(SIGNAL_ARGS)
 {
-   bool changed = false;
+   int         save_errno = errno;
 
-   if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED))
-   {
-       Assert(pmState == PM_STARTUP);
+   PG_SETMASK(&BlockSig);
 
-       RecoveryStatus = RecoveryStarted;
-       changed = true;
-   }
-   if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT))
+   /*
+    * RECOVERY_STARTED and RECOVERY_CONSISTENT signals are ignored in
+    * unexpected states. If the startup process quickly starts up, completes
+    * recovery, and exits, we might process the death of the startup process
+    * first. We don't want to go back to recovery in that case.
+    */
+   if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
+       pmState == PM_STARTUP)
    {
-       RecoveryStatus = RecoveryConsistent;
-       changed = true;
+       /* WAL redo has started. We're out of reinitialization. */
+       FatalError = false;
+
+       /*
+        * Crank up the background writer.  It doesn't matter if this
+        * fails, we'll just try again later.
+        */
+       Assert(BgWriterPID == 0);
+       BgWriterPID = StartBackgroundWriter();
+
+       pmState = PM_RECOVERY;
    }
-   if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED))
+   if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT) &&
+       pmState == PM_RECOVERY)
    {
-       RecoveryStatus = RecoveryCompleted;
-       changed = true;
-   }
-
-   if (changed)
-       PostmasterStateMachine();
-}
+       /*
+        * Load the flat authorization file into postmaster's cache. The
+        * startup process won't have recomputed this from the database yet,
+        * so it may change following recovery.
+        */
+       load_role();
 
-/*
- * sigusr1_handler - handle signal conditions from child processes
- */
-static void
-sigusr1_handler(SIGNAL_ARGS)
-{
-   int         save_errno = errno;
+       /*
+        * Likewise, start other special children as needed.
+        */
+       Assert(PgStatPID == 0);
+       PgStatPID = pgstat_start();
 
-   PG_SETMASK(&BlockSig);
+       /* XXX at this point we could accept read-only connections */
+       ereport(DEBUG1,
+               (errmsg("database system is in consistent recovery mode")));
 
-   CheckRecoverySignals();
+       pmState = PM_RECOVERY_CONSISTENT;
+   }
 
    if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
    {
index 6d2b827c011f0a7848aa2dace25d170c0763bc49..bd6cff154ef80002ad135325dab60105d157d258 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.22 2009/02/18 15:58:41 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.23 2009/02/23 09:28:50 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,7 +24,6 @@ typedef enum
 {
    PMSIGNAL_RECOVERY_STARTED,  /* recovery has started */
    PMSIGNAL_RECOVERY_CONSISTENT, /* recovery has reached consistent state */
-   PMSIGNAL_RECOVERY_COMPLETED, /* recovery has completed */
    PMSIGNAL_PASSWORD_CHANGE,   /* pg_auth file has changed */
    PMSIGNAL_WAKEN_ARCHIVER,    /* send a NOTIFY signal to xlog archiver */
    PMSIGNAL_ROTATE_LOGFILE,    /* send SIGUSR1 to syslogger to rotate logfile */
index dccea9fa7ad060d662c0f21b4046c3b64c27eebc..b250d3f0f2d150f2e30542c8891c3d156914eab4 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.111 2009/02/19 08:02:32 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.112 2009/02/23 09:28:50 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -138,12 +138,13 @@ typedef struct PROC_HDR
 /*
  * We set aside some extra PGPROC structures for auxiliary processes,
  * ie things that aren't full-fledged backends but need shmem access.
- * 
+ *
  * Background writer, WAL writer, and autovacuum launcher run during
- * normal operation. When recovery has just finished, the startup
- * process can co-exist with them for a brief period before it exits.
+ * normal operation. Startup process also consumes one slot, but WAL
+ * writer and autovacuum launcher are launched only after it has
+ * exited.
  */
-#define NUM_AUXILIARY_PROCS        4
+#define NUM_AUXILIARY_PROCS        3
 
 
 /* configurable options */