Before removing backup_label and irrevocably changing pg_control file, check
author Heikki Linnakangas
Tue, 26 Oct 2010 18:15:42 +0000 (21:15 +0300)
committer Heikki Linnakangas
Tue, 26 Oct 2010 18:19:30 +0000 (21:19 +0300)
that WAL file containing the checkpoint redo-location can be found. This
avoids making the cluster irrecoverable if the redo location is in an earlier
WAL file than the checkpoint record.

Report, analysis and patch by Jeff Davis, with small changes by me.

src/backend/access/transam/xlog.c

index b70687bd37ccf5450b0a58e15f0e2755dcd2318f..e3196e96d33bb2a0279ed95006362695337cf70c 100644 (file)
@@ -5823,14 +5823,29 @@ StartupXLOG(void)
        record = ReadCheckpointRecord(checkPointLoc, 0);
        if (record != NULL)
        {
+           memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
            ereport(DEBUG1,
                    (errmsg("checkpoint record is at %X/%X",
                            checkPointLoc.xlogid, checkPointLoc.xrecoff)));
            InRecovery = true;  /* force recovery even if SHUTDOWNED */
+
+           /*
+            * Make sure that REDO location exists. This may not be
+            * the case if there was a crash during an online backup,
+            * which left a backup_label around that references a WAL
+            * segment that's already been archived.
+            */
+           if (XLByteLT(checkPoint.redo, checkPointLoc))
+           {
+               if (!ReadRecord(&(checkPoint.redo), LOG, false))
+                   ereport(FATAL,
+                           (errmsg("could not find redo location referenced by checkpoint record"),
+                            errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
+           }
        }
        else
        {
-           ereport(PANIC,
+           ereport(FATAL,
                    (errmsg("could not locate required checkpoint record"),
                     errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
        }
@@ -5876,10 +5891,10 @@ StartupXLOG(void)
                ereport(PANIC,
                     (errmsg("could not locate a valid checkpoint record")));
        }
+       memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
    }
 
    LastRec = RecPtr = checkPointLoc;
-   memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
    wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
 
    ereport(DEBUG1,