Report progress of streaming base backup.
author Fujii Masao
Tue, 3 Mar 2020 03:03:43 +0000 (12:03 +0900)
committer Fujii Masao
Tue, 3 Mar 2020 03:03:43 +0000 (12:03 +0900)
This commit adds pg_stat_progress_basebackup view that reports
the progress while an application like pg_basebackup is taking
a base backup. This uses the progress reporting infrastructure
added by c16dc1aca5e0, adding support for streaming base backup.

Bump catversion.

Author: Fujii Masao
Reviewed-by: Kyotaro Horiguchi, Amit Langote, Sergei Kornilov
Discussion: https://postgr.es/m/9ed8b801-8215-1f3d-62d7-65bff53f6e94@oss.nttdata.com

doc/src/sgml/monitoring.sgml
doc/src/sgml/protocol.sgml
doc/src/sgml/ref/pg_basebackup.sgml
src/backend/access/transam/xlog.c
src/backend/catalog/system_views.sql
src/backend/replication/basebackup.c
src/backend/utils/adt/pgstatfuncs.c
src/include/catalog/catversion.h
src/include/commands/progress.h
src/include/pgstat.h
src/test/regress/expected/rules.out

index 87586a7b069a1d1c11a521f93bbae9161fc06146..dd4a668eea6fb16faed18b685f3b0246cb874711 100644 (file)
@@ -376,6 +376,14 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       
      
 
+     
+      pg_stat_progress_basebackup
+      One row for each WAL sender process streaming a base backup,
+       showing current progress.
+       See Base Backup Progress Reporting.
+      
+     
+
     
    
   
@@ -3535,7 +3543,10 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
    certain commands during command execution.  Currently, the only commands
    which support progress reporting are ANALYZE,
    CLUSTER,
-   CREATE INDEX, and VACUUM.
+   CREATE INDEX, VACUUM,
+   and BASE_BACKUP (i.e., replication
+   command that pg_basebackup issues to take
+   a base backup).
    This may be expanded in the future.
   
 
@@ -4336,6 +4347,156 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
    
    
   
+
+  Base Backup Progress Reporting
+
+  
+   Whenever an application like pg_basebackup
+   is taking a base backup, the
+   pg_stat_progress_basebackup
+   view will contain a row for each WAL sender process that is currently
+   running BASE_BACKUP replication command
+   and streaming the backup. The tables below describe the information
+   that will be reported and provide information about how to interpret it.
+  
+
+  
+   <structname>pg_stat_progress_basebackup</structname> View
+   
+    
+    
+      Column
+      Type
+      Description
+     
+    
+
+   
+    
+     pid
+     integer
+     Process ID of a WAL sender process.
+    
+    
+     phase
+     text
+     Current processing phase. See the Base backup phases table below.
+    
+    
+     backup_total
+     bigint
+     
+      Total amount of data that will be streamed. If progress reporting
+      is not enabled in pg_basebackup
+      (i.e., the --progress option is not specified),
+      this is 0. Otherwise, this is estimated and
+      reported as of the beginning of the
+      streaming database files phase. Note that
+      this is only an approximation since the database
+      may change during the streaming database files phase
+      and WAL files may be included in the backup later. This is always
+      the same value as backup_streamed
+      once the amount of data streamed exceeds the estimated
+      total size.
+     
+    
+    
+     backup_streamed
+     bigint
+     
+      Amount of data streamed. This counter only advances
+      when the phase is streaming database files or
+      transferring wal files.
+     
+    
+    
+     tablespaces_total
+     bigint
+     
+      Total number of tablespaces that will be streamed.
+     
+    
+    
+     tablespaces_streamed
+     bigint
+     
+      Number of tablespaces streamed. This counter only
+      advances when the phase is streaming database files.
+     
+    
+   
+   
+  
+
+  
+   Base backup phases
+   
+    
+     
+      Phase
+      Description
+     
+    
+    
+     
+      initializing
+      
+       The WAL sender process is preparing to begin the backup.
+       This phase is expected to be very brief.
+      
+     
+     
+      waiting for checkpoint to finish
+      
+       The WAL sender process is currently performing
+       pg_start_backup to set up for
+       taking a base backup, and waiting for backup start
+       checkpoint to finish.
+      
+     
+     
+      estimating backup size
+      
+       The WAL sender process is currently estimating the total amount
+       of database files that will be streamed as a base backup.
+      
+     
+     
+      streaming database files
+      
+       The WAL sender process is currently streaming database files
+       as a base backup.
+      
+     
+     
+      waiting for wal archiving to finish
+      
+       The WAL sender process is currently performing
+       pg_stop_backup to finish the backup,
+       and waiting for all the WAL files required for the base backup
+       to be successfully archived.
+       If either --wal-method=none or
+       --wal-method=stream is specified in
+       pg_basebackup, the backup will end
+       when this phase is completed.
+      
+     
+     
+      transferring wal files
+      
+       The WAL sender process is currently transferring all WAL logs
+       generated during the backup. This phase occurs after
+       waiting for wal archiving to finish phase if
+       --wal-method=fetch is specified in
+       pg_basebackup. The backup will end
+       when this phase is completed.
+      
+     
+    
+   
+  
 
  
  
index 80275215e047b9d696576bb7477d416d36fccfa6..f139ba023128452b7fda08bce9d24f176eae1b4c 100644 (file)
@@ -2465,7 +2465,7 @@ The commands accepted in replication mode are:
     
   
 
-  
+  <varlistentry id="protocol-replication-base-backup" xreflabel="BASE_BACKUP">
     BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ]
      BASE_BACKUP
     
index fc9e222f8d0b8dec3cdf9c747d1b6c0ec097415e..fc9ba2e8b00eb4b26a7e4963c681762e2f467e85 100644 (file)
@@ -104,6 +104,13 @@ PostgreSQL documentation
     
    
   
+
+  
+   Whenever pg_basebackup is taking a base
+   backup, the pg_stat_progress_basebackup
+   view will report the progress of the backup.
+   See Base Backup Progress Reporting for details.
+  
  
 
  
@@ -459,6 +466,15 @@ PostgreSQL documentation
         This may make the backup take slightly longer, and in particular it
         will take longer before the first data is sent.
        
+       
+        Whether this is enabled or not, the
+        pg_stat_progress_basebackup view
+        reports the progress of the backup on the server side. But note
+        that the total amount of data that will be streamed is estimated
+        and reported only when this option is enabled. In other words,
+        the backup_total column in the view is always
+        0 if this option is disabled.
+       
       
      
 
index d19408b3be329b760e7bf65a1725e9cca7e67c14..4361568882478df74e6f02c1b2b006fb2da0f46d 100644 (file)
@@ -39,6 +39,7 @@
 #include "catalog/catversion.h"
 #include "catalog/pg_control.h"
 #include "catalog/pg_database.h"
+#include "commands/progress.h"
 #include "commands/tablespace.h"
 #include "common/controldata_utils.h"
 #include "miscadmin.h"
@@ -10228,6 +10229,10 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
  * active at the same time, and they don't conflict with an exclusive backup
  * either.
  *
+ * tablespaces is required only when this function is called while
+ * the streaming base backup requested by pg_basebackup is running.
+ * NULL should be specified otherwise.
+ *
  * tblspcmapfile is required mainly for tar format in windows as native windows
  * utilities are not able to create symlinks while extracting files from tar.
  * However for consistency, the same is used for all platforms.
@@ -10470,6 +10475,14 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 
        datadirpathlen = strlen(DataDir);
 
+       /*
+        * Report that we are now estimating the total backup size
+        * if we're streaming base backup as requested by pg_basebackup
+        */
+       if (tablespaces)
+           pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
+                                        PROGRESS_BASEBACKUP_PHASE_ESTIMATE_BACKUP_SIZE);
+
        /* Collect information about all tablespaces */
        tblspcdir = AllocateDir("pg_tblspc");
        while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
index f681aafcf9d7f7ae3be422f29fe0f3f16519d46b..b8a3f46912d2f6718f2f6e407ff8b062dc16ec36 100644 (file)
@@ -1060,6 +1060,22 @@ CREATE VIEW pg_stat_progress_create_index AS
     FROM pg_stat_get_progress_info('CREATE INDEX') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
+CREATE VIEW pg_stat_progress_basebackup AS
+    SELECT
+        S.pid AS pid,
+        CASE S.param1 WHEN 0 THEN 'initializing'  -- param1: PROGRESS_BASEBACKUP_PHASE
+                      WHEN 1 THEN 'waiting for checkpoint to finish'
+                      WHEN 2 THEN 'estimating backup size'
+                      WHEN 3 THEN 'streaming database files'
+                      WHEN 4 THEN 'waiting for wal archiving to finish'
+                      WHEN 5 THEN 'transferring wal files'
+                      END AS phase,
+        S.param2 AS backup_total,         -- PROGRESS_BASEBACKUP_BACKUP_TOTAL
+        S.param3 AS backup_streamed,      -- PROGRESS_BASEBACKUP_BACKUP_STREAMED
+        S.param4 AS tablespaces_total,    -- PROGRESS_BASEBACKUP_TBLSPC_TOTAL
+        S.param5 AS tablespaces_streamed  -- PROGRESS_BASEBACKUP_TBLSPC_STREAMED
+    FROM pg_stat_get_progress_info('BASEBACKUP') AS S;
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
index ca8bebf432b2c511090f92474f258556bc50e8e0..f66cbc2428a47ae1939abc0a09aeadd06c16da9e 100644 (file)
@@ -19,6 +19,7 @@
 #include "access/xlog_internal.h"  /* for pg_start/stop_backup */
 #include "catalog/pg_type.h"
 #include "common/file_perm.h"
+#include "commands/progress.h"
 #include "lib/stringinfo.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
@@ -70,6 +71,7 @@ static void parse_basebackup_options(List *options, basebackup_options *opt);
 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
 static int compareWalFileNames(const ListCell *a, const ListCell *b);
 static void throttle(size_t increment);
+static void update_basebackup_progress(int64 delta);
 static bool is_checksummed_file(const char *fullpath, const char *filename);
 
 /* Was the backup currently in-progress initiated in recovery mode? */
@@ -121,6 +123,12 @@ static long long int total_checksum_failures;
 /* Do not verify checksums. */
 static bool noverify_checksums = false;
 
+/* Total amount of backup data that will be streamed */
+static int64 backup_total = 0;
+
+/* Amount of backup data already streamed */
+static int64 backup_streamed = 0;
+
 /*
  * Definition of one element part of an exclusion list, used for paths part
  * of checksum validation or base backups.  "name" is the name of the file
@@ -246,6 +254,10 @@ perform_base_backup(basebackup_options *opt)
    int         datadirpathlen;
    List       *tablespaces = NIL;
 
+   backup_total = 0;
+   backup_streamed = 0;
+   pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid);
+
    datadirpathlen = strlen(DataDir);
 
    backup_started_in_recovery = RecoveryInProgress();
@@ -255,6 +267,8 @@ perform_base_backup(basebackup_options *opt)
 
    total_checksum_failures = 0;
 
+   pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
+                                PROGRESS_BASEBACKUP_PHASE_WAIT_CHECKPOINT);
    startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
                                  labelfile, &tablespaces,
                                  tblspc_map_file,
@@ -271,8 +285,7 @@ perform_base_backup(basebackup_options *opt)
    {
        ListCell   *lc;
        tablespaceinfo *ti;
-
-       SendXlogRecPtrResult(startptr, starttli);
+       int         tblspc_streamed = 0;
 
        /*
         * Calculate the relative path of temporary statistics directory in
@@ -291,6 +304,38 @@ perform_base_backup(basebackup_options *opt)
        ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
        tablespaces = lappend(tablespaces, ti);
 
+       /*
+        * Calculate the total backup size by summing up the size of each
+        * tablespace
+        */
+       if (opt->progress)
+       {
+           foreach(lc, tablespaces)
+           {
+               tablespaceinfo *tmp = (tablespaceinfo *) lfirst(lc);
+
+               backup_total += tmp->size;
+           }
+       }
+
+       /* Report that we are now streaming database files as a base backup */
+       {
+           const int   index[] = {
+               PROGRESS_BASEBACKUP_PHASE,
+               PROGRESS_BASEBACKUP_BACKUP_TOTAL,
+               PROGRESS_BASEBACKUP_TBLSPC_TOTAL
+           };
+           const int64 val[] = {
+               PROGRESS_BASEBACKUP_PHASE_STREAM_BACKUP,
+               backup_total, list_length(tablespaces)
+           };
+
+           pgstat_progress_update_multi_param(3, index, val);
+       }
+
+       /* Send the starting position of the backup */
+       SendXlogRecPtrResult(startptr, starttli);
+
        /* Send tablespace header */
        SendBackupHeader(tablespaces);
 
@@ -372,8 +417,14 @@ perform_base_backup(basebackup_options *opt)
            }
            else
                pq_putemptymessage('c');    /* CopyDone */
+
+           tblspc_streamed++;
+           pgstat_progress_update_param(PROGRESS_BASEBACKUP_TBLSPC_STREAMED,
+                                        tblspc_streamed);
        }
 
+       pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
+                                    PROGRESS_BASEBACKUP_PHASE_WAIT_WAL_ARCHIVE);
        endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
    }
    PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
@@ -399,6 +450,9 @@ perform_base_backup(basebackup_options *opt)
        ListCell   *lc;
        TimeLineID  tli;
 
+       pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
+                                    PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL);
+
        /*
         * I'd rather not worry about timelines here, so scan pg_wal and
         * include all WAL files in the range between 'startptr' and 'endptr',
@@ -548,6 +602,7 @@ perform_base_backup(basebackup_options *opt)
                if (pq_putmessage('d', buf, cnt))
                    ereport(ERROR,
                            (errmsg("base backup could not send data, aborting backup")));
+               update_basebackup_progress(cnt);
 
                len += cnt;
                throttle(cnt);
@@ -623,6 +678,7 @@ perform_base_backup(basebackup_options *opt)
                 errmsg("checksum verification failure during base backup")));
    }
 
+   pgstat_progress_end_command();
 }
 
 /*
@@ -949,6 +1005,7 @@ sendFileWithContent(const char *filename, const char *content)
    _tarWriteHeader(filename, NULL, &statbuf, false);
    /* Send the contents as a CopyData message */
    pq_putmessage('d', content, len);
+   update_basebackup_progress(len);
 
    /* Pad to 512 byte boundary, per tar format requirements */
    pad = ((len + 511) & ~511) - len;
@@ -958,6 +1015,7 @@ sendFileWithContent(const char *filename, const char *content)
 
        MemSet(buf, 0, pad);
        pq_putmessage('d', buf, pad);
+       update_basebackup_progress(pad);
    }
 }
 
@@ -1565,6 +1623,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
        if (pq_putmessage('d', buf, cnt))
            ereport(ERROR,
                    (errmsg("base backup could not send data, aborting backup")));
+       update_basebackup_progress(cnt);
 
        len += cnt;
        throttle(cnt);
@@ -1590,6 +1649,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
        {
            cnt = Min(sizeof(buf), statbuf->st_size - len);
            pq_putmessage('d', buf, cnt);
+           update_basebackup_progress(cnt);
            len += cnt;
            throttle(cnt);
        }
@@ -1604,6 +1664,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
    {
        MemSet(buf, 0, pad);
        pq_putmessage('d', buf, pad);
+       update_basebackup_progress(pad);
    }
 
    FreeFile(fp);
@@ -1658,6 +1719,7 @@ _tarWriteHeader(const char *filename, const char *linktarget,
        }
 
        pq_putmessage('d', h, sizeof(h));
+       update_basebackup_progress(sizeof(h));
    }
 
    return sizeof(h);
@@ -1755,3 +1817,36 @@ throttle(size_t increment)
     */
    throttled_last = GetCurrentTimestamp();
 }
+
+/*
+ * Account for "delta" more bytes of streamed backup data and publish the
+ * new running total for pg_stat_progress_basebackup, raising the reported
+ * total size along with it if that would otherwise fall behind.
+ */
+static void
+update_basebackup_progress(int64 delta)
+{
+   const int   slots[] = {
+       PROGRESS_BASEBACKUP_BACKUP_STREAMED,
+       PROGRESS_BASEBACKUP_BACKUP_TOTAL
+   };
+   int64       values[2];
+   int         nvalues = 1;
+
+   /* The streamed counter is always reported, in the first slot. */
+   backup_streamed += delta;
+   values[0] = backup_streamed;
+
+   /*
+    * A nonzero total is only an estimate (extra data such as WAL can push
+    * the stream past it).  Once that happens, drag the total along so the
+    * view never shows backup_streamed larger than backup_total.
+    */
+   if (backup_total > 0 && backup_total < backup_streamed)
+   {
+       values[1] = backup_total = backup_streamed;
+       nvalues = 2;
+   }
+
+   pgstat_progress_update_multi_param(nvalues, slots, values);
+}
index 7e6a3c177412970f5d357dc9f19730cb338d98d1..54d26732541ccbefc596fea9b361add02d70dfd9 100644 (file)
@@ -474,6 +474,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS)
        cmdtype = PROGRESS_COMMAND_CLUSTER;
    else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0)
        cmdtype = PROGRESS_COMMAND_CREATE_INDEX;
+   else if (pg_strcasecmp(cmd, "BASEBACKUP") == 0)
+       cmdtype = PROGRESS_COMMAND_BASEBACKUP;
    else
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
index 740e36fdf33e0a3ec0d20e8dc81f4322a7f8f1bb..d4fe84a03793b18f1cc2bc67f50ad07c9b193a04 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202002271
+#define CATALOG_VERSION_NO 202003031
 
 #endif
index 12e9d3d42f81debee44b152ad3bbafe9e11e9163..36b073e67757bb0446dd849e78ab4124c029c9c1 100644 (file)
 #define PROGRESS_SCAN_BLOCKS_TOTAL             15
 #define PROGRESS_SCAN_BLOCKS_DONE              16
 
+/*
+ * Progress parameters for pg_basebackup.  Each value is a progress parameter
+ * index; the pg_stat_progress_basebackup view exposes index N as column
+ * param(N+1), e.g. PROGRESS_BASEBACKUP_PHASE is read back as param1.
+ */
+#define PROGRESS_BASEBACKUP_PHASE              0
+#define PROGRESS_BASEBACKUP_BACKUP_TOTAL           1
+#define PROGRESS_BASEBACKUP_BACKUP_STREAMED            2
+#define PROGRESS_BASEBACKUP_TBLSPC_TOTAL           3
+#define PROGRESS_BASEBACKUP_TBLSPC_STREAMED            4
+
+/*
+ * Phases of pg_basebackup (as advertised via PROGRESS_BASEBACKUP_PHASE).
+ * Value 0 has no symbol here; the pg_stat_progress_basebackup view labels
+ * it 'initializing'.
+ */
+#define PROGRESS_BASEBACKUP_PHASE_WAIT_CHECKPOINT      1
+#define PROGRESS_BASEBACKUP_PHASE_ESTIMATE_BACKUP_SIZE     2
+#define PROGRESS_BASEBACKUP_PHASE_STREAM_BACKUP            3
+#define PROGRESS_BASEBACKUP_PHASE_WAIT_WAL_ARCHIVE     4
+#define PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL         5
+
 #endif
index 3a65a5169626a33444638e3b0ed63d42def178ff..7bc36c65838bcd45249721ba241821fc470bfe3a 100644 (file)
@@ -958,7 +958,8 @@ typedef enum ProgressCommandType
    PROGRESS_COMMAND_VACUUM,
    PROGRESS_COMMAND_ANALYZE,
    PROGRESS_COMMAND_CLUSTER,
-   PROGRESS_COMMAND_CREATE_INDEX
+   PROGRESS_COMMAND_CREATE_INDEX,
+   PROGRESS_COMMAND_BASEBACKUP
 } ProgressCommandType;
 
 #define PGSTAT_NUM_PROGRESS_PARAM  20
index 634f8256f748cdbfa709de44f06193e64340b4db..c7304611c3ad0a8b46fa0c76b0025f00f101add1 100644 (file)
@@ -1876,6 +1876,21 @@ pg_stat_progress_analyze| SELECT s.pid,
     (s.param8)::oid AS current_child_table_relid
    FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
+pg_stat_progress_basebackup| SELECT s.pid,
+        CASE s.param1
+            WHEN 0 THEN 'initializing'::text
+            WHEN 1 THEN 'waiting for checkpoint to finish'::text
+            WHEN 2 THEN 'estimating backup size'::text
+            WHEN 3 THEN 'streaming database files'::text
+            WHEN 4 THEN 'waiting for wal archiving to finish'::text
+            WHEN 5 THEN 'transferring wal files'::text
+            ELSE NULL::text
+        END AS phase,
+    s.param2 AS backup_total,
+    s.param3 AS backup_streamed,
+    s.param4 AS tablespaces_total,
+    s.param5 AS tablespaces_streamed
+   FROM pg_stat_get_progress_info('BASEBACKUP'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20);
 pg_stat_progress_cluster| SELECT s.pid,
     s.datid,
     d.datname,