Allow archiving via loadable modules.

author Robert Haas

Thu, 3 Feb 2022 18:57:27 +0000 (13:57 -0500)

committer Robert Haas

Thu, 3 Feb 2022 19:05:02 +0000 (14:05 -0500)
author Robert Haas
Thu, 3 Feb 2022 18:57:27 +0000 (13:57 -0500)
committer Robert Haas
Thu, 3 Feb 2022 19:05:02 +0000 (14:05 -0500)
diff --git a/contrib/Makefile b/contrib/Makefile

index 87bf87ab90f8e05c8749f03b6fb375f39581033b..e3e221308bbec2c880df71bcd41eecac22609627 100644 (file)
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -9,6 +9,7 @@ SUBDIRS = \
         amcheck     \
         auth_delay  \
         auto_explain    \
+       basic_archive   \
         bloom       \
         btree_gin   \
         btree_gist  \
diff --git a/contrib/basic_archive/.gitignore b/contrib/basic_archive/.gitignore

new file mode 100644 (file)

index 0000000..5dcb3ff
--- /dev/null
+++ b/contrib/basic_archive/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/basic_archive/Makefile b/contrib/basic_archive/Makefile

new file mode 100644 (file)

index 0000000..14d036e
--- /dev/null
+++ b/contrib/basic_archive/Makefile
@@ -0,0 +1,20 @@
+# contrib/basic_archive/Makefile
+
+MODULES = basic_archive
+PGFILEDESC = "basic_archive - basic archive module"
+
+REGRESS = basic_archive
+REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/basic_archive/basic_archive.conf
+
+NO_INSTALLCHECK = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/basic_archive
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c

new file mode 100644 (file)

index 0000000..16ddddc
--- /dev/null
+++ b/contrib/basic_archive/basic_archive.c
@@ -0,0 +1,371 @@
+/*-------------------------------------------------------------------------
+ *
+ * basic_archive.c
+ *
+ * This file demonstrates a basic archive library implementation that is
+ * roughly equivalent to the following shell command:
+ *
+ *         test ! -f /path/to/dest && cp /path/to/src /path/to/dest
+ *
+ * One notable difference between this module and the shell command above
+ * is that this module first copies the file to a temporary destination,
+ * syncs it to disk, and then durably moves it to the final destination.
+ *
+ * Another notable difference is that if /path/to/dest already exists
+ * but has contents identical to /path/to/src, archiving will succeed,
+ * whereas the command shown above would fail. This prevents problems if
+ * a file is successfully archived and then the system crashes before
+ * a durable record of the success has been made.
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *   contrib/basic_archive/basic_archive.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "common/int.h"
+#include "miscadmin.h"
+#include "postmaster/pgarch.h"
+#include "storage/copydir.h"
+#include "storage/fd.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+
+PG_MODULE_MAGIC;
+
+void _PG_init(void);
+void _PG_archive_module_init(ArchiveModuleCallbacks *cb);
+
+static char *archive_directory = NULL;
+static MemoryContext basic_archive_context;
+
+static bool basic_archive_configured(void);
+static bool basic_archive_file(const char *file, const char *path);
+static void basic_archive_file_internal(const char *file, const char *path);
+static bool check_archive_directory(char **newval, void **extra, GucSource source);
+static bool compare_files(const char *file1, const char *file2);
+
+/*
+ * _PG_init
+ *
+ * Defines the module's GUC.
+ *
+ * Registers the string setting basic_archive.archive_directory, backed by
+ * the archive_directory variable, with an empty-string default, the
+ * PGC_SIGHUP context, and check_archive_directory as its check hook.
+ *
+ * Also creates the "basic_archive" memory context as a child of
+ * TopMemoryContext; basic_archive_file() switches into that context while
+ * archiving and resets it afterwards.
+ *
+ * NOTE(review): EmitWarningsOnPlaceholders("basic_archive") presumably
+ * reports unrecognized basic_archive.* settings -- confirm against the GUC
+ * code.
+ */
+void
+_PG_init(void)
+{
+   DefineCustomStringVariable("basic_archive.archive_directory",
+                              gettext_noop("Archive file destination directory."),
+                              NULL,
+                              &archive_directory,
+                              "",
+                              PGC_SIGHUP,
+                              0,
+                              check_archive_directory, NULL, NULL);
+
+   EmitWarningsOnPlaceholders("basic_archive");
+
+   basic_archive_context = AllocSetContextCreate(TopMemoryContext,
+                                                 "basic_archive",
+                                                 ALLOCSET_DEFAULT_SIZES);
+}
+
+/*
+ * _PG_archive_module_init
+ *
+ * Hands this module's callbacks to the caller by filling in the supplied
+ * struct.  Only the archive-file and check-configured callbacks are
+ * assigned; shutdown_cb is not touched here.
+ */
+void
+_PG_archive_module_init(ArchiveModuleCallbacks *cb)
+{
+   AssertVariableIsOfType(&_PG_archive_module_init, ArchiveModuleInit);
+
+   cb->archive_file_cb = basic_archive_file;
+   cb->check_configured_cb = basic_archive_configured;
+}
+
+/*
+ * check_archive_directory
+ *
+ * GUC check hook for basic_archive.archive_directory.  An unset or empty
+ * value is accepted; any other value must be short enough to leave room for
+ * a file name and must currently name a directory.
+ *
+ * Returns true to accept the new value, false (after setting an error
+ * detail) to reject it.
+ */
+static bool
+check_archive_directory(char **newval, void **extra, GucSource source)
+{
+   const char *dir = *newval;
+   struct stat statbuf;
+
+   /*
+    * An empty string is the default, so it has to be let through here.  The
+    * check_configured callback separately keeps archiving from proceeding
+    * while the setting is still empty.
+    */
+   if (dir == NULL || dir[0] == '\0')
+       return true;
+
+   /*
+    * Leave room for the file name.  The docs indicate that the file names
+    * to be archived can be up to 64 characters long.
+    */
+   if (strlen(dir) + 64 + 2 >= MAXPGPATH)
+   {
+       GUC_check_errdetail("archive directory too long");
+       return false;
+   }
+
+   /*
+    * Basic sanity check that the directory exists right now.  It could
+    * still be removed later, so the archiving logic must be prepared for
+    * it to be missing.
+    */
+   if (stat(dir, &statbuf) == 0 && S_ISDIR(statbuf.st_mode))
+       return true;
+
+   GUC_check_errdetail("specified archive directory does not exist");
+   return false;
+}
+
+/*
+ * basic_archive_configured
+ *
+ * Reports whether archive_directory has been set to a non-empty value.
+ */
+static bool
+basic_archive_configured(void)
+{
+   if (archive_directory == NULL)
+       return false;
+
+   return archive_directory[0] != '\0';
+}
+
+/*
+ * basic_archive_file
+ *
+ * Archives one file.
+ *
+ * "file" and "path" are passed through unchanged to
+ * basic_archive_file_internal(), which does the actual work.
+ *
+ * Returns true if basic_archive_file_internal() returned normally, and
+ * false if control came back through the local sigsetjmp() handler, i.e. an
+ * ERROR was raised while archiving.  On both paths the caller's memory
+ * context is restored, basic_archive_context is reset, and
+ * PG_exception_stack is set back to NULL before returning.
+ */
+static bool
+basic_archive_file(const char *file, const char *path)
+{
+   sigjmp_buf  local_sigjmp_buf;
+   MemoryContext oldcontext;
+
+   /*
+    * We run basic_archive_file_internal() in our own memory context so that we
+    * can easily reset it during error recovery (thus avoiding memory leaks).
+    */
+   oldcontext = MemoryContextSwitchTo(basic_archive_context);
+
+   /*
+    * Since the archiver operates at the bottom of the exception stack, ERRORs
+    * turn into FATALs and cause the archiver process to restart.  However,
+    * using ereport(ERROR, ...) when there are problems is easy to code and
+    * maintain.  Therefore, we create our own exception handler to catch ERRORs
+    * and return false instead of restarting the archiver whenever there is a
+    * failure.
+    */
+   if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+   {
+       /* Since not using PG_TRY, must reset error stack by hand */
+       error_context_stack = NULL;
+
+       /* Prevent interrupts while cleaning up */
+       HOLD_INTERRUPTS();
+
+       /* Report the error and clear ErrorContext for next time */
+       EmitErrorReport();
+       FlushErrorState();
+
+       /* Close any files left open by copy_file() or compare_files() */
+       AtEOSubXact_Files(false, InvalidSubTransactionId, InvalidSubTransactionId);
+
+       /* Reset our memory context and switch back to the original one */
+       MemoryContextSwitchTo(oldcontext);
+       MemoryContextReset(basic_archive_context);
+
+       /* Remove our exception handler */
+       PG_exception_stack = NULL;
+
+       /* Now we can allow interrupts again */
+       RESUME_INTERRUPTS();
+
+       /* Report failure so that the archiver retries this file */
+       return false;
+   }
+
+   /* Enable our exception handler */
+   PG_exception_stack = &local_sigjmp_buf;
+
+   /* Archive the file! */
+   basic_archive_file_internal(file, path);
+
+   /* Remove our exception handler */
+   PG_exception_stack = NULL;
+
+   /* Reset our memory context and switch back to the original one */
+   MemoryContextSwitchTo(oldcontext);
+   MemoryContextReset(basic_archive_context);
+
+   return true;
+}
+
+/*
+ * basic_archive_file_internal
+ *
+ * Does the actual work of archiving "file", whose contents are read from
+ * "path", into archive_directory.  Returns normally on success; every
+ * failure is reported with ereport/elog at ERROR level.
+ *
+ * If the destination already exists with identical contents it is synced to
+ * disk and treated as success.  If it exists with different contents an
+ * ERROR is raised.  Otherwise the file is copied to a temporary name inside
+ * archive_directory and then durably renamed to its final name.
+ */
+static void
+basic_archive_file_internal(const char *file, const char *path)
+{
+   char        destination[MAXPGPATH];
+   char        temp[MAXPGPATH + 256];
+   struct stat st;
+   struct timeval tv;
+   uint64      epoch;          /* milliseconds */
+
+   ereport(DEBUG3,
+           (errmsg("archiving \"%s\" via basic_archive", file)));
+
+   snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
+
+   /*
+    * First, check if the file has already been archived.  If it already exists
+    * and has the same contents as the file we're trying to archive, we can
+    * return success (after ensuring the file is persisted to disk). This
+    * scenario is possible if the server crashed after archiving the file but
+    * before renaming its .ready file to .done.
+    *
+    * If the archive file already exists but has different contents, something
+    * might be wrong, so we just fail.
+    */
+   if (stat(destination, &st) == 0)
+   {
+       if (compare_files(path, destination))
+       {
+           ereport(DEBUG3,
+                   (errmsg("archive file \"%s\" already exists with identical contents",
+                           destination)));
+
+           fsync_fname(destination, false);
+           fsync_fname(archive_directory, true);
+
+           return;
+       }
+
+       ereport(ERROR,
+               (errmsg("archive file \"%s\" already exists", destination)));
+   }
+   else if (errno != ENOENT)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not stat file \"%s\": %m", destination)));
+
+   /*
+    * Pick a sufficiently unique name for the temporary file so that a
+    * collision is unlikely.  This helps avoid problems in case a temporary
+    * file was left around after a crash or another server happens to be
+    * archiving to the same directory.
+    *
+    * The suffix is the current time in milliseconds: seconds are scaled up
+    * by 1000, so the microsecond part must be scaled down by 1000 before it
+    * is added, otherwise the two terms would be in different units.
+    */
+   gettimeofday(&tv, NULL);
+   if (pg_mul_u64_overflow((uint64) 1000, (uint64) tv.tv_sec, &epoch) ||
+       pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
+       elog(ERROR, "could not generate temporary file name for archiving");
+
+   snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
+            archive_directory, "archtemp", file, MyProcPid, epoch);
+
+   /*
+    * Copy the file to its temporary destination.  Note that this will fail if
+    * temp already exists.
+    */
+   copy_file(unconstify(char *, path), temp);
+
+   /*
+    * Sync the temporary file to disk and move it to its final destination.
+    * This will fail if destination already exists.
+    */
+   (void) durable_rename_excl(temp, destination, ERROR);
+
+   ereport(DEBUG1,
+           (errmsg("archived \"%s\" via basic_archive", file)));
+}
+
+#define CMP_BUF_SIZE (4096)
+
+/*
+ * fill_compare_buffer
+ *
+ * Helper for compare_files.  Reads from fd into buf until CMP_BUF_SIZE bytes
+ * have been collected or read() reports end of file, and returns the number
+ * of bytes now in buf.  A read failure raises an ERROR naming "filename".
+ */
+static int
+fill_compare_buffer(int fd, const char *filename, char *buf)
+{
+   int         filled = 0;
+
+   while (filled < CMP_BUF_SIZE)
+   {
+       int         got = read(fd, buf + filled, CMP_BUF_SIZE - filled);
+
+       if (got < 0)
+           ereport(ERROR,
+                   (errcode_for_file_access(),
+                    errmsg("could not read file \"%s\": %m", filename)));
+       if (got == 0)
+           break;
+
+       filled += got;
+   }
+
+   return filled;
+}
+
+/*
+ * compare_files
+ *
+ * Reads both files chunk by chunk and reports whether they hold exactly the
+ * same bytes.  Failure to open, read, or close either file raises an ERROR.
+ */
+static bool
+compare_files(const char *file1, const char *file2)
+{
+   char        chunk1[CMP_BUF_SIZE];
+   char        chunk2[CMP_BUF_SIZE];
+   int         first_fd;
+   int         second_fd;
+   bool        identical = true;
+
+   first_fd = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
+   if (first_fd < 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not open file \"%s\": %m", file1)));
+
+   second_fd = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
+   if (second_fd < 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not open file \"%s\": %m", file2)));
+
+   /* Stop at the first differing chunk, or once both files are exhausted */
+   while (identical)
+   {
+       int         len1 = fill_compare_buffer(first_fd, file1, chunk1);
+       int         len2 = fill_compare_buffer(second_fd, file2, chunk2);
+
+       if (len1 != len2 || memcmp(chunk1, chunk2, len1) != 0)
+           identical = false;
+       else if (len1 == 0)
+           break;
+   }
+
+   if (CloseTransientFile(first_fd) != 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not close file \"%s\": %m", file1)));
+
+   if (CloseTransientFile(second_fd) != 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not close file \"%s\": %m", file2)));
+
+   return identical;
+}
diff --git a/contrib/basic_archive/basic_archive.conf b/contrib/basic_archive/basic_archive.conf

new file mode 100644 (file)

index 0000000..b26b2d4
--- /dev/null
+++ b/contrib/basic_archive/basic_archive.conf
@@ -0,0 +1,3 @@
+archive_mode = 'on'
+archive_library = 'basic_archive'
+basic_archive.archive_directory = '.'
diff --git a/contrib/basic_archive/expected/basic_archive.out b/contrib/basic_archive/expected/basic_archive.out

new file mode 100644 (file)

index 0000000..0015053
--- /dev/null
+++ b/contrib/basic_archive/expected/basic_archive.out
@@ -0,0 +1,29 @@
+CREATE TABLE test (a INT);
+SELECT 1 FROM pg_switch_wal();
+ ?column? 
+----------
+        1
+(1 row)
+
+DO $$
+DECLARE
+   archived bool;
+   loops int := 0;
+BEGIN
+   LOOP
+       archived := count(*) > 0 FROM pg_ls_dir('.', false, false) a
+           WHERE a ~ '^[0-9A-F]{24}$';
+       IF archived OR loops > 120 * 10 THEN EXIT; END IF;
+       PERFORM pg_sleep(0.1);
+       loops := loops + 1;
+   END LOOP;
+END
+$$;
+SELECT count(*) > 0 FROM pg_ls_dir('.', false, false) a
+   WHERE a ~ '^[0-9A-F]{24}$';
+ ?column? 
+----------
+ t
+(1 row)
+
+DROP TABLE test;
diff --git a/contrib/basic_archive/sql/basic_archive.sql b/contrib/basic_archive/sql/basic_archive.sql

new file mode 100644 (file)

index 0000000..14e236d
--- /dev/null
+++ b/contrib/basic_archive/sql/basic_archive.sql
@@ -0,0 +1,22 @@
+CREATE TABLE test (a INT);
+SELECT 1 FROM pg_switch_wal();
+
+DO $$
+DECLARE
+   archived bool;
+   loops int := 0;
+BEGIN
+   LOOP
+       archived := count(*) > 0 FROM pg_ls_dir('.', false, false) a
+           WHERE a ~ '^[0-9A-F]{24}$';
+       IF archived OR loops > 120 * 10 THEN EXIT; END IF;
+       PERFORM pg_sleep(0.1);
+       loops := loops + 1;
+   END LOOP;
+END
+$$;
+
+SELECT count(*) > 0 FROM pg_ls_dir('.', false, false) a
+   WHERE a ~ '^[0-9A-F]{24}$';
+
+DROP TABLE test;
diff --git a/doc/src/sgml/archive-modules.sgml b/doc/src/sgml/archive-modules.sgml

new file mode 100644 (file)

index 0000000..f1189dd
--- /dev/null
+++ b/doc/src/sgml/archive-modules.sgml
@@ -0,0 +1,136 @@
+
+
+
+ Archive Modules
+ 
+  Archive Modules
+ 
+
+ 
+  PostgreSQL provides infrastructure to create custom modules for continuous
+  archiving (see ).  While archiving via
+  a shell command (i.e., ) is much
+  simpler, a custom archive module will often be considerably more robust and
+  performant.
+ 
+
+ 
+  When a custom  is configured, PostgreSQL
+  will submit completed WAL files to the module, and the server will avoid
+  recycling or removing these WAL files until the module indicates that the files
+  were successfully archived.  It is ultimately up to the module to decide what
+  to do with each WAL file, but many recommendations are listed at
+  .
+ 
+
+ 
+  Archiving modules must at least consist of an initialization function (see
+  ) and the required callbacks (see
+  ).  However, archive modules are
+  also permitted to do much more (e.g., declare GUCs and register background
+  workers).
+ 
+
+ 
+  The contrib/basic_archive module contains a working
+  example, which demonstrates some useful techniques.
+ 
+
+ 
+  Initialization Functions
+  
+   _PG_archive_module_init
+  
+  
+   An archive library is loaded by dynamically loading a shared library with the
+   's name as the library base name.  The
+   normal library search path is used to locate the library.  To provide the
+   required archive module callbacks and to indicate that the library is
+   actually an archive module, it needs to provide a function named
+   _PG_archive_module_init.  This function is passed a
+   struct that needs to be filled with the callback function pointers for
+   individual actions.
+
+
+typedef struct ArchiveModuleCallbacks
+{
+    ArchiveCheckConfiguredCB check_configured_cb;
+    ArchiveFileCB archive_file_cb;
+    ArchiveShutdownCB shutdown_cb;
+} ArchiveModuleCallbacks;
+typedef void (*ArchiveModuleInit) (struct ArchiveModuleCallbacks *cb);
+
+
+   Only the archive_file_cb callback is required.  The
+   others are optional.
+  
+ 
+
+ 
+  Archive Module Callbacks
+  
+   The archive callbacks define the actual archiving behavior of the module.
+   The server will call them as required to process each individual WAL file.
+  
+
+  
+   Check Callback
+   
+    The check_configured_cb callback is called to determine
+    whether the module is fully configured and ready to accept WAL files (e.g.,
+    its configuration parameters are set to valid values).  If no
+    check_configured_cb is defined, the server always
+    assumes the module is configured.
+
+
+typedef bool (*ArchiveCheckConfiguredCB) (void);
+
+
+    If true is returned, the server will proceed with
+    archiving the file by calling the archive_file_cb
+    callback.  If false is returned, archiving will not
+    proceed, and the archiver will emit the following message to the server log:
+
+WARNING:  archive_mode enabled, yet archiving is not configured
+
+    In the latter case, the server will periodically call this function, and
+    archiving will proceed only when it returns true.
+   
+  
+
+  
+   Archive Callback
+   
+    The archive_file_cb callback is called to archive a
+    single WAL file.
+
+
+typedef bool (*ArchiveFileCB) (const char *file, const char *path);
+
+
+    If true is returned, the server proceeds as if the file
+    was successfully archived, which may include recycling or removing the
+    original WAL file.  If false is returned, the server will
+    keep the original WAL file and retry archiving later.
+    file will contain just the file name of the WAL file to
+    archive, while path contains the full path of the WAL
+    file (including the file name).
+   
+  
+
+  
+   Shutdown Callback
+   
+    The shutdown_cb callback is called when the archiver
+    process exits (e.g., after an error) or the value of
+     changes.  If no
+    shutdown_cb is defined, no special action is taken in
+    these situations.
+
+
+typedef void (*ArchiveShutdownCB) (void);
+
+   
+  
+ 
+
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml

index cba32b6eb3e521f702e5304ef4f489b77ff4114b..0d69851bb1c80d68003a6ab1ee1663a263ad5286 100644 (file)
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -593,20 +593,23 @@ tar -cf backup.tar /usr/local/pgsql/data
      provide the database administrator with flexibility,
      PostgreSQL tries not to make any assumptions about how
      the archiving will be done.  Instead, PostgreSQL lets
-    the administrator specify a shell command to be executed to copy a
-    completed segment file to wherever it needs to go.  The command could be
-    as simple as a cp, or it could invoke a complex shell
-    script — it's all up to you.
+    the administrator specify an archive library to be executed to copy a
+    completed segment file to wherever it needs to go.  This could be as simple
+    as a shell command that uses cp, or it could invoke a
+    complex C function — it's all up to you.
     
  
     
      To enable WAL archiving, set the 
      configuration parameter to replica or higher,
       to on,
-    and specify the shell command to use in the 
-    linkend="guc-archive-command"/> configuration parameter.  In practice
+    and specify the library to use in the 
+    linkend="guc-archive-library"/> configuration parameter.  In practice
     these settings will always be placed in the
     postgresql.conf file.
+    One simple way to archive is to set archive_library to
+    an empty string and to specify a shell command in
+    .
     In archive_command,
     %p is replaced by the path name of the file to
     archive, while %f is replaced by only the file name.
@@ -631,7 +634,17 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    The archive command will be executed under the ownership of the same
+    Another way to archive is to use a custom archive module as the
+    archive_library.  Since such modules are written in
+    C, creating your own may require considerably more effort
+    than writing a shell command.  However, archive modules can be more
+    performant than archiving via shell, and they will have access to many
+    useful server resources.  For more information about archive modules, see
+    .
+   
+
+   
+    The archive library will be executed under the ownership of the same
     user that the PostgreSQL server is running as.  Since
     the series of WAL files being archived contains effectively everything
     in your database, you will want to be sure that the archived data is
@@ -640,25 +653,31 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    It is important that the archive command return zero exit status if and
-    only if it succeeds.  Upon getting a zero result,
+    It is important that the archive function return true if
+    and only if it succeeds.  If true is returned,
     PostgreSQL will assume that the file has been
-    successfully archived, and will remove or recycle it.  However, a nonzero
-    status tells PostgreSQL that the file was not archived;
-    it will try again periodically until it succeeds.
+    successfully archived, and will remove or recycle it.  However, a return
+    value of false tells
+    PostgreSQL that the file was not archived; it
+    will try again periodically until it succeeds.  If you are archiving via a
+    shell command, the appropriate return values can be achieved by returning
+    0 if the command succeeds and a nonzero value if it
+    fails.
    
 
    
-    When the archive command is terminated by a signal (other than
-    SIGTERM that is used as part of a server
-    shutdown) or an error by the shell with an exit status greater than
-    125 (such as command not found), the archiver process aborts and gets
-    restarted by the postmaster. In such cases, the failure is
-    not reported in .
+    If the archive function emits an ERROR or
+    FATAL, the archiver process aborts and gets restarted by
+    the postmaster.  If you are archiving via shell command, FATAL is emitted if
+    the command is terminated by a signal (other than
+    SIGTERM that is used as part of a server shutdown)
+    or an error by the shell with an exit status greater than 125 (such as
+    command not found).  In such cases, the failure is not reported in
+    .
    
 
    
-    The archive command should generally be designed to refuse to overwrite
+    The archive library should generally be designed to refuse to overwrite
     any pre-existing archive file.  This is an important safety feature to
     preserve the integrity of your archive in case of administrator error
     (such as sending the output of two different servers to the same archive
@@ -666,9 +685,9 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    It is advisable to test your proposed archive command to ensure that it
+    It is advisable to test your proposed archive library to ensure that it
     indeed does not overwrite an existing file, and that it returns
-    nonzero status in this case.
+    false in this case.
     The example command above for Unix ensures this by including a separate
     test step.  On some Unix platforms, cp has
     switches such as  that can be used to do the same thing
@@ -680,7 +699,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
 
    
     While designing your archiving setup, consider what will happen if
-    the archive command fails repeatedly because some aspect requires
+    the archive library fails repeatedly because some aspect requires
     operator intervention or the archive runs out of space. For example, this
     could occur if you write to tape without an autochanger; when the tape
     fills, nothing further can be archived until the tape is swapped.
@@ -695,7 +714,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    The speed of the archiving command is unimportant as long as it can keep up
+    The speed of the archive library is unimportant as long as it can keep up
     with the average rate at which your server generates WAL data.  Normal
     operation continues even if the archiving process falls a little behind.
     If archiving falls significantly behind, this will increase the amount of
@@ -707,11 +726,11 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    In writing your archive command, you should assume that the file names to
+    In writing your archive library, you should assume that the file names to
     be archived can be up to 64 characters long and can contain any
     combination of ASCII letters, digits, and dots.  It is not necessary to
-    preserve the original relative path (%p) but it is necessary to
-    preserve the file name (%f).
+    preserve the original relative path but it is necessary to preserve the file
+    name.
    
 
    
@@ -728,7 +747,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    The archive command is only invoked on completed WAL segments.  Hence,
+    The archive function is only invoked on completed WAL segments.  Hence,
     if your server generates only little WAL traffic (or has slack periods
     where it does so), there could be a long delay between the completion
     of a transaction and its safe recording in archive storage.  To put
@@ -757,8 +776,9 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     turned on during execution of one of these statements, WAL would not
     contain enough information for archive recovery.  (Crash recovery is
     unaffected.)  For this reason, wal_level can only be changed at
-    server start.  However, archive_command can be changed with a
-    configuration file reload.  If you wish to temporarily stop archiving,
+    server start.  However, archive_library can be changed with a
+    configuration file reload.  If you are archiving via shell and wish to
+    temporarily stop archiving,
     one way to do it is to set archive_command to the empty
     string ('').
     This will cause WAL files to accumulate in pg_wal/ until a
@@ -938,11 +958,11 @@ SELECT * FROM pg_stop_backup(false, true);
      On a standby, archive_mode must be always in order
      for pg_stop_backup to wait.
      Archiving of these files happens automatically since you have
-     already configured archive_command. In most cases this
+     already configured archive_library. In most cases this
      happens quickly, but you are advised to monitor your archive
      system to ensure there are no delays.
      If the archive process has fallen behind
-     because of failures of the archive command, it will keep retrying
+     because of failures of the archive library, it will keep retrying
      until the archive succeeds and the backup is complete.
      If you wish to place a time limit on the execution of
      pg_stop_backup, set an appropriate
@@ -1500,9 +1520,10 @@ restore_command = 'cp /mnt/server/archivedir/%f %p'
       To prepare for low level standalone hot backups, make sure
       wal_level is set to
       replica or higher, archive_mode to
-      on, and set up an archive_command that performs
+      on, and set up an archive_library that performs
       archiving only when a switch file exists.  For example:
 
+archive_library = ''  # use shell command
 archive_command = 'test ! -f /var/lib/pgsql/backup_in_progress || (test ! -f /var/lib/pgsql/archive/%f && cp %p /var/lib/pgsql/archive/%f)'
 
       This command will perform archiving when


diff --git a/doc/src/sgml/basic-archive.sgml b/doc/src/sgml/basic-archive.sgml

new file mode 100644 (file)

index 0000000..0b650f1


--- /dev/null
+++ b/doc/src/sgml/basic-archive.sgml
@@ -0,0 +1,81 @@
+
+
+
+ basic_archive
+
+ 
+  basic_archive
+ 
+
+ 
+  basic_archive is an example of an archive module.  This
+  module copies completed WAL segment files to the specified directory.  This
+  may not be especially useful, but it can serve as a starting point for
+  developing your own archive module.  For more information about archive
+  modules, see .
+ 
+
+ 
+  In order to function, this module must be loaded via
+  , and 
+  must be enabled.
+ 
+
+ 
+  Configuration Parameters
+
+  
+   
+    
+     basic_archive.archive_directory (string)
+     
+      basic_archive.archive_directory configuration parameter
+     
+    
+    
+     
+      The directory where the server should copy WAL segment files.  This
+      directory must already exist.  The default is an empty string, which
+      effectively halts WAL archiving, but if 
+      is enabled, the server will accumulate WAL segment files in the
+      expectation that a value will soon be provided.
+     
+    
+   
+  
+
+  
+   These parameters must be set in postgresql.conf.
+   Typical usage might be:
+  
+
+
+# postgresql.conf
+archive_mode = 'on'
+archive_library = 'basic_archive'
+basic_archive.archive_directory = '/path/to/archive/directory'
+
+ 
+
+ 
+  Notes
+
+  
+   Server crashes may leave temporary files with the prefix
+   archtemp in the archive directory.  It is recommended to
+   delete such files before restarting the server after a crash.  It is safe to
+   remove such files while the server is running as long as they are unrelated
+   to any archiving still in progress, but users should use extra caution when
+   doing so.
+  
+ 
+
+ 
+  Author
+
+  
+   Nathan Bossart
+  
+ 
+
+


diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index 692d8a2a1782ebc18183f9636b7041a89b2652da..fc63172efde3070579f995bad4cd7046da26cea9 100644 (file)


--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3479,7 +3479,7 @@ include_dir 'conf.d'
         Maximum size to let the WAL grow during automatic
         checkpoints. This is a soft limit; WAL size can exceed
         max_wal_size under special circumstances, such as
-        heavy load, a failing archive_command, or a high
+        heavy load, a failing archive_library, or a high
         wal_keep_size setting.
         If this value is specified without units, it is taken as megabytes.
         The default is 1 GB.
@@ -3528,7 +3528,7 @@ include_dir 'conf.d'
        
         When archive_mode is enabled, completed WAL segments
         are sent to archive storage by setting
-        command"/>. In addition to off,
+        library"/>. In addition to off,
         to disable, there are two modes: on, and
         always. During normal operation, there is no
         difference between the two modes, but when set to always
@@ -3538,9 +3538,6 @@ include_dir 'conf.d'
          for details.
        
        
-        archive_mode and archive_command are
-        separate variables so that archive_command can be
-        changed without leaving archiving mode.
         This parameter can only be set at server start.
         archive_mode cannot be enabled when
         wal_level is set to minimal.
@@ -3548,6 +3545,28 @@ include_dir 'conf.d'
       
      
 
+     
+      archive_library (string)
+      
+       archive_library configuration parameter
+      
+      
+      
+       
+        The library to use for archiving completed WAL file segments.  If set to
+        an empty string (the default), archiving via shell is enabled, and
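+        <xref linkend="guc-archive-command"/> is used.  Otherwise, the specified
+        shared library is used for archiving.  For more information, see
+        <xref linkend="backup-archiving-wal"/> and
+        <xref linkend="archive-modules"/>.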
+       
+       
+        This parameter can only be set in the
+        postgresql.conf file or on the server command line.
+       
+      
+     
+
      
       archive_command (string)
       
@@ -3570,9 +3589,11 @@ include_dir 'conf.d'
        
         This parameter can only be set in the postgresql.conf
         file or on the server command line.  It is ignored unless
-        archive_mode was enabled at server start.
+        archive_mode was enabled at server start and
+        archive_library is set to an empty string.
         If archive_command is an empty string (the default) while
-        archive_mode is enabled, WAL archiving is temporarily
+        archive_mode is enabled and archive_library
+        is set to an empty string, WAL archiving is temporarily
         disabled, but the server continues to accumulate WAL segment files in
         the expectation that a command will soon be provided.  Setting
         archive_command to a command that does nothing but
@@ -3592,7 +3613,7 @@ include_dir 'conf.d'
       
       
        
-        The <xref linkend="guc-archive-command"/> is only invoked for
+        The <xref linkend="guc-archive-library"/> is only invoked for
         completed WAL segments. Hence, if your server generates little WAL
         traffic (or has slack periods where it does so), there could be a
         long delay between the completion of a transaction and its safe


diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml

index d3ca4b6932007b3f32f35ee4c87b13a40ab64fd4..be9711c6f2ce785573346866f100571824f8c80b 100644 (file)


--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -99,6 +99,7 @@ CREATE EXTENSION module_name;
  &amcheck;
  &auth-delay;
  &auto-explain;
+ &basic-archive;
  &bloom;
  &btree-gin;
  &btree-gist;


diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml

index 89454e99b981d9079beedafedd18b588a583e6f7..328cd1f378c786d0ce6ddd50867dd08f535b5856 100644 (file)


--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -99,6 +99,7 @@
 
 
 
+
 
 
 
@@ -112,6 +113,7 @@
 
 
 
+
 
 
 


diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml

index a265409f025140f5b8945aa0bf954c9b3a2cb94c..437712762ae8f0e72634e76774287191bd9210fa 100644 (file)


--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -935,7 +935,7 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
     In lieu of using replication slots, it is possible to prevent the removal
     of old WAL segments using <xref linkend="guc-wal-keep-size"/>, or by
     storing the segments in an archive using
-    <xref linkend="guc-archive-command"/>.
+    <xref linkend="guc-archive-library"/>.
     However, these methods often result in retaining more WAL segments than
     required, whereas replication slots retain only the number of segments
     known to be needed.  On the other hand, replication slots can retain so
@@ -1386,10 +1386,10 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)'
      to always, and the standby will call the archive
      command for every WAL segment it receives, whether it's by restoring
      from the archive or by streaming replication. The shared archive can
-     be handled similarly, but the archive_command must
+     be handled similarly, but the archive_library must
      test if the file being archived exists already, and if the existing file
      has identical contents. This requires more care in the
-     archive_command, as it must
+     archive_library, as it must
      be careful to not overwrite an existing file with different contents,
      but return success if the exactly same file is archived twice. And
      all that must be done free of race conditions, if two servers attempt


diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml

index dba9cf413f901627716cc40c4795ad38da6fbc5d..3db6d2160b1088635413fd634035d21a8c1698d5 100644 (file)


--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -233,6 +233,7 @@ break is not needed in a wider output rendering.
   &bgworker;
   &logicaldecoding;
   &replication-origins;
+  &archive-modules;
 
  
 


diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml

index 1546f10c0d9e6e9f993352225d75cd25204648d3..e7ae29ec3d36d8c0ff5b1de22f8fcf0bdfbd3021 100644 (file)


--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -102,8 +102,8 @@ PostgreSQL documentation
      
       All WAL records required for the backup must contain sufficient full-page writes,
       which requires you to enable full_page_writes on the primary and
-      not to use a tool like pg_compresslog as
-      archive_command to remove full-page writes from WAL files.
+      not to use a tool in your archive_library to remove
+      full-page writes from WAL files.
      
     
    


diff --git a/doc/src/sgml/ref/pg_receivewal.sgml b/doc/src/sgml/ref/pg_receivewal.sgml

index b2e41ea814c72b06bd289b364a5e5bcc430882b6..b846213fb7bfc5e72d1cb676ea2427b1176f8618 100644 (file)


--- a/doc/src/sgml/ref/pg_receivewal.sgml
+++ b/doc/src/sgml/ref/pg_receivewal.sgml
@@ -40,7 +40,7 @@ PostgreSQL documentation
   
    pg_receivewal streams the write-ahead
    log in real time as it's being generated on the server, and does not wait
-   for segments to complete like <xref linkend="guc-archive-command"/> does.
+   for segments to complete like <xref linkend="guc-archive-library"/> does.
    For this reason, it is not necessary to set
    <xref linkend="guc-archive-timeout"/> when using
     pg_receivewal.
@@ -487,11 +487,11 @@ PostgreSQL documentation
 
   
    When using pg_receivewal instead of
-   <xref linkend="guc-archive-command"/> as the main WAL backup method, it is
+   <xref linkend="guc-archive-library"/> as the main WAL backup method, it is
    strongly recommended to use replication slots.  Otherwise, the server is
    free to recycle or remove write-ahead log files before they are backed up,
    because it does not have any information, either
-   from <xref linkend="guc-archive-command"/> or the replication slots, about
+   from <xref linkend="guc-archive-library"/> or the replication slots, about
    how far the WAL stream has been archived.  Note, however, that a
    replication slot will fill up the server's disk space if the receiver does
    not keep up with fetching the WAL data.


diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml

index 24e1c89503cbc12ef9aba4a92fb0f1152af01431..2bb27a846828aaabcad660d752c1732f955fe9bf 100644 (file)


--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -636,7 +636,7 @@
    WAL files plus one additional WAL file are
    kept at all times. Also, if WAL archiving is used, old segments cannot be
    removed or recycled until they are archived. If WAL archiving cannot keep up
-   with the pace that WAL is generated, or if archive_command
+   with the pace that WAL is generated, or if archive_library
    fails repeatedly, old WAL files will accumulate in pg_wal
    until the situation is resolved. A slow or failed standby server that
    uses a replication slot will have the same effect (see


diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index dfe2a0bcce984b6be4e05ddda493c20d1fd1b4e9..958220c495bf0a572bc6f5f38ea95522c3d6a70b 100644 (file)


--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8831,7 +8831,7 @@ ShutdownXLOG(int code, Datum arg)
         * process one more time at the end of shutdown). The checkpoint
         * record will go to the next XLOG file and won't be archived (yet).
         */
-       if (XLogArchivingActive() && XLogArchiveCommandSet())
+       if (XLogArchivingActive())
            RequestXLogSwitch(false);
 
        CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);


diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c

index 6e3fcedc9788395a6623938980b7723fc352a798..d916ed39a8c45f93ca519a1c0dc55d4d460286f2 100644 (file)


--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -89,6 +89,8 @@ typedef struct PgArchData
    slock_t     arch_lck;
 } PgArchData;
 
+char *XLogArchiveLibrary = "";
+
 
 /* ----------
  * Local data
@@ -96,6 +98,8 @@ typedef struct PgArchData
  */
 static time_t last_sigterm_time = 0;
 static PgArchData *PgArch = NULL;
+static ArchiveModuleCallbacks ArchiveContext;
+
 
 /*
  * Stuff for tracking multiple files to archive from each scan of
@@ -140,6 +144,8 @@ static void pgarch_archiveDone(char *xlog);
 static void pgarch_die(int code, Datum arg);
 static void HandlePgArchInterrupts(void);
 static int ready_file_comparator(Datum a, Datum b, void *arg);
+static void LoadArchiveLibrary(void);
+static void call_archive_module_shutdown_callback(int code, Datum arg);
 
 /* Report shared memory space needed by PgArchShmemInit */
 Size
@@ -244,7 +250,16 @@ PgArchiverMain(void)
    arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN,
                                                ready_file_comparator, NULL);
 
-   pgarch_MainLoop();
+   /* Load the archive_library. */
+   LoadArchiveLibrary();
+
+   PG_ENSURE_ERROR_CLEANUP(call_archive_module_shutdown_callback, 0);
+   {
+       pgarch_MainLoop();
+   }
+   PG_END_ENSURE_ERROR_CLEANUP(call_archive_module_shutdown_callback, 0);
+
+   call_archive_module_shutdown_callback(0, 0);
 
    proc_exit(0);
 }
@@ -407,11 +422,12 @@ pgarch_ArchiverCopyLoop(void)
             */
            HandlePgArchInterrupts();
 
-           /* can't do anything if no command ... */
-           if (!XLogArchiveCommandSet())
+           /* can't do anything if not configured ... */
+           if (ArchiveContext.check_configured_cb != NULL &&
+               !ArchiveContext.check_configured_cb())
            {
                ereport(WARNING,
-                       (errmsg("archive_mode enabled, yet archive_command is not set")));
+                       (errmsg("archive_mode enabled, yet archiving is not configured")));
                return;
            }
 
@@ -492,7 +508,7 @@ pgarch_ArchiverCopyLoop(void)
 /*
  * pgarch_archiveXlog
  *
- * Invokes system(3) to copy one archive file to wherever it should go
+ * Invokes archive_file_cb to copy one archive file to wherever it should go
  *
  * Returns true if successful
  */
@@ -509,7 +525,7 @@ pgarch_archiveXlog(char *xlog)
    snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
    set_ps_display(activitymsg);
 
-   ret = shell_archive_file(xlog, pathname);
+   ret = ArchiveContext.archive_file_cb(xlog, pathname);
    if (ret)
        snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
    else
@@ -759,13 +775,89 @@ HandlePgArchInterrupts(void)
    if (ProcSignalBarrierPending)
        ProcessProcSignalBarrier();
 
+   /* Perform logging of memory contexts of this process */
+   if (LogMemoryContextPending)
+       ProcessLogMemoryContextInterrupt();
+
    if (ConfigReloadPending)
    {
+       char       *archiveLib = pstrdup(XLogArchiveLibrary);
+       bool        archiveLibChanged;
+
        ConfigReloadPending = false;
        ProcessConfigFile(PGC_SIGHUP);
+
+       archiveLibChanged = strcmp(XLogArchiveLibrary, archiveLib) != 0;
+       pfree(archiveLib);
+
+       if (archiveLibChanged)
+       {
+           /*
+            * Call the currently loaded archive module's shutdown callback, if
+            * one is defined.
+            */
+           call_archive_module_shutdown_callback(0, 0);
+
+           /*
+            * Ideally, we would simply unload the previous archive module and
+            * load the new one, but there is presently no mechanism for
+            * unloading a library (see the comment above
+            * internal_unload_library()).  To deal with this, we simply restart
+            * the archiver.  The new archive module will be loaded when the new
+            * archiver process starts up.
+            */
+           ereport(LOG,
+                   (errmsg("restarting archiver process because value of "
+                           "\"archive_library\" was changed")));
+
+           proc_exit(0);
+       }
    }
+}
 
-   /* Perform logging of memory contexts of this process */
-   if (LogMemoryContextPending)
-       ProcessLogMemoryContextInterrupt();
+/*
+ * LoadArchiveLibrary
+ *     Reset ArchiveContext and let the configured archive module fill it in.
+ *     Reports ERROR if no init function is found or archive_file_cb is unset.
+ */
+static void
+LoadArchiveLibrary(void)
+{
+   ArchiveModuleInit archive_init;
+
+   memset(&ArchiveContext, 0, sizeof(ArchiveModuleCallbacks));
+
+   /*
+    * An empty archive_library selects the built-in shell archiver's init
+    * function.  Otherwise, look up _PG_archive_module_init() in the named
+    * library; a NULL result is reported below as a missing symbol.
+    */
+   if (XLogArchiveLibrary[0] == '\0')
+       archive_init = shell_archive_init;
+   else
+       archive_init = (ArchiveModuleInit)
+           load_external_function(XLogArchiveLibrary,
+                                  "_PG_archive_module_init", false, NULL);
+
+   if (archive_init == NULL)
+       ereport(ERROR,
+               (errmsg("archive modules have to declare the _PG_archive_module_init symbol")));
+
+   (*archive_init) (&ArchiveContext);
+
+   if (ArchiveContext.archive_file_cb == NULL)
+       ereport(ERROR,
+               (errmsg("archive modules must register an archive callback")));
+}
+
+/*
+ * call_archive_module_shutdown_callback
+ *     Calls the loaded module's shutdown_cb, if set; code and arg are unused.
+ *     NOTE(review): may run twice when archive_library changes -- confirm.
+ */
+static void
+call_archive_module_shutdown_callback(int code, Datum arg)
+{
+   if (ArchiveContext.shutdown_cb != NULL)
+       ArchiveContext.shutdown_cb();
+}


diff --git a/src/backend/postmaster/shell_archive.c b/src/backend/postmaster/shell_archive.c

index b54e701da4d5b40abb66c09e7183254dc87c5755..19e240c2053bc84caa8bb66127675ed10682fbac 100644 (file)


--- a/src/backend/postmaster/shell_archive.c
+++ b/src/backend/postmaster/shell_archive.c
@@ -2,6 +2,10 @@
  *
  * shell_archive.c
  *
+ * This archiving function uses a user-specified shell command (the
+ * archive_command GUC) to copy write-ahead log files.  It is used as the
+ * default, but other modules may define their own custom archiving logic.
+ *
  * Copyright (c) 2022, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
@@ -17,7 +21,25 @@
 #include "pgstat.h"
 #include "postmaster/pgarch.h"
 
-bool
+static bool shell_archive_configured(void);
+static bool shell_archive_file(const char *file, const char *path);
+/* Init function for the built-in shell archiver; sets its two callbacks. */
+void
+shell_archive_init(ArchiveModuleCallbacks *cb)
+{
+   AssertVariableIsOfType(&shell_archive_init, ArchiveModuleInit);
+
+   cb->check_configured_cb = shell_archive_configured;
+   cb->archive_file_cb = shell_archive_file;
+}
+/* check_configured_cb: true when archive_command is a non-empty string. */
+static bool
+shell_archive_configured(void)
+{
+   return XLogArchiveCommand[0] != '\0';
+}
+
+static bool
 shell_archive_file(const char *file, const char *path)
 {
    char        xlogarchcmd[MAXPGPATH];


diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c

index 0f2570d6264769a57b4c1fbf193a6e8db130cdd7..0868e5a24f648ffe124e8d7f85119ba36aa9c387 100644 (file)


--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -38,6 +38,7 @@
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/interrupt.h"
+#include "postmaster/pgarch.h"
 #include "postmaster/postmaster.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"


diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c

index b3fd42e0f185eca184071fdd5902b654a7b1f302..f505413a7f96f065cea6d1620e36d4f8e6bf5920 100644 (file)


--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3881,13 +3881,23 @@ static struct config_string ConfigureNamesString[] =
    {
        {"archive_command", PGC_SIGHUP, WAL_ARCHIVING,
            gettext_noop("Sets the shell command that will be called to archive a WAL file."),
-           NULL
+           gettext_noop("This is used only if \"archive_library\" is not set.")
        },
        &XLogArchiveCommand,
        "",
        NULL, NULL, show_archive_command
    },
 
+   {
+       {"archive_library", PGC_SIGHUP, WAL_ARCHIVING,
+           gettext_noop("Sets the library that will be called to archive a WAL file."),
+           gettext_noop("An empty string indicates that \"archive_command\" should be used.")
+       },
+       &XLogArchiveLibrary,
+       "",
+       NULL, NULL, NULL
+   },
+
    {
        {"restore_command", PGC_SIGHUP, WAL_ARCHIVE_RECOVERY,
            gettext_noop("Sets the shell command that will be called to retrieve an archived WAL file."),


diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample

index 817d5f5324671b0f46b9b3d9d050f1995622564a..56d0bee6d9be6756c2b043c5fa230eff12d74ae7 100644 (file)


--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -245,6 +245,9 @@
 
 #archive_mode = off        # enables archiving; off, on, or always
                # (change requires restart)
+#archive_library = ''      # library to use to archive a logfile segment
+               # (empty string indicates archive_command should
+               # be used)
 #archive_command = ''      # command to use to archive a logfile segment
                # placeholders: %p = path of file to archive
                #               %f = file name only


diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h

index 5f934dd65ae3dd3ab9084639ac3089ecfc1e5bbc..a4b1c1286f2b8a825e119ec4b2b330d0d0b2d2f9 100644 (file)


--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -154,7 +154,6 @@ extern PGDLLIMPORT int wal_level;
 /* Is WAL archiving enabled always (even during recovery)? */
 #define XLogArchivingAlways() \
    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
-#define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
 
 /*
  * Is WAL-logging necessary for archival or log-shipping, or can we skip


diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h

index 991a6d061608d16533f40cc18862361f63483be9..9bc7593a2df046914ef0d6cc68e0689d327c1c95 100644 (file)


--- a/src/include/postmaster/pgarch.h
+++ b/src/include/postmaster/pgarch.h
@@ -33,7 +33,41 @@ extern void PgArchiverMain(void) pg_attribute_noreturn();
 extern void PgArchWakeup(void);
 extern void PgArchForceDirScan(void);
 
-/* in shell_archive.c */
-extern bool shell_archive_file(const char *file, const char *path);
+/*
+ * The value of the archive_library GUC.
+ */
+extern char *XLogArchiveLibrary;
+
+/*
+ * Archive module callbacks
+ *
+ * Archive libraries define these functions and store them in the struct that
+ * is passed to _PG_archive_module_init().  ArchiveFileCB is the only required
+ * callback.  For more information about the purpose of each callback, refer
+ * to the archive modules documentation.
+ */
+typedef bool (*ArchiveCheckConfiguredCB) (void);
+typedef bool (*ArchiveFileCB) (const char *file, const char *path);
+typedef void (*ArchiveShutdownCB) (void);
+
+typedef struct ArchiveModuleCallbacks
+{
+   ArchiveCheckConfiguredCB check_configured_cb;   /* optional; false defers archiving */
+   ArchiveFileCB archive_file_cb;  /* required; true means the file was archived */
+   ArchiveShutdownCB shutdown_cb;  /* optional; invoked as the archiver exits */
+} ArchiveModuleCallbacks;
+
+/*
+ * Type of the shared library symbol _PG_archive_module_init that is looked
+ * up when loading an archive library.
+ */
+typedef void (*ArchiveModuleInit) (ArchiveModuleCallbacks *cb);
+
+/*
+ * Since the logic for archiving via a shell command is in the core server
+ * and does not need to be loaded via a shared library, it has a special
+ * initialization function.
+ */
+extern void shell_archive_init(ArchiveModuleCallbacks *cb);
 
 #endif                         /* _PGARCH_H */
-    linkend="guc-archive-command"/> configuration parameter.  In practice
+    and specify the library to use in the <xref
+    linkend="guc-archive-library"/> configuration parameter.  In practice
     these settings will always be placed in the
     postgresql.conf file.
+    One simple way to archive is to set archive_library to
+    an empty string and to specify a shell command in
+    <xref linkend="guc-archive-command"/>.
     In archive_command,
     %p is replaced by the path name of the file to
     archive, while %f is replaced by only the file name.
@@ -631,7 +634,17 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    The archive command will be executed under the ownership of the same
+    Another way to archive is to use a custom archive module as the
+    archive_library.  Since such modules are written in
+    C, creating your own may require considerably more effort
+    than writing a shell command.  However, archive modules can be more
+    performant than archiving via shell, and they will have access to many
+    useful server resources.  For more information about archive modules, see
+    <xref linkend="archive-modules"/>.
+   
+
+   
+    The archive library will be executed under the ownership of the same
     user that the PostgreSQL server is running as.  Since
     the series of WAL files being archived contains effectively everything
     in your database, you will want to be sure that the archived data is
@@ -640,25 +653,31 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    It is important that the archive command return zero exit status if and
-    only if it succeeds.  Upon getting a zero result,
+    It is important that the archive function return true if
+    and only if it succeeds.  If true is returned,
     PostgreSQL will assume that the file has been
-    successfully archived, and will remove or recycle it.  However, a nonzero
-    status tells PostgreSQL that the file was not archived;
-    it will try again periodically until it succeeds.
+    successfully archived, and will remove or recycle it.  However, a return
+    value of false tells
+    PostgreSQL that the file was not archived; it
+    will try again periodically until it succeeds.  If you are archiving via a
+    shell command, the appropriate return values can be achieved by returning
+    0 if the command succeeds and a nonzero value if it
+    fails.
    
 
    
-    When the archive command is terminated by a signal (other than
-    SIGTERM that is used as part of a server
-    shutdown) or an error by the shell with an exit status greater than
-    125 (such as command not found), the archiver process aborts and gets
-    restarted by the postmaster. In such cases, the failure is
-    not reported in <xref linkend="pg-stat-archiver-view"/>.
+    If the archive function emits an ERROR or
+    FATAL, the archiver process aborts and gets restarted by
+    the postmaster.  If you are archiving via shell command, FATAL is emitted if
+    the command is terminated by a signal (other than
+    SIGTERM that is used as part of a server shutdown)
+    or an error by the shell with an exit status greater than 125 (such as
+    command not found).  In such cases, the failure is not reported in
+    <xref linkend="pg-stat-archiver-view"/>.
    
 
    
-    The archive command should generally be designed to refuse to overwrite
+    The archive library should generally be designed to refuse to overwrite
     any pre-existing archive file.  This is an important safety feature to
     preserve the integrity of your archive in case of administrator error
     (such as sending the output of two different servers to the same archive
@@ -666,9 +685,9 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    It is advisable to test your proposed archive command to ensure that it
+    It is advisable to test your proposed archive library to ensure that it
     indeed does not overwrite an existing file, and that it returns
-    nonzero status in this case.
+    false in this case.
     The example command above for Unix ensures this by including a separate
     test step.  On some Unix platforms, cp has
     switches such as -i that can be used to do the same thing
@@ -680,7 +699,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
 
    
     While designing your archiving setup, consider what will happen if
-    the archive command fails repeatedly because some aspect requires
+    the archive library fails repeatedly because some aspect requires
     operator intervention or the archive runs out of space. For example, this
     could occur if you write to tape without an autochanger; when the tape
     fills, nothing further can be archived until the tape is swapped.
@@ -695,7 +714,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    The speed of the archiving command is unimportant as long as it can keep up
+    The speed of the archive library is unimportant as long as it can keep up
     with the average rate at which your server generates WAL data.  Normal
     operation continues even if the archiving process falls a little behind.
     If archiving falls significantly behind, this will increase the amount of
@@ -707,11 +726,11 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    In writing your archive command, you should assume that the file names to
+    In writing your archive library, you should assume that the file names to
     be archived can be up to 64 characters long and can contain any
     combination of ASCII letters, digits, and dots.  It is not necessary to
-    preserve the original relative path (%p) but it is necessary to
-    preserve the file name (%f).
+    preserve the original relative path but it is necessary to preserve the file
+    name.
    
 
    
@@ -728,7 +747,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
    
 
    
-    The archive command is only invoked on completed WAL segments.  Hence,
+    The archive function is only invoked on completed WAL segments.  Hence,
     if your server generates only little WAL traffic (or has slack periods
     where it does so), there could be a long delay between the completion
     of a transaction and its safe recording in archive storage.  To put
@@ -757,8 +776,9 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     turned on during execution of one of these statements, WAL would not
     contain enough information for archive recovery.  (Crash recovery is
     unaffected.)  For this reason, wal_level can only be changed at
-    server start.  However, archive_command can be changed with a
-    configuration file reload.  If you wish to temporarily stop archiving,
+    server start.  However, archive_library can be changed with a
+    configuration file reload.  If you are archiving via shell and wish to
+    temporarily stop archiving,
     one way to do it is to set archive_command to the empty
     string ('').
     This will cause WAL files to accumulate in pg_wal/ until a
@@ -938,11 +958,11 @@ SELECT * FROM pg_stop_backup(false, true);
      On a standby, archive_mode must be always in order
      for pg_stop_backup to wait.
      Archiving of these files happens automatically since you have
-     already configured archive_command. In most cases this
+     already configured archive_library. In most cases this
      happens quickly, but you are advised to monitor your archive
      system to ensure there are no delays.
      If the archive process has fallen behind
-     because of failures of the archive command, it will keep retrying
+     because of failures of the archive library, it will keep retrying
      until the archive succeeds and the backup is complete.
      If you wish to place a time limit on the execution of
      pg_stop_backup, set an appropriate
@@ -1500,9 +1520,10 @@ restore_command = 'cp /mnt/server/archivedir/%f %p'
       To prepare for low level standalone hot backups, make sure
       wal_level is set to
       replica or higher, archive_mode to
-      on, and set up an archive_command that performs
+      on, and set up an archive_library that performs
       archiving only when a switch file exists.  For example:
 
+archive_library = ''  # use shell command
 archive_command = 'test ! -f /var/lib/pgsql/backup_in_progress || (test ! -f /var/lib/pgsql/archive/%f && cp %p /var/lib/pgsql/archive/%f)'
 
       This command will perform archiving when
+    linkend="guc-archive-library"/> configuration parameter.  In practice
      these settings will always be placed in the
      postgresql.conf file.
+    One simple way to archive is to set archive_library to
+    an empty string and to specify a shell command in
+    .
      In archive_command,
      %p is replaced by the path name of the file to
      archive, while %f is replaced by only the file name.
@@ -631,7 +634,17 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     
  
     
-    The archive command will be executed under the ownership of the same
+    Another way to archive is to use a custom archive module as the
+    archive_library.  Since such modules are written in
+    C, creating your own may require considerably more effort
+    than writing a shell command.  However, archive modules can be more
+    performant than archiving via shell, and they will have access to many
+    useful server resources.  For more information about archive modules, see
+    .
+   
+
+   
+    The archive library will be executed under the ownership of the same
      user that the PostgreSQL server is running as.  Since
      the series of WAL files being archived contains effectively everything
      in your database, you will want to be sure that the archived data is
@@ -640,25 +653,31 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     
  
     
-    It is important that the archive command return zero exit status if and
-    only if it succeeds.  Upon getting a zero result,
+    It is important that the archive function return true if
+    and only if it succeeds.  If true is returned,
      PostgreSQL will assume that the file has been
-    successfully archived, and will remove or recycle it.  However, a nonzero
-    status tells PostgreSQL that the file was not archived;
-    it will try again periodically until it succeeds.
+    successfully archived, and will remove or recycle it.  However, a return
+    value of false tells
+    PostgreSQL that the file was not archived; it
+    will try again periodically until it succeeds.  If you are archiving via a
+    shell command, the appropriate return values can be achieved by returning
+    0 if the command succeeds and a nonzero value if it
+    fails.
     
  
     
-    When the archive command is terminated by a signal (other than
-    SIGTERM that is used as part of a server
-    shutdown) or an error by the shell with an exit status greater than
-    125 (such as command not found), the archiver process aborts and gets
-    restarted by the postmaster. In such cases, the failure is
-    not reported in .
+    If the archive function emits an ERROR or
+    FATAL, the archiver process aborts and gets restarted by
+    the postmaster.  If you are archiving via shell command, FATAL is emitted if
+    the command is terminated by a signal (other than
+    SIGTERM that is used as part of a server shutdown)
+    or an error by the shell with an exit status greater than 125 (such as
+    command not found).  In such cases, the failure is not reported in
+    .
     
  
     
-    The archive command should generally be designed to refuse to overwrite
+    The archive library should generally be designed to refuse to overwrite
      any pre-existing archive file.  This is an important safety feature to
      preserve the integrity of your archive in case of administrator error
      (such as sending the output of two different servers to the same archive
@@ -666,9 +685,9 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     
  
     
-    It is advisable to test your proposed archive command to ensure that it
+    It is advisable to test your proposed archive library to ensure that it
      indeed does not overwrite an existing file, and that it returns
-    nonzero status in this case.
+    false in this case.
      The example command above for Unix ensures this by including a separate
      test step.  On some Unix platforms, cp has
      switches such as -i that can be used to do the same thing
@@ -680,7 +699,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
  
     
      While designing your archiving setup, consider what will happen if
-    the archive command fails repeatedly because some aspect requires
+    the archive library fails repeatedly because some aspect requires
      operator intervention or the archive runs out of space. For example, this
      could occur if you write to tape without an autochanger; when the tape
      fills, nothing further can be archived until the tape is swapped.
@@ -695,7 +714,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     
  
     
-    The speed of the archiving command is unimportant as long as it can keep up
+    The speed of the archive library is unimportant as long as it can keep up
      with the average rate at which your server generates WAL data.  Normal
      operation continues even if the archiving process falls a little behind.
      If archiving falls significantly behind, this will increase the amount of
@@ -707,11 +726,11 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     
  
     
-    In writing your archive command, you should assume that the file names to
+    In writing your archive library, you should assume that the file names to
      be archived can be up to 64 characters long and can contain any
      combination of ASCII letters, digits, and dots.  It is not necessary to
-    preserve the original relative path (%p) but it is necessary to
-    preserve the file name (%f).
+    preserve the original relative path but it is necessary to preserve the file
+    name.
     
  
     
@@ -728,7 +747,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
     
  
     
-    The archive command is only invoked on completed WAL segments.  Hence,
+    The archive function is only invoked on completed WAL segments.  Hence,
      if your server generates only little WAL traffic (or has slack periods
      where it does so), there could be a long delay between the completion
      of a transaction and its safe recording in archive storage.  To put
@@ -757,8 +776,9 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/0
      turned on during execution of one of these statements, WAL would not
      contain enough information for archive recovery.  (Crash recovery is
      unaffected.)  For this reason, wal_level can only be changed at
-    server start.  However, archive_command can be changed with a
-    configuration file reload.  If you wish to temporarily stop archiving,
+    server start.  However, archive_library can be changed with a
+    configuration file reload.  If you are archiving via shell and wish to
+    temporarily stop archiving,
      one way to do it is to set archive_command to the empty
      string ('').
      This will cause WAL files to accumulate in pg_wal/ until a
@@ -938,11 +958,11 @@ SELECT * FROM pg_stop_backup(false, true);
       On a standby, archive_mode must be always in order
       for pg_stop_backup to wait.
       Archiving of these files happens automatically since you have
-     already configured archive_command. In most cases this
+     already configured archive_library. In most cases this
       happens quickly, but you are advised to monitor your archive
       system to ensure there are no delays.
       If the archive process has fallen behind
-     because of failures of the archive command, it will keep retrying
+     because of failures of the archive library, it will keep retrying
       until the archive succeeds and the backup is complete.
       If you wish to place a time limit on the execution of
       pg_stop_backup, set an appropriate
@@ -1500,9 +1520,10 @@ restore_command = 'cp /mnt/server/archivedir/%f %p'
        To prepare for low level standalone hot backups, make sure
        wal_level is set to
        replica or higher, archive_mode to
-      on, and set up an archive_command that performs
+      on, and set up an archive_library that performs
        archiving only when a switch file exists.  For example:
  
+archive_library = ''  # use shell command
  archive_command = 'test ! -f /var/lib/pgsql/backup_in_progress || (test ! -f /var/lib/pgsql/archive/%f && cp %p /var/lib/pgsql/archive/%f)'
  
        This command will perform archiving when
diff --git a/doc/src/sgml/basic-archive.sgml b/doc/src/sgml/basic-archive.sgml

new file mode 100644 (file)

index 0000000..0b650f1
--- /dev/null
+++ b/doc/src/sgml/basic-archive.sgml
@@ -0,0 +1,81 @@
+
+
+
+ basic_archive
+
+ 
+  basic_archive
+ 
+
+ 
+  basic_archive is an example of an archive module.  This
+  module copies completed WAL segment files to the specified directory.  This
+  may not be especially useful, but it can serve as a starting point for
+  developing your own archive module.  For more information about archive
+  modules, see <xref linkend="archive-modules"/>.
+ 
+
+ 
+  In order to function, this module must be loaded via
+  <xref linkend="guc-archive-library"/>, and <xref linkend="guc-archive-mode"/>
+  must be enabled.
+ 
+
+ 
+  Configuration Parameters
+
+  
+   
+    
+     basic_archive.archive_directory (string)
+     
+      basic_archive.archive_directory configuration parameter
+     
+    
+    
+     
+      The directory where the server should copy WAL segment files.  This
+      directory must already exist.  The default is an empty string, which
+      effectively halts WAL archiving, but if <xref linkend="guc-archive-mode"/>
+      is enabled, the server will accumulate WAL segment files in the
+      expectation that a value will soon be provided.
+     
+    
+   
+  
+
+  
+   These parameters must be set in postgresql.conf.
+   Typical usage might be:
+  
+
+
+# postgresql.conf
+archive_mode = 'on'
+archive_library = 'basic_archive'
+basic_archive.archive_directory = '/path/to/archive/directory'
+
+ 
+
+ 
+  Notes
+
+  
+   Server crashes may leave temporary files with the prefix
+   archtemp in the archive directory.  It is recommended to
+   delete such files before restarting the server after a crash.  It is safe to
+   remove such files while the server is running as long as they are unrelated
+   to any archiving still in progress, but users should use extra caution when
+   doing so.
+  
+ 
+
+ 
+  Author
+
+  
+   Nathan Bossart
+  
+ 
+
+
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index 692d8a2a1782ebc18183f9636b7041a89b2652da..fc63172efde3070579f995bad4cd7046da26cea9 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3479,7 +3479,7 @@ include_dir 'conf.d'
          Maximum size to let the WAL grow during automatic
          checkpoints. This is a soft limit; WAL size can exceed
          max_wal_size under special circumstances, such as
-        heavy load, a failing archive_command, or a high
+        heavy load, a failing archive_library, or a high
          wal_keep_size setting.
          If this value is specified without units, it is taken as megabytes.
          The default is 1 GB.
@@ -3528,7 +3528,7 @@ include_dir 'conf.d'
         
          When archive_mode is enabled, completed WAL segments
          are sent to archive storage by setting
-        <xref linkend="guc-archive-command"/>. In addition to off,
+        <xref linkend="guc-archive-library"/>. In addition to off,
          to disable, there are two modes: on, and
          always. During normal operation, there is no
          difference between the two modes, but when set to always
@@ -3538,9 +3538,6 @@ include_dir 'conf.d'
           for details.
         
         
-        archive_mode and archive_command are
-        separate variables so that archive_command can be
-        changed without leaving archiving mode.
          This parameter can only be set at server start.
          archive_mode cannot be enabled when
          wal_level is set to minimal.
@@ -3548,6 +3545,28 @@ include_dir 'conf.d'
        
       
  
+     
+      archive_library (string)
+      
+       archive_library configuration parameter
+      
+      
+      
+       
+        The library to use for archiving completed WAL file segments.  If set to
+        an empty string (the default), archiving via shell is enabled, and
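+        <xref linkend="guc-archive-command"/> is used.  Otherwise, the specified
+        shared library is used for archiving.  For more information, see
+        <xref linkend="backup-archiving-wal"/> and
+        <xref linkend="archive-modules"/>.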
+       
+       
+        This parameter can only be set in the
+        postgresql.conf file or on the server command line.
+       
+      
+     
+
       
        archive_command (string)
        
@@ -3570,9 +3589,11 @@ include_dir 'conf.d'
         
          This parameter can only be set in the postgresql.conf
          file or on the server command line.  It is ignored unless
-        archive_mode was enabled at server start.
+        archive_mode was enabled at server start and
+        archive_library specifies to archive via shell command.
          If archive_command is an empty string (the default) while
-        archive_mode is enabled, WAL archiving is temporarily
+        archive_mode is enabled and archive_library
+        specifies archiving via shell, WAL archiving is temporarily
          disabled, but the server continues to accumulate WAL segment files in
          the expectation that a command will soon be provided.  Setting
          archive_command to a command that does nothing but
@@ -3592,7 +3613,7 @@ include_dir 'conf.d'
        
        
         
-        The <xref linkend="guc-archive-command"/> is only invoked for
+        The <xref linkend="guc-archive-library"/> is only invoked for
          completed WAL segments. Hence, if your server generates little WAL
          traffic (or has slack periods where it does so), there could be a
          long delay between the completion of a transaction and its safe
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml

index d3ca4b6932007b3f32f35ee4c87b13a40ab64fd4..be9711c6f2ce785573346866f100571824f8c80b 100644 (file)
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -99,6 +99,7 @@ CREATE EXTENSION module_name;
   &amcheck;
   &auth-delay;
   &auto-explain;
+ &basic-archive;
   &bloom;
   &btree-gin;
   &btree-gist;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml

index 89454e99b981d9079beedafedd18b588a583e6f7..328cd1f378c786d0ce6ddd50867dd08f535b5856 100644 (file)
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -99,6 +99,7 @@
  
  
  
+
  
  
  
@@ -112,6 +113,7 @@
  
  
  
+
  
  
  
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml

index a265409f025140f5b8945aa0bf954c9b3a2cb94c..437712762ae8f0e72634e76774287191bd9210fa 100644 (file)
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -935,7 +935,7 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
      In lieu of using replication slots, it is possible to prevent the removal
      of old WAL segments using , or by
      storing the segments in an archive using
-    <xref linkend="guc-archive-command"/>.
+    <xref linkend="guc-archive-library"/>.
      However, these methods often result in retaining more WAL segments than
      required, whereas replication slots retain only the number of segments
      known to be needed.  On the other hand, replication slots can retain so
@@ -1386,10 +1386,10 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)'
       to always, and the standby will call the archive
       command for every WAL segment it receives, whether it's by restoring
       from the archive or by streaming replication. The shared archive can
-     be handled similarly, but the archive_command must
+     be handled similarly, but the archive_library must
       test if the file being archived exists already, and if the existing file
       has identical contents. This requires more care in the
-     archive_command, as it must
+     archive_library, as it must
       be careful to not overwrite an existing file with different contents,
       but return success if the exactly same file is archived twice. And
       all that must be done free of race conditions, if two servers attempt
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml

index dba9cf413f901627716cc40c4795ad38da6fbc5d..3db6d2160b1088635413fd634035d21a8c1698d5 100644 (file)
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -233,6 +233,7 @@ break is not needed in a wider output rendering.
    &bgworker;
    &logicaldecoding;
    &replication-origins;
+  &archive-modules;
  
   
  
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml

index 1546f10c0d9e6e9f993352225d75cd25204648d3..e7ae29ec3d36d8c0ff5b1de22f8fcf0bdfbd3021 100644 (file)
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -102,8 +102,8 @@ PostgreSQL documentation
       
        All WAL records required for the backup must contain sufficient full-page writes,
        which requires you to enable full_page_writes on the primary and
-      not to use a tool like pg_compresslog as
-      archive_command to remove full-page writes from WAL files.
+      not to use a tool in your archive_library to remove
+      full-page writes from WAL files.
       
      
     
diff --git a/doc/src/sgml/ref/pg_receivewal.sgml b/doc/src/sgml/ref/pg_receivewal.sgml

index b2e41ea814c72b06bd289b364a5e5bcc430882b6..b846213fb7bfc5e72d1cb676ea2427b1176f8618 100644 (file)
--- a/doc/src/sgml/ref/pg_receivewal.sgml
+++ b/doc/src/sgml/ref/pg_receivewal.sgml
@@ -40,7 +40,7 @@ PostgreSQL documentation
    
     pg_receivewal streams the write-ahead
     log in real time as it's being generated on the server, and does not wait
-   for segments to complete like <xref linkend="guc-archive-command"/> does.
+   for segments to complete like <xref linkend="guc-archive-library"/> does.
     For this reason, it is not necessary to set
      when using
      pg_receivewal.
@@ -487,11 +487,11 @@ PostgreSQL documentation
  
    
     When using pg_receivewal instead of
-   <xref linkend="guc-archive-command"/> as the main WAL backup method, it is
+   <xref linkend="guc-archive-library"/> as the main WAL backup method, it is
     strongly recommended to use replication slots.  Otherwise, the server is
     free to recycle or remove write-ahead log files before they are backed up,
     because it does not have any information, either
-   from <xref linkend="guc-archive-command"/> or the replication slots, about
+   from <xref linkend="guc-archive-library"/> or the replication slots, about
     how far the WAL stream has been archived.  Note, however, that a
     replication slot will fill up the server's disk space if the receiver does
     not keep up with fetching the WAL data.
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml

index 24e1c89503cbc12ef9aba4a92fb0f1152af01431..2bb27a846828aaabcad660d752c1732f955fe9bf 100644 (file)
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -636,7 +636,7 @@
     WAL files plus one additional WAL file are
     kept at all times. Also, if WAL archiving is used, old segments cannot be
     removed or recycled until they are archived. If WAL archiving cannot keep up
-   with the pace that WAL is generated, or if archive_command
+   with the pace that WAL is generated, or if archive_library
     fails repeatedly, old WAL files will accumulate in pg_wal
     until the situation is resolved. A slow or failed standby server that
     uses a replication slot will have the same effect (see
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index dfe2a0bcce984b6be4e05ddda493c20d1fd1b4e9..958220c495bf0a572bc6f5f38ea95522c3d6a70b 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8831,7 +8831,7 @@ ShutdownXLOG(int code, Datum arg)
          * process one more time at the end of shutdown). The checkpoint
          * record will go to the next XLOG file and won't be archived (yet).
          */
-       if (XLogArchivingActive() && XLogArchiveCommandSet())
+       if (XLogArchivingActive())
             RequestXLogSwitch(false);
  
         CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c

index 6e3fcedc9788395a6623938980b7723fc352a798..d916ed39a8c45f93ca519a1c0dc55d4d460286f2 100644 (file)
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -89,6 +89,8 @@ typedef struct PgArchData
     slock_t     arch_lck;
  } PgArchData;
  
+char *XLogArchiveLibrary = "";
+
  
  /* ----------
   * Local data
@@ -96,6 +98,8 @@ typedef struct PgArchData
   */
  static time_t last_sigterm_time = 0;
  static PgArchData *PgArch = NULL;
+static ArchiveModuleCallbacks ArchiveContext;
+
  
  /*
   * Stuff for tracking multiple files to archive from each scan of
@@ -140,6 +144,8 @@ static void pgarch_archiveDone(char *xlog);
  static void pgarch_die(int code, Datum arg);
  static void HandlePgArchInterrupts(void);
  static int ready_file_comparator(Datum a, Datum b, void *arg);
+static void LoadArchiveLibrary(void);
+static void call_archive_module_shutdown_callback(int code, Datum arg);
  
  /* Report shared memory space needed by PgArchShmemInit */
  Size
@@ -244,7 +250,16 @@ PgArchiverMain(void)
     arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN,
                                                 ready_file_comparator, NULL);
  
-   pgarch_MainLoop();
+   /* Load the archive_library. */
+   LoadArchiveLibrary();
+
+   PG_ENSURE_ERROR_CLEANUP(call_archive_module_shutdown_callback, 0);
+   {
+       pgarch_MainLoop();
+   }
+   PG_END_ENSURE_ERROR_CLEANUP(call_archive_module_shutdown_callback, 0);
+
+   call_archive_module_shutdown_callback(0, 0);
  
     proc_exit(0);
  }
@@ -407,11 +422,12 @@ pgarch_ArchiverCopyLoop(void)
              */
             HandlePgArchInterrupts();
  
-           /* can't do anything if no command ... */
-           if (!XLogArchiveCommandSet())
+           /* can't do anything if not configured ... */
+           if (ArchiveContext.check_configured_cb != NULL &&
+               !ArchiveContext.check_configured_cb())
             {
                 ereport(WARNING,
-                       (errmsg("archive_mode enabled, yet archive_command is not set")));
+                       (errmsg("archive_mode enabled, yet archiving is not configured")));
                 return;
             }
  
@@ -492,7 +508,7 @@ pgarch_ArchiverCopyLoop(void)
  /*
   * pgarch_archiveXlog
   *
- * Invokes system(3) to copy one archive file to wherever it should go
+ * Invokes archive_file_cb to copy one archive file to wherever it should go
   *
   * Returns true if successful
   */
@@ -509,7 +525,7 @@ pgarch_archiveXlog(char *xlog)
     snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
     set_ps_display(activitymsg);
  
-   ret = shell_archive_file(xlog, pathname);
+   ret = ArchiveContext.archive_file_cb(xlog, pathname);
     if (ret)
         snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
     else
@@ -759,13 +775,89 @@ HandlePgArchInterrupts(void)
     if (ProcSignalBarrierPending)
         ProcessProcSignalBarrier();
  
+   /* Perform logging of memory contexts of this process */
+   if (LogMemoryContextPending)
+       ProcessLogMemoryContextInterrupt();
+
     if (ConfigReloadPending)
     {
+       char       *archiveLib = pstrdup(XLogArchiveLibrary);
+       bool        archiveLibChanged;
+
         ConfigReloadPending = false;
         ProcessConfigFile(PGC_SIGHUP);
+
+       archiveLibChanged = strcmp(XLogArchiveLibrary, archiveLib) != 0;
+       pfree(archiveLib);
+
+       if (archiveLibChanged)
+       {
+           /*
+            * Call the currently loaded archive module's shutdown callback, if
+            * one is defined.
+            */
+           call_archive_module_shutdown_callback(0, 0);
+
+           /*
+            * Ideally, we would simply unload the previous archive module and
+            * load the new one, but there is presently no mechanism for
+            * unloading a library (see the comment above
+            * internal_unload_library()).  To deal with this, we simply restart
+            * the archiver.  The new archive module will be loaded when the new
+            * archiver process starts up.
+            */
+           ereport(LOG,
+                   (errmsg("restarting archiver process because value of "
+                           "\"archive_library\" was changed")));
+
+           proc_exit(0);
+       }
     }
+}
  
-   /* Perform logging of memory contexts of this process */
-   if (LogMemoryContextPending)
-       ProcessLogMemoryContextInterrupt();
+/*
+ * LoadArchiveLibrary
+ *
+ * Loads the archiving callbacks into our local ArchiveContext.
+ *
+ * ArchiveContext is zeroed first, so any callback the init function does not
+ * assign stays NULL.  An empty archive_library selects the built-in
+ * shell_archive_init(); otherwise the named library is loaded and its
+ * _PG_archive_module_init symbol is looked up.  An ERROR is raised if that
+ * lookup yields NULL, or if the init function leaves archive_file_cb unset;
+ * the latter check is what allows archive_file_cb to be invoked elsewhere
+ * without a NULL test.
+ */
+static void
+LoadArchiveLibrary(void)
+{
+   ArchiveModuleInit archive_init;
+
+   memset(&ArchiveContext, 0, sizeof(ArchiveModuleCallbacks));
+
+   /*
+    * If shell archiving is enabled, use our special initialization
+    * function.  Otherwise, load the library and call its
+    * _PG_archive_module_init().
+    *
+    * NOTE(review): the "false" passed to load_external_function() is
+    * presumably its signalNotFound flag, i.e. a missing symbol comes back
+    * as NULL and is reported by the check below -- confirm against dfmgr.c.
+    */
+   if (XLogArchiveLibrary[0] == '\0')
+       archive_init = shell_archive_init;
+   else
+       archive_init = (ArchiveModuleInit)
+           load_external_function(XLogArchiveLibrary,
+                                  "_PG_archive_module_init", false, NULL);
+
+   if (archive_init == NULL)
+       ereport(ERROR,
+               (errmsg("archive modules have to declare the _PG_archive_module_init symbol")));
+
+   (*archive_init) (&ArchiveContext);
+
+   if (ArchiveContext.archive_file_cb == NULL)
+       ereport(ERROR,
+               (errmsg("archive modules must register an archive callback")));
+}
+
+/*
+ * call_archive_module_shutdown_callback
+ *
+ * Calls the loaded archive module's shutdown callback, if one is defined.
+ *
+ * Neither argument is read.  The (code, arg) signature is what lets
+ * PgArchiverMain hand this function to PG_ENSURE_ERROR_CLEANUP in addition
+ * to calling it directly.
+ *
+ * NOTE(review): HandlePgArchInterrupts calls this directly and then
+ * proc_exit(0) when archive_library changes, while the
+ * PG_ENSURE_ERROR_CLEANUP registration made in PgArchiverMain is presumably
+ * still active.  Confirm that a module's shutdown_cb cannot be run twice on
+ * that path, or that modules are required to tolerate it.
+ */
+static void
+call_archive_module_shutdown_callback(int code, Datum arg)
+{
+   if (ArchiveContext.shutdown_cb != NULL)
+       ArchiveContext.shutdown_cb();
 }
diff --git a/src/backend/postmaster/shell_archive.c b/src/backend/postmaster/shell_archive.c

index b54e701da4d5b40abb66c09e7183254dc87c5755..19e240c2053bc84caa8bb66127675ed10682fbac 100644 (file)
--- a/src/backend/postmaster/shell_archive.c
+++ b/src/backend/postmaster/shell_archive.c
@@ -2,6 +2,10 @@
   *
   * shell_archive.c
   *
+ * This archiving function uses a user-specified shell command (the
+ * archive_command GUC) to copy write-ahead log files.  It is used as the
+ * default, but other modules may define their own custom archiving logic.
+ *
   * Copyright (c) 2022, PostgreSQL Global Development Group
   *
   * IDENTIFICATION
@@ -17,7 +21,25 @@
  #include "pgstat.h"
  #include "postmaster/pgarch.h"
  
-bool
+static bool shell_archive_configured(void);
+static bool shell_archive_file(const char *file, const char *path);
+/*
+ * shell_archive_init
+ *
+ * Fills in the callbacks used when archiving is done through the
+ * archive_command shell command.  Only check_configured_cb and
+ * archive_file_cb are assigned; shutdown_cb is not touched here.
+ *
+ * The AssertVariableIsOfType() line checks that this function's own type
+ * matches ArchiveModuleInit, the type used for a loadable module's
+ * _PG_archive_module_init.
+ */
+void
+shell_archive_init(ArchiveModuleCallbacks *cb)
+{
+   AssertVariableIsOfType(&shell_archive_init, ArchiveModuleInit);
+
+   cb->check_configured_cb = shell_archive_configured;
+   cb->archive_file_cb = shell_archive_file;
+}
+/*
+ * shell_archive_configured
+ *
+ * Returns true if archive_command (XLogArchiveCommand) is a non-empty
+ * string, false otherwise.  Registered as check_configured_cb by
+ * shell_archive_init().
+ */
+static bool
+shell_archive_configured(void)
+{
+   return XLogArchiveCommand[0] != '\0';
+}
+
+static bool
  shell_archive_file(const char *file, const char *path)
  {
     char        xlogarchcmd[MAXPGPATH];
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c

index 0f2570d6264769a57b4c1fbf193a6e8db130cdd7..0868e5a24f648ffe124e8d7f85119ba36aa9c387 100644 (file)
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -38,6 +38,7 @@
  #include "pgstat.h"
  #include "postmaster/autovacuum.h"
  #include "postmaster/interrupt.h"
+#include "postmaster/pgarch.h"
  #include "postmaster/postmaster.h"
  #include "storage/fd.h"
  #include "storage/ipc.h"
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c

index b3fd42e0f185eca184071fdd5902b654a7b1f302..f505413a7f96f065cea6d1620e36d4f8e6bf5920 100644 (file)
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3881,13 +3881,23 @@ static struct config_string ConfigureNamesString[] =
     {
         {"archive_command", PGC_SIGHUP, WAL_ARCHIVING,
             gettext_noop("Sets the shell command that will be called to archive a WAL file."),
-           NULL
+           gettext_noop("This is used only if \"archive_library\" is not set.")
         },
         &XLogArchiveCommand,
         "",
         NULL, NULL, show_archive_command
     },
  
+   {
+       {"archive_library", PGC_SIGHUP, WAL_ARCHIVING,
+           gettext_noop("Sets the library that will be called to archive a WAL file."),
+           gettext_noop("An empty string indicates that \"archive_command\" should be used.")
+       },
+       &XLogArchiveLibrary,
+       "",
+       NULL, NULL, NULL
+   },
+
     {
         {"restore_command", PGC_SIGHUP, WAL_ARCHIVE_RECOVERY,
             gettext_noop("Sets the shell command that will be called to retrieve an archived WAL file."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample

index 817d5f5324671b0f46b9b3d9d050f1995622564a..56d0bee6d9be6756c2b043c5fa230eff12d74ae7 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -245,6 +245,9 @@
  
  #archive_mode = off        # enables archiving; off, on, or always
                 # (change requires restart)
+#archive_library = ''      # library to use to archive a logfile segment
+               # (empty string indicates archive_command should
+               # be used)
  #archive_command = ''      # command to use to archive a logfile segment
                 # placeholders: %p = path of file to archive
                 #               %f = file name only
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h

index 5f934dd65ae3dd3ab9084639ac3089ecfc1e5bbc..a4b1c1286f2b8a825e119ec4b2b330d0d0b2d2f9 100644 (file)
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -154,7 +154,6 @@ extern PGDLLIMPORT int wal_level;
  /* Is WAL archiving enabled always (even during recovery)? */
  #define XLogArchivingAlways() \
     (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
-#define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
  
  /*
   * Is WAL-logging necessary for archival or log-shipping, or can we skip
diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h

index 991a6d061608d16533f40cc18862361f63483be9..9bc7593a2df046914ef0d6cc68e0689d327c1c95 100644 (file)
--- a/src/include/postmaster/pgarch.h
+++ b/src/include/postmaster/pgarch.h
@@ -33,7 +33,41 @@ extern void PgArchiverMain(void) pg_attribute_noreturn();
  extern void PgArchWakeup(void);
  extern void PgArchForceDirScan(void);
  
-/* in shell_archive.c */
-extern bool shell_archive_file(const char *file, const char *path);
+/*
+ * The value of the archive_library GUC.
+ */
+extern char *XLogArchiveLibrary;
+
+/*
+ * Archive module callbacks
+ *
+ * These callback functions should be defined by archive libraries and returned
+ * via _PG_archive_module_init().  ArchiveFileCB is the only required callback.
+ * For more information about the purpose of each callback, refer to the
+ * archive modules documentation.
+ *
+ * As used by the archiver process:
+ *
+ * check_configured_cb (optional): consulted before archiving; when it is set
+ * and returns false, the archiver emits a WARNING and does not archive for
+ * now.  When it is NULL the check is skipped.
+ *
+ * archive_file_cb (required): called with the WAL file name and its path;
+ * returns true if the file was archived successfully.  Loading fails with an
+ * ERROR if a module does not register it.
+ *
+ * shutdown_cb (optional): called when the archiver is exiting, including just
+ * before it restarts because archive_library was changed.
+ */
+typedef bool (*ArchiveCheckConfiguredCB) (void);
+typedef bool (*ArchiveFileCB) (const char *file, const char *path);
+typedef void (*ArchiveShutdownCB) (void);
+
+typedef struct ArchiveModuleCallbacks
+{
+   ArchiveCheckConfiguredCB check_configured_cb;
+   ArchiveFileCB archive_file_cb;
+   ArchiveShutdownCB shutdown_cb;
+} ArchiveModuleCallbacks;
+
+/*
+ * Type of the shared library symbol _PG_archive_module_init that is looked
+ * up when loading an archive library.
+ */
+typedef void (*ArchiveModuleInit) (ArchiveModuleCallbacks *cb);
+
+/*
+ * Since the logic for archiving via a shell command is in the core server
+ * and does not need to be loaded via a shared library, it has a special
+ * initialization function.
+ */
+extern void shell_archive_init(ArchiveModuleCallbacks *cb);
  
  #endif                         /* _PGARCH_H */
author	Robert Haas
	Thu, 3 Feb 2022 18:57:27 +0000 (13:57 -0500)
committer	Robert Haas
	Thu, 3 Feb 2022 19:05:02 +0000 (14:05 -0500)
contrib/Makefile		patch \| blob \| blame \| history
contrib/basic_archive/.gitignore	[new file with mode: 0644]	patch \| blob
contrib/basic_archive/Makefile	[new file with mode: 0644]	patch \| blob
contrib/basic_archive/basic_archive.c	[new file with mode: 0644]	patch \| blob
contrib/basic_archive/basic_archive.conf	[new file with mode: 0644]	patch \| blob
contrib/basic_archive/expected/basic_archive.out	[new file with mode: 0644]	patch \| blob
contrib/basic_archive/sql/basic_archive.sql	[new file with mode: 0644]	patch \| blob
doc/src/sgml/archive-modules.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/backup.sgml		patch \| blob \| blame \| history
doc/src/sgml/basic-archive.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
doc/src/sgml/contrib.sgml		patch \| blob \| blame \| history
doc/src/sgml/filelist.sgml		patch \| blob \| blame \| history
doc/src/sgml/high-availability.sgml		patch \| blob \| blame \| history
doc/src/sgml/postgres.sgml		patch \| blob \| blame \| history
doc/src/sgml/ref/pg_basebackup.sgml		patch \| blob \| blame \| history
doc/src/sgml/ref/pg_receivewal.sgml		patch \| blob \| blame \| history
doc/src/sgml/wal.sgml		patch \| blob \| blame \| history
src/backend/access/transam/xlog.c		patch \| blob \| blame \| history
src/backend/postmaster/pgarch.c		patch \| blob \| blame \| history
src/backend/postmaster/shell_archive.c		patch \| blob \| blame \| history
src/backend/utils/init/miscinit.c		patch \| blob \| blame \| history
src/backend/utils/misc/guc.c		patch \| blob \| blame \| history
src/backend/utils/misc/postgresql.conf.sample		patch \| blob \| blame \| history
src/include/access/xlog.h		patch \| blob \| blame \| history
src/include/postmaster/pgarch.h		patch \| blob \| blame \| history