Get rid of the dynamic shared memory state file.
authorRobert Haas
Tue, 8 Apr 2014 15:39:55 +0000 (11:39 -0400)
committerRobert Haas
Tue, 8 Apr 2014 15:39:55 +0000 (11:39 -0400)
Instead of storing the ID of the dynamic shared memory control
segment in a file within the data directory, store it in the main
control segment.  This avoids a number of nasty corner cases,
most seriously that doing an online backup and then using it on
the same machine (e.g. to fire up a standby) would result in the
standby clobbering all of the master's dynamic shared memory
segments.

Per complaints from Heikki Linnakangas, Fujii Masao, and Tom
Lane.

src/backend/port/sysv_shmem.c
src/backend/port/win32_shmem.c
src/backend/storage/ipc/dsm.c
src/backend/storage/ipc/ipci.c
src/include/storage/dsm.h
src/include/storage/pg_shmem.h

index 51c1a2b71f85f07b7901907aa8cf364b84d3c0e8..5e3850b024d939e82da5087c70d5eaa2014b15d4 100644 (file)
@@ -30,6 +30,7 @@
 
 #include "miscadmin.h"
 #include "portability/mem.h"
+#include "storage/dsm.h"
 #include "storage/ipc.h"
 #include "storage/pg_shmem.h"
 #include "utils/guc.h"
@@ -421,7 +422,8 @@ CreateAnonymousSegment(Size *size)
  * zero will be passed.
  */
 PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port)
+PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+                    PGShmemHeader **shim)
 {
    IpcMemoryKey NextShmemSegID;
    void       *memAddress;
@@ -509,10 +511,13 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
 
        /*
         * The segment appears to be from a dead Postgres process, or from a
-        * previous cycle of life in this same process.  Zap it, if possible.
+        * previous cycle of life in this same process.  Zap it, if possible,
+        * and any associated dynamic shared memory segments, as well.
         * This probably shouldn't fail, but if it does, assume the segment
         * belongs to someone else after all, and continue quietly.
         */
+       if (hdr->dsm_control != 0)
+           dsm_cleanup_using_control_segment(hdr->dsm_control);
        shmdt(memAddress);
        if (shmctl(shmid, IPC_RMID, NULL) < 0)
            continue;
@@ -539,6 +544,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
    hdr = (PGShmemHeader *) memAddress;
    hdr->creatorPID = getpid();
    hdr->magic = PGShmemMagic;
+   hdr->dsm_control = 0;
 
    /* Fill in the data directory ID info, too */
    if (stat(DataDir, &statbuf) < 0)
@@ -554,6 +560,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
     */
    hdr->totalsize = size;
    hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+   *shim = hdr;
 
    /* Save info for possible future use */
    UsedShmemSegAddr = memAddress;
@@ -608,6 +615,7 @@ PGSharedMemoryReAttach(void)
    if (hdr != origUsedShmemSegAddr)
        elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
             hdr, origUsedShmemSegAddr);
+   dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control);
 
    UsedShmemSegAddr = hdr;     /* probably redundant */
 }
index dca371cce62bf02dfe07a218e1d56c403a26a3c9..3a0ded4865091fa98d071835b2f4e7e43b89dd53 100644 (file)
@@ -117,7 +117,8 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
  *
  */
 PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port)
+PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+                    PGShmemHeader **shim)
 {
    void       *memAddress;
    PGShmemHeader *hdr;
@@ -245,12 +246,14 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
     */
    hdr->totalsize = size;
    hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+   hdr->dsm_control = 0;
 
    /* Save info for possible future use */
    UsedShmemSegAddr = memAddress;
    UsedShmemSegSize = size;
    UsedShmemSegID = hmap2;
 
+   *shim = NULL;
    return hdr;
 }
 
@@ -289,6 +292,7 @@ PGSharedMemoryReAttach(void)
             hdr, origUsedShmemSegAddr);
    if (hdr->magic != PGShmemMagic)
        elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
+   dsm_set_control_handle(hdr->dsm_control);
 
    UsedShmemSegAddr = hdr;     /* probably redundant */
 }
index c967177c4b17c4eefdfce60bf24e79cfa8da725c..6c410f77d9e91bd2976618aa82adeaae7ccaca0f 100644 (file)
 #include "storage/dsm.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
+#include "storage/pg_shmem.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner_private.h"
 
-#define PG_DYNSHMEM_STATE_FILE         PG_DYNSHMEM_DIR "/state"
-#define PG_DYNSHMEM_NEW_STATE_FILE     PG_DYNSHMEM_DIR "/state.new"
-#define PG_DYNSHMEM_STATE_BUFSIZ       512
 #define PG_DYNSHMEM_CONTROL_MAGIC      0x9a503d32
 
 /*
@@ -95,10 +93,7 @@ typedef struct dsm_control_header
    dsm_control_item    item[FLEXIBLE_ARRAY_MEMBER];
 } dsm_control_header;
 
-static void dsm_cleanup_using_control_segment(void);
 static void dsm_cleanup_for_mmap(void);
-static bool dsm_read_state_file(dsm_handle *h);
-static void dsm_write_state_file(dsm_handle h);
 static void dsm_postmaster_shutdown(int code, Datum arg);
 static dsm_segment *dsm_create_descriptor(void);
 static bool dsm_control_segment_sane(dsm_control_header *control,
@@ -146,7 +141,7 @@ static void *dsm_control_impl_private = NULL;
  * startup time.
  */
 void
-dsm_postmaster_startup(void)
+dsm_postmaster_startup(PGShmemHeader *shim)
 {
    void       *dsm_control_address = NULL;
    uint32      maxitems;
@@ -159,26 +154,13 @@ dsm_postmaster_startup(void)
        return;
 
    /*
-    * Check for, and remove, shared memory segments left behind by a dead
-    * postmaster.  This isn't necessary on Windows, which always removes them
-    * when the last reference is gone.
+    * If we're using the mmap implementations, clean up any leftovers.
+    * Cleanup isn't needed on Windows, and happens earlier in startup for
+    * POSIX and System V shared memory, via a direct call to
+    * dsm_cleanup_using_control_segment.
     */
-   switch (dynamic_shared_memory_type)
-   {
-       case DSM_IMPL_POSIX:
-       case DSM_IMPL_SYSV:
-           dsm_cleanup_using_control_segment();
-           break;
-       case DSM_IMPL_MMAP:
-           dsm_cleanup_for_mmap();
-           break;
-       case DSM_IMPL_WINDOWS:
-           /* Nothing to do. */
-           break;
-       default:
-           elog(ERROR, "unknown dynamic shared memory type: %d",
-                dynamic_shared_memory_type);
-   }
+   if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
+       dsm_cleanup_for_mmap();
 
    /* Determine size for new control segment. */
    maxitems = PG_DYNSHMEM_FIXED_SLOTS
@@ -187,23 +169,30 @@ dsm_postmaster_startup(void)
        maxitems);
    segsize = dsm_control_bytes_needed(maxitems);
 
-   /* Loop until we find an unused identifier for the new control segment. */
+   /*
+    * Loop until we find an unused identifier for the new control segment.
+    * We sometimes use 0 as a sentinel value indicating that no control
+    * segment is known to exist, so avoid using that value for a real
+    * control segment.
+    */
    for (;;)
    {
        Assert(dsm_control_address == NULL);
        Assert(dsm_control_mapped_size == 0);
        dsm_control_handle = random();
+       if (dsm_control_handle == 0)
+           continue;
        if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
                        &dsm_control_impl_private, &dsm_control_address,
                        &dsm_control_mapped_size, ERROR))
            break;
    }
    dsm_control = dsm_control_address;
-   on_shmem_exit(dsm_postmaster_shutdown, 0);
+   on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
    elog(DEBUG2,
         "created dynamic shared memory control segment %u (%zu bytes)",
         dsm_control_handle, segsize);
-   dsm_write_state_file(dsm_control_handle);
+   shim->dsm_control = dsm_control_handle;
 
    /* Initialize control segment. */
    dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
@@ -216,8 +205,8 @@ dsm_postmaster_startup(void)
  * invocation still exists.  If so, remove the dynamic shared memory
  * segments to which it refers, and then the control segment itself.
  */
-static void
-dsm_cleanup_using_control_segment(void)
+void
+dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
 {
    void       *mapped_address = NULL;
    void       *junk_mapped_address = NULL;
@@ -227,14 +216,10 @@ dsm_cleanup_using_control_segment(void)
    Size        junk_mapped_size = 0;
    uint32      nitems;
    uint32      i;
-   dsm_handle  old_control_handle;
    dsm_control_header *old_control;
 
-   /*
-    * Read the state file.  If it doesn't exist or is empty, there's nothing
-    * more to do.
-    */
-   if (!dsm_read_state_file(&old_control_handle))
+   /* If dynamic shared memory is disabled, there's nothing to do. */
+   if (dynamic_shared_memory_type == DSM_IMPL_NONE)
        return;
 
    /*
@@ -346,111 +331,6 @@ dsm_cleanup_for_mmap(void)
    FreeDir(dir);
 }
 
-/*
- * Read and parse the state file.
- *
- * If the state file is empty or the contents are garbled, it probably means
- * that the operating system rebooted before the data written by the previous
- * postmaster made it to disk.  In that case, we can just ignore it; any shared
- * memory from before the reboot should be gone anyway.
- */
-static bool
-dsm_read_state_file(dsm_handle *h)
-{
-   int         statefd;
-   char        statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
-   int         nbytes = 0;
-   char       *endptr,
-              *s;
-   dsm_handle  handle;
-
-   /* Read the state file to get the ID of the old control segment. */
-   statefd = BasicOpenFile(PG_DYNSHMEM_STATE_FILE, O_RDONLY | PG_BINARY, 0);
-   if (statefd < 0)
-   {
-       if (errno == ENOENT)
-           return false;
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not open file \"%s\": %m",
-                   PG_DYNSHMEM_STATE_FILE)));
-   }
-   nbytes = read(statefd, statebuf, PG_DYNSHMEM_STATE_BUFSIZ - 1);
-   if (nbytes < 0)
-   {
-       close(statefd);
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not read file \"%s\": %m",
-                   PG_DYNSHMEM_STATE_FILE)));
-   }
-   /* make sure buffer is NUL terminated */
-   statebuf[nbytes] = '\0';
-   close(statefd);
-
-   /*
-    * We expect to find the handle of the old control segment here,
-    * on a line by itself.
-    */
-   handle = strtoul(statebuf, &endptr, 10);
-   for (s = endptr; *s == ' ' || *s == '\t'; ++s)
-       ;
-   if (*s != '\n' && *s != '\0')
-       return false;
-
-   /* Looks good. */
-   *h = handle;
-   return true;
-}
-
-/*
- * Write our control segment handle to the state file, so that if the
- * postmaster is killed without running it's on_shmem_exit hooks, the
- * next postmaster can clean things up after restart.
- */
-static void
-dsm_write_state_file(dsm_handle h)
-{
-   int         statefd;
-   char        statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
-   int         nbytes;
-
-   /* Create or truncate the file. */
-   statefd = open(PG_DYNSHMEM_NEW_STATE_FILE,
-                  O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, 0600);
-   if (statefd < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not create file \"%s\": %m",
-                   PG_DYNSHMEM_NEW_STATE_FILE)));
-
-   /* Write contents. */
-   snprintf(statebuf, PG_DYNSHMEM_STATE_BUFSIZ, "%u\n", dsm_control_handle);
-   nbytes = strlen(statebuf);
-   if (write(statefd, statebuf, nbytes) != nbytes)
-   {
-       if (errno == 0)
-           errno = ENOSPC;     /* if no error signalled, assume no space */
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not write file \"%s\": %m",
-                   PG_DYNSHMEM_NEW_STATE_FILE)));
-   }
-
-   /* Close file. */
-   close(statefd);
-
-   /*
-    * Atomically rename file into place, so that no one ever sees a partially
-    * written state file.
-    */
-   if (rename(PG_DYNSHMEM_NEW_STATE_FILE, PG_DYNSHMEM_STATE_FILE) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\": %m",
-                   PG_DYNSHMEM_NEW_STATE_FILE)));
-}
-
 /*
  * At shutdown time, we iterate over the control segment and remove all
  * remaining dynamic shared memory segments.  We avoid throwing errors here;
@@ -466,6 +346,7 @@ dsm_postmaster_shutdown(int code, Datum arg)
    void       *junk_mapped_address = NULL;
    void       *junk_impl_private = NULL;
    Size        junk_mapped_size = 0;
+   PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
 
    /*
     * If some other backend exited uncleanly, it might have corrupted the
@@ -510,13 +391,7 @@ dsm_postmaster_shutdown(int code, Datum arg)
                &dsm_control_impl_private, &dsm_control_address,
                &dsm_control_mapped_size, LOG);
    dsm_control = dsm_control_address;
-
-   /* And, finally, remove the state file. */
-   if (unlink(PG_DYNSHMEM_STATE_FILE) < 0)
-       ereport(LOG,
-               (errcode_for_file_access(),
-                errmsg("could not unlink file \"%s\": %m",
-                   PG_DYNSHMEM_STATE_FILE)));
+   shim->dsm_control = 0;
 }
 
 /*
@@ -536,25 +411,18 @@ dsm_backend_startup(void)
 
 #ifdef EXEC_BACKEND
    {
-       dsm_handle  control_handle;
        void       *control_address = NULL;
 
-       /* Read the control segment information from the state file. */
-       if (!dsm_read_state_file(&control_handle))
-           ereport(ERROR,
-                   (errcode(ERRCODE_INTERNAL_ERROR),
-                    errmsg("could not parse dynamic shared memory state file")));
-
        /* Attach control segment. */
-       dsm_impl_op(DSM_OP_ATTACH, control_handle, 0,
+       Assert(dsm_control_handle != 0);
+       dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
                    &dsm_control_impl_private, &control_address,
                    &dsm_control_mapped_size, ERROR);
-       dsm_control_handle = control_handle;
        dsm_control = control_address;
        /* If control segment doesn't look sane, something is badly wrong. */
        if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
        {
-           dsm_impl_op(DSM_OP_DETACH, control_handle, 0,
+           dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
                        &dsm_control_impl_private, &control_address,
                        &dsm_control_mapped_size, WARNING);
            ereport(FATAL,
@@ -567,6 +435,20 @@ dsm_backend_startup(void)
    dsm_init_done = true;
 }
 
+#ifdef EXEC_BACKEND
+/*
+ * When running under EXEC_BACKEND, we get a callback here when the main
+ * shared memory segment is re-attached, so that we can record the control
+ * handle retrieved from it.
+ */
+void
+dsm_set_control_handle(dsm_handle h)
+{
+   Assert(dsm_control_handle == 0 && h != 0);
+   dsm_control_handle = h;
+}
+#endif
+
 /*
  * Create a new dynamic shared memory segment.
  */
index c392d4fa228a3e1403ea4d340aec2b7d9d8e1f86..4290d2dc81dc2419b43a3f33ab1b561c963f3c3c 100644 (file)
@@ -90,6 +90,8 @@ RequestAddinShmemSpace(Size size)
 void
 CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
 {
+   PGShmemHeader *shim = NULL;
+
    if (!IsUnderPostmaster)
    {
        PGShmemHeader *seghdr;
@@ -149,7 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
        /*
         * Create the shmem segment
         */
-       seghdr = PGSharedMemoryCreate(size, makePrivate, port);
+       seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim);
 
        InitShmemAccess(seghdr);
 
@@ -254,7 +256,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
 
    /* Initialize dynamic shared memory facilities. */
    if (!IsUnderPostmaster)
-       dsm_postmaster_startup();
+       dsm_postmaster_startup(shim);
 
    /*
     * Now give loadable modules a chance to set up their shmem allocations
index e4669a1e3c1038cc9ffad57d4721185ef0c4e04c..272787adc6b2e3ad774716f73f5e13ef8aa43999 100644 (file)
 typedef struct dsm_segment dsm_segment;
 
 /* Startup and shutdown functions. */
-extern void dsm_postmaster_startup(void);
+struct PGShmemHeader;      /* avoid including pg_shmem.h */
+extern void dsm_cleanup_using_control_segment(dsm_handle old_control_handle);
+extern void dsm_postmaster_startup(struct PGShmemHeader *);
 extern void dsm_backend_shutdown(void);
 extern void dsm_detach_all(void);
 
+#ifdef EXEC_BACKEND
+extern void dsm_set_control_handle(dsm_handle h);
+#endif
+
 /* Functions that create, update, or remove mappings. */
 extern dsm_segment *dsm_create(Size size);
 extern dsm_segment *dsm_attach(dsm_handle h);
index 0dc960b597f99535a1fc3a9149f85d4d17bf0289..ab28ebee846cb4069e7ac40f0c8bf81a61b238b2 100644 (file)
@@ -24,6 +24,8 @@
 #ifndef PG_SHMEM_H
 #define PG_SHMEM_H
 
+#include "storage/dsm_impl.h"
+
 typedef struct PGShmemHeader   /* standard header for all Postgres shmem */
 {
    int32       magic;          /* magic # to identify Postgres segments */
@@ -31,6 +33,7 @@ typedef struct PGShmemHeader  /* standard header for all Postgres shmem */
    pid_t       creatorPID;     /* PID of creating process */
    Size        totalsize;      /* total size of segment */
    Size        freeoffset;     /* offset to first free space */
+   dsm_handle  dsm_control;    /* ID of dynamic shared memory control seg */
    void       *index;          /* pointer to ShmemIndex table */
 #ifndef WIN32                  /* Windows doesn't have useful inode#s */
    dev_t       device;         /* device data directory is on */
@@ -61,7 +64,7 @@ extern void PGSharedMemoryReAttach(void);
 #endif
 
 extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate,
-                    int port);
+                    int port, PGShmemHeader **shim);
 extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
 extern void PGSharedMemoryDetach(void);