Sync unlogged relations to disk after they have been reset.
author: Andres Freund
Fri, 14 Nov 2014 17:21:30 +0000 (18:21 +0100)
committer: Andres Freund
Sat, 15 Nov 2014 00:21:07 +0000 (01:21 +0100)
Unlogged relations are only reset when performing an unclean
restart. That means they have to be synced to disk during clean
shutdowns. During normal processing that's achieved by registering a
buffer's file to be fsynced at the next checkpoint when flushed. But
ResetUnloggedRelations() doesn't go through the buffer manager, so
nothing will force reset relations to disk before the next shutdown
checkpoint.

So just make ResetUnloggedRelations() fsync the newly created main
forks to disk.

Discussion: 20140912112246[email protected]

Backpatch to 9.1 where unlogged tables were introduced.

Abhijit Menon-Sen and Andres Freund

src/backend/storage/file/reinit.c

index d62d5848a7a39d7dcc6b04c68f49fe4f50e9f444..31c18f1c788d9ba969866cd06272d0c8e28b5bde 100644 (file)
@@ -339,6 +339,53 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
        }
 
        FreeDir(dbspace_dir);
+
+       /*
+        * copy_file() above has already called pg_flush_data() on the
+        * files it created. Now we need to fsync those files, because
+        * a checkpoint won't do it for us while we're in recovery. We
+        * do this in a separate pass to allow the kernel to perform
+        * all the flushes (especially the metadata ones) at once.
+        */
+       dbspace_dir = AllocateDir(dbspacedirname);
+       if (dbspace_dir == NULL)
+       {
+           /* we just saw this directory, so it really ought to be there */
+           elog(LOG,
+                "could not open dbspace directory \"%s\": %m",
+                dbspacedirname);
+           return;
+       }
+
+       while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
+       {
+           ForkNumber  forkNum;
+           int         oidchars;
+           char        oidbuf[OIDCHARS + 1];
+           char        mainpath[MAXPGPATH];
+
+           /* Skip anything that doesn't look like a relation data file. */
+           if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
+                                                    &forkNum))
+               continue;
+
+           /* Also skip it unless this is the init fork. */
+           if (forkNum != INIT_FORKNUM)
+               continue;
+
+           /* Construct main fork pathname. */
+           memcpy(oidbuf, de->d_name, oidchars);
+           oidbuf[oidchars] = '\0';
+           snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
+                    dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
+                    strlen(forkNames[INIT_FORKNUM]));
+
+           fsync_fname(mainpath, false);
+       }
+
+       FreeDir(dbspace_dir);
+
+       fsync_fname((char *) dbspacedirname, true);
    }
 }