Some additional doc changes based around compression of page images in

author Bruce Momjian

Thu, 13 Oct 2005 17:32:42 +0000 (17:32 +0000)

committer Bruce Momjian

Thu, 13 Oct 2005 17:32:42 +0000 (17:32 +0000)
author Bruce Momjian
Thu, 13 Oct 2005 17:32:42 +0000 (17:32 +0000)
committer Bruce Momjian
Thu, 13 Oct 2005 17:32:42 +0000 (17:32 +0000)
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml

index 01cdae83d69f3a6352bf0eaed03ad0000b8adad8..4dbeae9fd66851d6f23a125f2982c8ed183adfc9 100644 (file)
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -1,5 +1,5 @@
  
  
   Backup and Restore
@@ -1147,13 +1147,22 @@ restore_command = 'copy /mnt/server/archivedir/%f "%p"'  # Windows
     
  
     
-    It should also be noted that the present WAL
-    format is extremely bulky since it includes many disk page
-    snapshots.  This is appropriate for crash recovery purposes,
+    It should also be noted that the default WAL
+    format is fairly bulky since it includes many disk page snapshots. The pages
+    are partially compressed, using the simple expedient of removing the
+    empty space (if any) within each block. You can significantly reduce
+    the total volume of archived logs by turning off page snapshots 
+    using the <xref linkend="guc-full-page-writes"> parameter, 
+    though you should read the notes and warnings in 
+     before you do so. 
+    These page snapshots are designed to allow crash recovery,
      since we may need to fix partially-written disk pages.  It is not
-    necessary to store so many page copies for PITR operations, however.
+    necessary to store these page copies for PITR operations, however.
+    If you turn off <xref linkend="guc-full-page-writes">, your PITR
+    backup and recovery operations will continue to work successfully.
      An area for future development is to compress archived WAL data by
-    removing unnecessary page copies.  In the meantime, administrators
+    removing unnecessary page copies when <xref linkend="guc-full-page-writes">
+    is turned on.  In the meantime, administrators
      may wish to reduce the number of page snapshots included in WAL by
      increasing the checkpoint interval parameters as much as feasible.
     
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index 68557a26d23871e80793433f4aa5160adf57b512..5da06fddce351a420aa46dad56270de608f3f5c3 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1,5 +1,5 @@
  
  
    Run-time Configuration
@@ -1360,7 +1360,7 @@ SET ENABLE_SEQSCAN TO OFF;
         
          When this option is on, the PostgreSQL server
          writes full pages to WAL when they are first modified after a
-        checkpoint so full recovery is possible. Turning this option off
+        checkpoint so crash recovery is possible. Turning this option off
          might lead to a corrupt system after an operating system crash
          or power failure because uncorrected partial pages might contain
          inconsistent or corrupt data. The risks are less but similar to
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml

index 8f96f483622f538334b4665c75c21f0e40b5ebac..62595c594e4fd00fc6d15b7883f1824e98df82fe 100644 (file)
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -1,4 +1,4 @@
-
+
  
  
   Reliability
@@ -12,7 +12,7 @@
     failure (unrelated to the non-volatile area itself). To accomplish
     this, PostgreSQL uses the magnetic platters of modern
     disk drives for permanent storage that is immune to the failures
-   listed above. In fact, a computer can be completely destroyed, but if
+   listed above. In fact, even if a computer is fatally damaged, if
     the disk drives survive they can be moved to another computer with
     similar hardware and all committed transactions will remain intact.
    
@@ -68,11 +68,13 @@
     these partially written cases. To guard against that,
     PostgreSQL periodically writes full page images to
     permanent storage before modifying the actual page on
-   disk. By doing this, during recovery PostgreSQL can
+   disk. By doing this, during crash recovery PostgreSQL can
     restore partially-written pages. If you have a battery-backed disk
-   controller that prevents partial page writes, you can turn off this
-   page imaging by using the 
-   parameter.
+   controller or filesystem (e.g. Reiser4) that prevents partial page writes, 
+   you can turn off this page imaging by using the 
+   <xref linkend="guc-full-page-writes"> parameter. This parameter has no 
+   effect on the successful use of Point in Time Recovery (PITR), 
+   described in .
    
   
    
@@ -107,14 +109,10 @@
      the data pages can be redone from the log records.  (This is
      roll-forward recovery, also known as REDO.)
     
-  
-
-  
-   Benefits of Write-Ahead Logging
  
-   <indexterm zone="wal-benefits">
-    fsync
-   </indexterm>
+   <para>
+    WAL brings three major benefits:
+   </para>
  
     
      The first major benefit of using WAL is a
@@ -131,11 +129,11 @@
     
  
     
-    The next benefit is consistency of the data pages. The truth is
-    that, before WAL,
+    The next benefit is crash recovery protection. The truth is
+    that, before WAL was introduced back in release 7.1,
      PostgreSQL was never able to guarantee
-    consistency in the case of a crash.  Before
-    WAL, any crash during writing could result in:
+    consistency in the case of a crash.  Now, 
+    WAL protects fully against the following problems:
  
      
       
@@ -151,13 +149,6 @@
        of partially written data pages
       
      
-
-    Problems with indexes (problems 1 and 2) could possibly have been
-    fixed by additional fsync calls, but it is
-    not obvious how to handle the last case without
-    WAL.  WAL saves the entire data
-    page content in the log if that is required to ensure page
-    consistency for after-crash recovery.
     
  
     
@@ -214,12 +205,14 @@
     checkpoint_timeout causes checkpoints to be done
     more often. This allows faster after-crash recovery (since less work
     will need to be redone). However, one must balance this against the
-   increased cost of flushing dirty data pages more often. In addition,
-   to ensure data page consistency, the first modification of a data
-   page after each checkpoint results in logging the entire page
-   content. Thus a smaller checkpoint interval increases the volume of
-   output to the WAL log, partially negating the goal of using a smaller
-   interval, and in any case causing more disk I/O.
+   increased cost of flushing dirty data pages more often. If 
+   <xref linkend="guc-full-page-writes"> is set (the default), there is 
+   another factor to consider. To ensure data page consistency, 
+   the first modification of a data page after each checkpoint results in 
+   logging the entire page content. In that case,
+   a smaller checkpoint interval increases the volume of output to the WAL log,
+   partially negating the goal of using a smaller interval, 
+   and in any case causing more disk I/O.
    
  
    
@@ -234,7 +227,9 @@
     a message will be output to the server log recommending increasing 
     checkpoint_segments.  Occasional appearance of such
     a message is not cause for alarm, but if it appears often then the
-   checkpoint control parameters should be increased.
+   checkpoint control parameters should be increased. Bulk operations such
+   as a COPY, INSERT SELECT etc. may cause a number of such warnings if you
+   do not set <xref linkend="guc-checkpoint-segments"> high enough.
    
  
    
@@ -252,7 +247,7 @@
    
  
    
-   There are two commonly used WAL functions:
+   There are two commonly used internal WAL functions:
     LogInsert and LogFlush.
     LogInsert is used to place a new record into
     the WAL buffers in shared memory. If there is no
@@ -275,9 +270,11 @@
     modifying the configuration parameter <xref
    linkend="guc-wal-buffers">.  The default number of WAL
    buffers is 8.  Increasing this value will
-   correspondingly increase shared memory usage.  (It should be noted
-   that there is presently little evidence to suggest that increasing
-   wal_buffers beyond the default is worthwhile.)
+   correspondingly increase shared memory usage.  When 
+   <xref linkend="guc-full-page-writes"> is set and the system is very busy, 
+   setting this value higher will help smooth response times during the 
+   period immediately following each checkpoint.  As a guide, a setting of 1024 
+   would be considered to be high.
   
 
   
@@ -313,7 +310,8 @@
    (provided that PostgreSQL has been
    compiled with support for it) will result in each
    LogInsert and LogFlush
-   WAL call being logged to the server log. This
+   WAL call being logged to the server log. The output
+   is too verbose for use as a guide to performance tuning. This
    option may be replaced by a more general mechanism in the future.
   
  
     linkend="guc-wal-buffers">.  The default number of WAL
     buffers is 8.  Increasing this value will
-   correspondingly increase shared memory usage.  (It should be noted
-   that there is presently little evidence to suggest that increasing
-   wal_buffers beyond the default is worthwhile.)
+   correspondingly increase shared memory usage.  When 
+   <xref linkend="guc-full-page-writes"> is set and the system is very busy, 
+   setting this value higher will help smooth response times during the 
+   period immediately following each checkpoint.  As a guide, a setting of 1024 
+   would be considered to be high.
    
  
    
@@ -313,7 +310,8 @@
     (provided that PostgreSQL has been
     compiled with support for it) will result in each
     LogInsert and LogFlush
-   WAL call being logged to the server log. This
+   WAL call being logged to the server log. The output
+   is too verbose for use as a guide to performance tuning. This
     option may be replaced by a more general mechanism in the future.
author	Bruce Momjian
	Thu, 13 Oct 2005 17:32:42 +0000 (17:32 +0000)
committer	Bruce Momjian
	Thu, 13 Oct 2005 17:32:42 +0000 (17:32 +0000)
doc/src/sgml/backup.sgml		patch \| blob \| blame \| history
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
doc/src/sgml/wal.sgml		patch \| blob \| blame \| history