From 8087446b143fbb8da962a138fef393dfe8314e3a Mon Sep 17 00:00:00 2001
From: Bruce Momjian <bruce@momjian.us>
Date: Fri, 10 Oct 2003 02:08:42 +0000
Subject: [PATCH] > That's a fairly useless place to put it, though, since
 someone would > only think to look at sort_mem if they already had a clue. 
 It should > be mentioned under bulk data load (in performance tips chapter)

Attached is a doc patch that does this. The way I've worded it may not
be the best, though.

Neil Conway
---
 doc/src/sgml/perform.sgml | 30 +++++++++++++++++++++---------
 doc/src/sgml/runtime.sgml |  7 ++++---
 2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index 6b7afe2b025..2eaab4d0f0d 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.33 2003/09/11 18:30:38 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.34 2003/10/10 02:08:42 momjian Exp $
 -->
 
  <chapter id="performance-tips">
@@ -751,11 +751,10 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
 
    <para>
     Use <command>COPY FROM STDIN</command> to load all the rows in one
-    command, instead of using
-    a series of <command>INSERT</command> commands.  This reduces parsing,
-    planning, etc.
-    overhead a great deal. If you do this then it is not necessary to turn
-    off autocommit, since it is only one command anyway.
+    command, instead of using a series of <command>INSERT</command>
+    commands.  This greatly reduces parsing, planning, and similar
+    overhead.  If you do this then it is not necessary to turn off
+    autocommit, since it is only one command anyway.
    </para>
   </sect2>
 
@@ -764,9 +763,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
 
    <para>
     If you are loading a freshly created table, the fastest way is to
-    create the table, bulk-load with <command>COPY</command>, then create any
-    indexes needed 
-    for the table.  Creating an index on pre-existing data is quicker than
+    create the table, bulk load the table's data using
+    <command>COPY</command>, then create any indexes needed for the
+    table.  Creating an index on pre-existing data is quicker than
     updating it incrementally as each row is loaded.
    </para>
 
@@ -780,6 +779,19 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
    </para>
   </sect2>
 
+  <sect2 id="populate-sort-mem">
+   <title>Increase <varname>sort_mem</varname></title>
+
+   <para>
+    Temporarily increasing the <varname>sort_mem</varname>
+    configuration variable when restoring large amounts of data can
+    lead to improved performance. This is because when a B-tree index
+    is created from scratch, the existing content of the table needs
+    to be sorted. Allowing the merge sort to use more buffer pages
+    means that fewer merge passes will be required.
+   </para>
+  </sect2>
+
   <sect2 id="populate-analyze">
    <title>Run <command>ANALYZE</command> Afterwards</title>
 
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 4854d0fbaeb..4928aeda472 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.212 2003/10/09 19:05:09 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.213 2003/10/10 02:08:42 momjian Exp $
 -->
 
 <Chapter Id="runtime">
@@ -928,8 +928,9 @@ SET ENABLE_SEQSCAN TO OFF;
         by <literal>ORDER BY</>, merge joins, and <command>CREATE INDEX</>.
         Hash tables are used in hash joins, hash-based aggregation, and
         hash-based processing of <literal>IN</> subqueries.  Because 
-        <command>CREATE INDEX</> is used when restoring a database, it might
-        be good to temporarily increase this value during a restore.
+        <command>CREATE INDEX</> is used when restoring a database,
+        increasing <varname>sort_mem</varname> before doing a large
+        restore operation can improve performance.
        </para>
       </listitem>
      </varlistentry>
-- 
2.39.5