# Makefile for access/hash
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/access/hash/Makefile,v 1.14 2008/02/19 10:30:06 petere Exp $
+# $PostgreSQL: pgsql/src/backend/access/hash/Makefile,v 1.15 2008/03/16 23:15:08 tgl Exp $
#
#-------------------------------------------------------------------------
include $(top_builddir)/src/Makefile.global
OBJS = hash.o hashfunc.o hashinsert.o hashovfl.o hashpage.o hashscan.o \
- hashsearch.o hashutil.o
+ hashsearch.o hashsort.o hashutil.o
include $(top_srcdir)/src/backend/common.mk
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.99 2008/03/15 20:46:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.100 2008/03/16 23:15:08 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
#include "access/hash.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
+#include "optimizer/cost.h"
#include "optimizer/plancat.h"
/* Working state for hashbuild and its callback */
typedef struct
{
- double indtuples;
+ HSpool *spool; /* NULL if not using spooling */
+ double indtuples; /* # tuples accepted into index */
} HashBuildState;
static void hashbuildCallback(Relation index,
IndexBuildResult *result;
BlockNumber relpages;
double reltuples;
+ uint32 num_buckets;
HashBuildState buildstate;
/*
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
- /* estimate the number of rows currently present in the table */
+ /* Estimate the number of rows currently present in the table */
estimate_rel_size(heap, NULL, &relpages, &reltuples);
- /* initialize the hash index metadata page and initial buckets */
- _hash_metapinit(index, reltuples);
+ /* Initialize the hash index metadata page and initial buckets */
+ num_buckets = _hash_metapinit(index, reltuples);
- /* build the index */
+ /*
+ * If we just insert the tuples into the index in scan order, then
+ * (assuming their hash codes are pretty random) there will be no locality
+ * of access to the index, and if the index is bigger than available RAM
+ * then we'll thrash horribly. To prevent that scenario, we can sort the
+ * tuples by (expected) bucket number. However, such a sort is useless
+ * overhead when the index does fit in RAM. We choose to sort if the
+ * initial index size exceeds effective_cache_size.
+ *
+ * NOTE: this test will need adjustment if a bucket is ever different
+ * from one page.
+ */
+ if (num_buckets >= (uint32) effective_cache_size)
+ buildstate.spool = _h_spoolinit(index, num_buckets);
+ else
+ buildstate.spool = NULL;
+
+ /* prepare to build the index */
buildstate.indtuples = 0;
/* do the heap scan */
reltuples = IndexBuildHeapScan(heap, index, indexInfo,
hashbuildCallback, (void *) &buildstate);
+ if (buildstate.spool)
+ {
+ /* sort the tuples and insert them into the index */
+ _h_indexbuild(buildstate.spool);
+ _h_spooldestroy(buildstate.spool);
+ }
+
/*
* Return statistics
*/
return;
}
- _hash_doinsert(index, itup);
+ /* Either spool the tuple for sorting, or just put it into the index */
+ if (buildstate->spool)
+ _h_spool(itup, buildstate->spool);
+ else
+ _hash_doinsert(index, itup);
buildstate->indtuples += 1;
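
The threshold test in hashbuild() above compares the initial bucket count against effective_cache_size, both measured in pages (one bucket currently occupies one page). A rough worked example of that test, assuming the stock 8kB block size and the 8.3-era default effective_cache_size of 128MB; the numbers are illustrative and not taken from the patch:

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        /* 128MB of cache at 8kB per page = 16384 pages */
        uint32_t    effective_cache_size = (128 * 1024) / 8;
        /* hypothetical initial bucket count from _hash_metapinit's estimate */
        uint32_t    num_buckets = 20000;

        /* same comparison as hashbuild(): one bucket == one page at present */
        if (num_buckets >= effective_cache_size)
            printf("index larger than cache: spool and sort before inserting\n");
        else
            printf("index fits in cache: insert tuples in heap-scan order\n");
        return 0;
    }
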
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.73 2008/03/15 20:46:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.74 2008/03/16 23:15:08 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
* the initial buckets, and the initial bitmap page.
*
* The initial number of buckets is dependent on num_tuples, an estimate
- * of the number of tuples to be loaded into the index initially.
+ * of the number of tuples to be loaded into the index initially. The
+ * chosen number of buckets is returned.
*
* We are fairly cavalier about locking here, since we know that no one else
* could be accessing this index. In particular the rule about not holding
* multiple buffer locks is ignored.
*/
-void
+uint32
_hash_metapinit(Relation rel, double num_tuples)
{
HashMetaPage metap;
metap->hashm_ovflpoint = log2_num_buckets;
metap->hashm_firstfree = 0;
+ /*
+ * Release buffer lock on the metapage while we initialize buckets.
+ * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
+ * won't accomplish anything. It's a bad idea to hold buffer locks
+ * for long intervals in any case, since that can block the bgwriter.
+ */
+ _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
+
/*
* Initialize the first N buckets
*/
for (i = 0; i < num_buckets; i++)
{
+ /* Allow interrupts, in case N is huge */
+ CHECK_FOR_INTERRUPTS();
+
buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
pg = BufferGetPage(buf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
_hash_wrtbuf(rel, buf);
}
+ /* Now reacquire buffer lock on metapage */
+ _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
+
/*
* Initialize first bitmap page
*/
/* all done */
_hash_wrtbuf(rel, metabuf);
+
+ return num_buckets;
}
/*
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hashsort.c
+ * Sort tuples for insertion into a new hash index.
+ *
+ * When building a very large hash index, we pre-sort the tuples by bucket
+ * number to improve locality of access to the index, and thereby avoid
+ * thrashing. We use tuplesort.c to sort the given index tuples into order.
+ *
+ * Note: if the number of rows in the table has been underestimated,
+ * bucket splits may occur during the index build. In that case we'd
+ * be inserting into two or more buckets for each possible masked-off
+ * hash code value. That's no big problem though, since we'll still have
+ * plenty of locality of access.
+ *
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashsort.c,v 1.1 2008/03/16 23:15:08 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "miscadmin.h"
+#include "utils/tuplesort.h"
+
+
+/*
+ * Status record for spooling/sorting phase.
+ */
+struct HSpool
+{
+ Tuplesortstate *sortstate; /* state data for tuplesort.c */
+ Relation index;
+};
+
+
+/*
+ * create and initialize a spool structure
+ */
+HSpool *
+_h_spoolinit(Relation index, uint32 num_buckets)
+{
+ HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool));
+ uint32 hash_mask;
+
+ hspool->index = index;
+
+ /*
+ * Determine the bitmask for hash code values. Since there are currently
+ * num_buckets buckets in the index, the appropriate mask can be computed
+ * as follows.
+ *
+ * Note: at present, the passed-in num_buckets is always a power of 2,
+ * so we could just compute num_buckets - 1. We prefer not to assume
+ * that here, though.
+ */
+ hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
+
+ /*
+ * We size the sort area as maintenance_work_mem rather than work_mem to
+ * speed index creation. This should be OK since a single backend can't
+ * run multiple index creations in parallel.
+ */
+ hspool->sortstate = tuplesort_begin_index_hash(index,
+ hash_mask,
+ maintenance_work_mem,
+ false);
+
+ return hspool;
+}
+
+/*
+ * clean up a spool structure and its substructures.
+ */
+void
+_h_spooldestroy(HSpool *hspool)
+{
+ tuplesort_end(hspool->sortstate);
+ pfree(hspool);
+}
+
+/*
+ * spool an index entry into the sort file.
+ */
+void
+_h_spool(IndexTuple itup, HSpool *hspool)
+{
+ tuplesort_putindextuple(hspool->sortstate, itup);
+}
+
+/*
+ * given a spool loaded by successive calls to _h_spool,
+ * create an entire index.
+ */
+void
+_h_indexbuild(HSpool *hspool)
+{
+ IndexTuple itup;
+ bool should_free;
+
+ tuplesort_performsort(hspool->sortstate);
+
+ while ((itup = tuplesort_getindextuple(hspool->sortstate,
+ true, &should_free)) != NULL)
+ {
+ _hash_doinsert(hspool->index, itup);
+ if (should_free)
+ pfree(itup);
+ }
+}
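
The mask computed in _h_spoolinit() is the smallest all-ones value that covers num_buckets, i.e. (1 << _hash_log2(num_buckets)) - 1. A small standalone sketch of that arithmetic; ceil_log2 here is a stand-in for the backend's _hash_log2 and is shown only for illustration:

    #include <stdio.h>
    #include <stdint.h>

    /* stand-in for _hash_log2(): smallest i such that (1 << i) >= num */
    static uint32_t
    ceil_log2(uint32_t num)
    {
        uint32_t    i = 0;
        uint32_t    limit = 1;

        while (limit < num)
        {
            limit <<= 1;
            i++;
        }
        return i;
    }

    int
    main(void)
    {
        uint32_t    num_buckets = 1000;     /* hypothetical, not a power of 2 */
        uint32_t    hash_mask = (((uint32_t) 1) << ceil_log2(num_buckets)) - 1;

        /* prints 0x3ff (1023): hash codes are sorted on their low 10 bits */
        printf("hash_mask = 0x%x\n", hash_mask);
        return 0;
    }
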
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.114 2008/01/01 19:45:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.115 2008/03/16 23:15:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* work_mem.
*/
btKbytes = isdead ? work_mem : maintenance_work_mem;
- btspool->sortstate = tuplesort_begin_index(index, isunique,
- btKbytes, false);
+ btspool->sortstate = tuplesort_begin_index_btree(index, isunique,
+ btKbytes, false);
return btspool;
}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.81 2008/01/01 19:45:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.82 2008/03/16 23:15:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include <limits.h>
+#include "access/hash.h"
#include "access/heapam.h"
#include "access/nbtree.h"
#include "catalog/pg_amop.h"
/*
* These variables are specific to the IndexTuple case; they are set by
- * tuplesort_begin_index and used only by the IndexTuple routines.
+ * tuplesort_begin_index_xxx and used only by the IndexTuple routines.
*/
- Relation indexRel;
+ Relation indexRel; /* index being built */
+
+ /* These are specific to the index_btree subcase: */
ScanKey indexScanKey;
bool enforceUnique; /* complain if we find duplicate tuples */
+ /* These are specific to the index_hash subcase: */
+ uint32 hash_mask; /* mask for sortable part of hash code */
+
/*
* These variables are specific to the Datum case; they are set by
* tuplesort_begin_datum and used only by the DatumTuple routines.
static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
int tapenum, unsigned int len);
static void reversedirection_heap(Tuplesortstate *state);
-static int comparetup_index(const SortTuple *a, const SortTuple *b,
+static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
+static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);
static void writetup_index(Tuplesortstate *state, int tapenum,
SortTuple *stup);
static void readtup_index(Tuplesortstate *state, SortTuple *stup,
int tapenum, unsigned int len);
-static void reversedirection_index(Tuplesortstate *state);
+static void reversedirection_index_btree(Tuplesortstate *state);
+static void reversedirection_index_hash(Tuplesortstate *state);
static int comparetup_datum(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup);
}
Tuplesortstate *
-tuplesort_begin_index(Relation indexRel,
- bool enforceUnique,
- int workMem, bool randomAccess)
+tuplesort_begin_index_btree(Relation indexRel,
+ bool enforceUnique,
+ int workMem, bool randomAccess)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
MemoryContext oldcontext;
state->nKeys = RelationGetNumberOfAttributes(indexRel);
- state->comparetup = comparetup_index;
+ state->comparetup = comparetup_index_btree;
state->copytup = copytup_index;
state->writetup = writetup_index;
state->readtup = readtup_index;
- state->reversedirection = reversedirection_index;
+ state->reversedirection = reversedirection_index_btree;
state->indexRel = indexRel;
- /* see comments below about btree dependence of this code... */
state->indexScanKey = _bt_mkscankey_nodata(indexRel);
state->enforceUnique = enforceUnique;
return state;
}
+Tuplesortstate *
+tuplesort_begin_index_hash(Relation indexRel,
+ uint32 hash_mask,
+ int workMem, bool randomAccess)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(state->sortcontext);
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin index sort: hash_mask = 0x%x, workMem = %d, randomAccess = %c",
+ hash_mask,
+ workMem, randomAccess ? 't' : 'f');
+#endif
+
+ state->nKeys = 1; /* Only one sort column, the hash code */
+
+ state->comparetup = comparetup_index_hash;
+ state->copytup = copytup_index;
+ state->writetup = writetup_index;
+ state->readtup = readtup_index;
+ state->reversedirection = reversedirection_index_hash;
+
+ state->indexRel = indexRel;
+ state->hash_mask = hash_mask;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
Tuplesortstate *
tuplesort_begin_datum(Oid datumType,
Oid sortOperator, bool nullsFirstFlag,
/*
* Routines specialized for IndexTuple case
*
- * NOTE: actually, these are specialized for the btree case; it's not
- * clear whether you could use them for a non-btree index. Possibly
- * you'd need to make another set of routines if you needed to sort
- * according to another kind of index.
+ * The btree and hash cases require separate comparison functions, but the
+ * IndexTuple representation is the same so the copy/write/read support
+ * functions can be shared.
*/
static int
-comparetup_index(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
+comparetup_index_btree(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state)
{
/*
* This is similar to _bt_tuplecompare(), but we have already done the
return 0;
}
+static int
+comparetup_index_hash(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state)
+{
+ /*
+ * It's slightly annoying to redo the hash function each time, although
+ * most hash functions ought to be cheap. Is it worth having a variant
+ * tuple storage format so we can store the hash code?
+ */
+ uint32 hash1;
+ uint32 hash2;
+ IndexTuple tuple1;
+ IndexTuple tuple2;
+
+ /* Allow interrupting long sorts */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Compute hash codes and mask off bits we don't want to sort by */
+ Assert(!a->isnull1);
+ Assert(!b->isnull1);
+
+ hash1 = _hash_datum2hashkey(state->indexRel, a->datum1) & state->hash_mask;
+ hash2 = _hash_datum2hashkey(state->indexRel, b->datum1) & state->hash_mask;
+
+ if (hash1 > hash2)
+ return 1;
+ else if (hash1 < hash2)
+ return -1;
+
+ /*
+ * If hash values are equal, we sort on ItemPointer. This does not affect
+ * validity of the finished index, but it offers cheap insurance against
+ * performance problems with bad qsort implementations that have trouble
+ * with large numbers of equal keys.
+ */
+ tuple1 = (IndexTuple) a->tuple;
+ tuple2 = (IndexTuple) b->tuple;
+
+ {
+ BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
+ BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
+
+ if (blk1 != blk2)
+ return (blk1 < blk2) ? -1 : 1;
+ }
+ {
+ OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
+ OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
+
+ if (pos1 != pos2)
+ return (pos1 < pos2) ? -1 : 1;
+ }
+
+ return 0;
+}
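
To see why the ItemPointer tiebreak matters, note that only the masked-off low bits of the hash code participate in the comparison, so many distinct hash codes compare as equal. A short illustration with made-up hash codes and a 10-bit mask:

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint32_t    hash_mask = 0x3ff;      /* 10-bit mask, as for ~1000 buckets */
        uint32_t    hash_a = 0x12345678;    /* made-up hash codes */
        uint32_t    hash_b = 0xabcd0678;

        /* both mask to 0x278, so they sort into the same initial bucket and
         * comparetup_index_hash() falls through to the block/offset comparison */
        printf("a & mask = 0x%x, b & mask = 0x%x, equal = %d\n",
               hash_a & hash_mask, hash_b & hash_mask,
               (hash_a & hash_mask) == (hash_b & hash_mask));
        return 0;
    }
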
+
static void
copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
{
}
static void
-reversedirection_index(Tuplesortstate *state)
+reversedirection_index_btree(Tuplesortstate *state)
{
ScanKey scanKey = state->indexScanKey;
int nkey;
}
}
+static void
+reversedirection_index_hash(Tuplesortstate *state)
+{
+ /* We don't support reversing direction in a hash index sort */
+ elog(ERROR, "reversedirection_index_hash is not implemented");
+}
+
/*
* Routines specialized for DatumTuple case
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.85 2008/03/15 20:46:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.86 2008/03/16 23:15:08 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
extern void _hash_wrtbuf(Relation rel, Buffer buf);
extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
int to_access);
-extern void _hash_metapinit(Relation rel, double num_tuples);
+extern uint32 _hash_metapinit(Relation rel, double num_tuples);
extern void _hash_pageinit(Page page, Size size);
extern void _hash_expandtable(Relation rel, Buffer metabuf);
extern bool _hash_first(IndexScanDesc scan, ScanDirection dir);
extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
+/* hashsort.c */
+typedef struct HSpool HSpool; /* opaque struct in hashsort.c */
+
+extern HSpool *_h_spoolinit(Relation index, uint32 num_buckets);
+extern void _h_spooldestroy(HSpool *hspool);
+extern void _h_spool(IndexTuple itup, HSpool *hspool);
+extern void _h_indexbuild(HSpool *hspool);
+
/* hashutil.c */
extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
extern uint32 _hash_datum2hashkey(Relation rel, Datum key);
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.28 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.29 2008/03/16 23:15:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* rather than forming actual HeapTuples (which'd have to be converted to
* MinimalTuples).
*
- * Yet a third slightly different interface supports sorting bare Datums.
+ * The IndexTuple case is itself broken into two subcases, one for btree
+ * indexes and one for hash indexes; the latter variant actually sorts
+ * the tuples by hash code. The API is the same except for the "begin"
+ * routine.
+ *
+ * Yet another slightly different interface supports sorting bare Datums.
*/
extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
int nkeys, AttrNumber *attNums,
Oid *sortOperators, bool *nullsFirstFlags,
int workMem, bool randomAccess);
-extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
- bool enforceUnique,
- int workMem, bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_index_btree(Relation indexRel,
+ bool enforceUnique,
+ int workMem, bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_index_hash(Relation indexRel,
+ uint32 hash_mask,
+ int workMem, bool randomAccess);
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
Oid sortOperator, bool nullsFirstFlag,
int workMem, bool randomAccess);
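
As the header comment notes, the hash subcase shares the put/perform/get interface with the btree subcase; only the begin routine differs. A minimal caller sketch of that lifecycle, restating what _h_spoolinit, _h_spool and _h_indexbuild above already do; it only makes sense in backend context, and the index relation, hash_mask and tuple source are assumed to be set up elsewhere:

    #include "postgres.h"

    #include "access/hash.h"
    #include "miscadmin.h"
    #include "utils/tuplesort.h"

    /* Sketch of the hash-index sort lifecycle, mirroring hashsort.c */
    static void
    sort_and_insert(Relation index, uint32 hash_mask)
    {
        Tuplesortstate *sortstate;
        IndexTuple      itup;
        bool            should_free;

        sortstate = tuplesort_begin_index_hash(index, hash_mask,
                                               maintenance_work_mem, false);

        /* feed tuples here, normally from the heap-scan callback:
         *     tuplesort_putindextuple(sortstate, itup);
         */

        tuplesort_performsort(sortstate);

        while ((itup = tuplesort_getindextuple(sortstate,
                                               true, &should_free)) != NULL)
        {
            _hash_doinsert(index, itup);
            if (should_free)
                pfree(itup);
        }
        tuplesort_end(sortstate);
    }
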