Improve dynahash.c's API so that caller can specify the comparison function

author    Tom Lane, Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
committer Tom Lane, Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c

index 100e7a1c375caa47a6b724a518bc872c3b5e83fc..d293bb7ff29a2e107f68f8ba98b6ec5c04710463 100644 (file)
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -23,6 +23,13 @@
  #include "utils/syscache.h"
  
  
+static TupleHashTable CurTupleHashTable = NULL;
+
+static uint32 TupleHashTableHash(const void *key, Size keysize);
+static int TupleHashTableMatch(const void *key1, const void *key2,
+                               Size keysize);
+
+
  /*****************************************************************************
   *     Utility routines for grouping tuples together
   *****************************************************************************/
@@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc,
   * numCols, keyColIdx: identify the tuple fields to use as lookup key
   * eqfunctions: equality comparison functions to use
   * hashfunctions: datatype-specific hashing functions to use
- * nbuckets: number of buckets to make
+ * nbuckets: initial estimate of hashtable size
   * entrysize: size of each entry (at least sizeof(TupleHashEntryData))
   * tablecxt: memory context in which to store table and table entries
   * tempcxt: short-lived context for evaluation hash and comparison functions
@@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
                     MemoryContext tablecxt, MemoryContext tempcxt)
  {
     TupleHashTable hashtable;
-   Size        tabsize;
+   HASHCTL     hash_ctl;
  
     Assert(nbuckets > 0);
     Assert(entrysize >= sizeof(TupleHashEntryData));
  
-   tabsize = sizeof(TupleHashTableData) +
-       (nbuckets - 1) *sizeof(TupleHashEntry);
-   hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
+   hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
+                                               sizeof(TupleHashTableData));
  
     hashtable->numCols = numCols;
     hashtable->keyColIdx = keyColIdx;
@@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
     hashtable->tablecxt = tablecxt;
     hashtable->tempcxt = tempcxt;
     hashtable->entrysize = entrysize;
-   hashtable->nbuckets = nbuckets;
+
+   MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+   hash_ctl.keysize = sizeof(TupleHashEntryData);
+   hash_ctl.entrysize = entrysize;
+   hash_ctl.hash = TupleHashTableHash;
+   hash_ctl.match = TupleHashTableMatch;
+   hash_ctl.hcxt = tablecxt;
+   hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
+                                    &hash_ctl,
+                   HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+   if (hashtable->hashtab == NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_OUT_OF_MEMORY),
+                errmsg("out of memory")));
  
     return hashtable;
  }
@@ -327,19 +346,93 @@ TupleHashEntry
  LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                      bool *isnew)
  {
-   int         numCols = hashtable->numCols;
-   AttrNumber *keyColIdx = hashtable->keyColIdx;
     HeapTuple   tuple = slot->val;
     TupleDesc   tupdesc = slot->ttc_tupleDescriptor;
-   uint32      hashkey = 0;
-   int         i;
-   int         bucketno;
     TupleHashEntry entry;
     MemoryContext oldContext;
+   TupleHashTable saveCurHT;
+   bool        found;
  
-   /* Need to run the hash function in short-lived context */
+   /* Need to run the hash functions in short-lived context */
     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
  
+   /*
+    * Set up data needed by hash and match functions
+    *
+    * We save and restore CurTupleHashTable just in case someone manages
+    * to invoke this code re-entrantly.
+    */
+   hashtable->tupdesc = tupdesc;
+   saveCurHT = CurTupleHashTable;
+   CurTupleHashTable = hashtable;
+
+   /* Search the hash table */
+   entry = (TupleHashEntry) hash_search(hashtable->hashtab,
+                                        &tuple,
+                                        isnew ? HASH_ENTER : HASH_FIND,
+                                        &found);
+
+   if (isnew)
+   {
+       if (found)
+       {
+           /* found pre-existing entry */
+           *isnew = false;
+       }
+       else
+       {
+           /* created new entry ... we hope */
+           if (entry == NULL)
+               ereport(ERROR,
+                       (errcode(ERRCODE_OUT_OF_MEMORY),
+                        errmsg("out of memory")));
+
+           /*
+            * Zero any caller-requested space in the entry.  (This zaps
+            * the "key data" dynahash.c copied into the new entry, but
+            * we don't care since we're about to overwrite it anyway.)
+            */
+           MemSet(entry, 0, hashtable->entrysize);
+
+           /* Copy the first tuple into the table context */
+           MemoryContextSwitchTo(hashtable->tablecxt);
+           entry->firstTuple = heap_copytuple(tuple);
+
+           *isnew = true;
+       }
+   }
+
+   CurTupleHashTable = saveCurHT;
+
+   MemoryContextSwitchTo(oldContext);
+
+   return entry;
+}
+
+/*
+ * Compute the hash value for a tuple
+ *
+ * The passed-in key is a pointer to a HeapTuple pointer -- this is either
+ * the firstTuple field of a TupleHashEntry struct, or the key value passed
+ * to hash_search.  We ignore the keysize.
+ *
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the hash functions.  (dynahash.c doesn't change CurrentMemoryContext.)
+ */
+static uint32
+TupleHashTableHash(const void *key, Size keysize)
+{
+   HeapTuple   tuple = *(const HeapTuple *) key;
+   TupleHashTable hashtable = CurTupleHashTable;
+   int         numCols = hashtable->numCols;
+   AttrNumber *keyColIdx = hashtable->keyColIdx;
+   TupleDesc   tupdesc = hashtable->tupdesc;
+   uint32      hashkey = 0;
+   int         i;
+
     for (i = 0; i < numCols; i++)
     {
         AttrNumber  att = keyColIdx[i];
@@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
             hashkey ^= hkey;
         }
     }
-   bucketno = hashkey % (uint32) hashtable->nbuckets;
-
-   for (entry = hashtable->buckets[bucketno];
-        entry != NULL;
-        entry = entry->next)
-   {
-       /* Quick check using hashkey */
-       if (entry->hashkey != hashkey)
-           continue;
-       if (execTuplesMatch(entry->firstTuple,
-                           tuple,
-                           tupdesc,
-                           numCols, keyColIdx,
-                           hashtable->eqfunctions,
-                           hashtable->tempcxt))
-       {
-           if (isnew)
-               *isnew = false;
-           MemoryContextSwitchTo(oldContext);
-           return entry;
-       }
-   }
-
-   /* Not there, so build a new one if requested */
-   if (isnew)
-   {
-       MemoryContextSwitchTo(hashtable->tablecxt);
-
-       entry = (TupleHashEntry) palloc0(hashtable->entrysize);
-
-       entry->hashkey = hashkey;
-       entry->firstTuple = heap_copytuple(tuple);
-
-       entry->next = hashtable->buckets[bucketno];
-       hashtable->buckets[bucketno] = entry;
-
-       *isnew = true;
-   }
-
-   MemoryContextSwitchTo(oldContext);
  
-   return entry;
+   return hashkey;
  }
  
  /*
- * Walk through all the entries of a hash table, in no special order.
- * Returns NULL when no more entries remain.
+ * See whether two tuples (presumably of the same hash value) match
+ *
+ * As above, the passed pointers are pointers to HeapTuple pointers.
   *
- * Iterator state must be initialized with ResetTupleHashIterator() macro.
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
   */
-TupleHashEntry
-ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+static int
+TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
  {
-   TupleHashEntry entry;
-
-   entry = state->next_entry;
-   while (entry == NULL)
-   {
-       if (state->next_bucket >= hashtable->nbuckets)
-       {
-           /* No more entries in hashtable, so done */
-           return NULL;
-       }
-       entry = hashtable->buckets[state->next_bucket++];
-   }
-   state->next_entry = entry->next;
-
-   return entry;
+   HeapTuple   tuple1 = *(const HeapTuple *) key1;
+   HeapTuple   tuple2 = *(const HeapTuple *) key2;
+   TupleHashTable hashtable = CurTupleHashTable;
+
+   if (execTuplesMatch(tuple1,
+                       tuple2,
+                       hashtable->tupdesc,
+                       hashtable->numCols,
+                       hashtable->keyColIdx,
+                       hashtable->eqfunctions,
+                       hashtable->tempcxt))
+       return 0;
+   else
+       return 1;
  }
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index d8fb9a9565da6919d8a9adf07ebbc3d5fe564da9..d9adb09dafbe05ae10adf618cb63a969c478c667 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.115 2003/08/08 21:41:41 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.116 2003/08/19 01:13:40 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -905,7 +905,7 @@ agg_fill_hash_table(AggState *aggstate)
  
     aggstate->table_filled = true;
     /* Initialize to walk the hash table */
-   ResetTupleHashIterator(&aggstate->hashiter);
+   ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
  }
  
  /*
@@ -920,7 +920,6 @@ agg_retrieve_hash_table(AggState *aggstate)
     bool       *aggnulls;
     AggStatePerAgg peragg;
     AggStatePerGroup pergroup;
-   TupleHashTable hashtable;
     AggHashEntry entry;
     TupleTableSlot *firstSlot;
     TupleTableSlot *resultSlot;
@@ -935,7 +934,6 @@ agg_retrieve_hash_table(AggState *aggstate)
     aggnulls = econtext->ecxt_aggnulls;
     projInfo = aggstate->ss.ps.ps_ProjInfo;
     peragg = aggstate->peragg;
-   hashtable = aggstate->hashtable;
     firstSlot = aggstate->ss.ss_ScanTupleSlot;
  
     /*
@@ -950,8 +948,7 @@ agg_retrieve_hash_table(AggState *aggstate)
         /*
          * Find the next entry in the hash table
          */
-       entry = (AggHashEntry) ScanTupleHashTable(hashtable,
-                                                 &aggstate->hashiter);
+       entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
         if (entry == NULL)
         {
             /* No more entries in hashtable, so done */
@@ -1440,7 +1437,7 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
          */
         if (((PlanState *) node)->lefttree->chgParam == NULL)
         {
-           ResetTupleHashIterator(&node->hashiter);
+           ResetTupleHashIterator(node->hashtable, &node->hashiter);
             return;
         }
     }
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c

index 7530be263f32b0d286e8bfb5f9a43d368ae6550c..23b0cd3bf3d6c9499698cadfeea1b78dc33ab2c3 100644 (file)
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.54 2003/08/08 21:41:42 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.55 2003/08/19 01:13:40 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -627,8 +627,8 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
     TupleHashIterator hashiter;
     TupleHashEntry entry;
  
-   ResetTupleHashIterator(&hashiter);
-   while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
+   ResetTupleHashIterator(hashtable, &hashiter);
+   while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
     {
         if (!execTuplesUnequal(entry->firstTuple,
                                tuple,
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c

index 7090d000587c7fb704106f023bddedc4b405db51..c6f9b0236975f153b3cdea846313e73161d6a1e2 100644 (file)
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.47 2003/08/04 02:40:06 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -44,7 +44,6 @@
  
  #include "postgres.h"
  
-
  #include "utils/dynahash.h"
  #include "utils/hsearch.h"
  #include "utils/memutils.h"
@@ -63,7 +62,6 @@
   * Private function prototypes
   */
  static void *DynaHashAlloc(Size size);
-static uint32 call_hash(HTAB *hashp, void *k);
  static HASHSEGMENT seg_alloc(HTAB *hashp);
  static bool element_alloc(HTAB *hashp);
  static bool dir_realloc(HTAB *hashp);
@@ -133,6 +131,19 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
     else
         hashp->hash = string_hash;      /* default hash function */
  
+   /*
+    * If you don't specify a match function, it defaults to strncmp() if
+    * you used string_hash (either explicitly or by default) and to
+    * memcmp() otherwise.  (Prior to PostgreSQL 7.4, memcmp() was always
+    * used.)
+    */
+   if (flags & HASH_COMPARE)
+       hashp->match = info->match;
+   else if (hashp->hash == string_hash)
+       hashp->match = (HashCompareFunc) strncmp;
+   else
+       hashp->match = memcmp;
+
     if (flags & HASH_SHARED_MEM)
     {
         /*
@@ -155,7 +166,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
         hashp->hctl = NULL;
         hashp->dir = NULL;
         hashp->alloc = MEM_ALLOC;
-       hashp->hcxt = DynaHashCxt;
+       hashp->hcxt = CurrentDynaHashCxt;
         hashp->isshared = false;
     }
  
@@ -207,26 +218,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
         hashp->alloc = info->alloc;
     else
     {
-       if (flags & HASH_CONTEXT)
-       {
-           /* hash table structures live in child of given context */
-           CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
-                                                      "DynaHashTable",
-                                               ALLOCSET_DEFAULT_MINSIZE,
-                                              ALLOCSET_DEFAULT_INITSIZE,
-                                              ALLOCSET_DEFAULT_MAXSIZE);
-           hashp->hcxt = CurrentDynaHashCxt;
-       }
-       else
-       {
-           /* hash table structures live in child of DynaHashCxt */
-           CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
-                                                      "DynaHashTable",
-                                               ALLOCSET_DEFAULT_MINSIZE,
-                                              ALLOCSET_DEFAULT_INITSIZE,
-                                              ALLOCSET_DEFAULT_MAXSIZE);
-           hashp->hcxt = CurrentDynaHashCxt;
-       }
+       /* remaining hash table structures live in child of given context */
+       hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
+                                           "DynaHashTable",
+                                           ALLOCSET_DEFAULT_MINSIZE,
+                                           ALLOCSET_DEFAULT_INITSIZE,
+                                           ALLOCSET_DEFAULT_MAXSIZE);
+       CurrentDynaHashCxt = hashp->hcxt;
     }
  
     if (!init_htab(hashp, nelem))
@@ -351,7 +349,7 @@ init_htab(HTAB *hashp, long nelem)
   * NB: assumes that all hash structure parameters have default values!
   */
  long
-hash_estimate_size(long num_entries, long entrysize)
+hash_estimate_size(long num_entries, Size entrysize)
  {
     long        size = 0;
     long        nBuckets,
@@ -447,7 +445,6 @@ void
  hash_stats(const char *where, HTAB *hashp)
  {
  #if HASH_STATISTICS
-
     fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
             where, hashp->hctl->accesses, hashp->hctl->collisions);
  
@@ -459,19 +456,16 @@ hash_stats(const char *where, HTAB *hashp)
     fprintf(stderr, "hash_stats: total expansions %ld\n",
             hash_expansions);
  #endif
-
  }
  
  /*******************************SEARCH ROUTINES *****************************/
  
-static uint32
-call_hash(HTAB *hashp, void *k)
-{
-   HASHHDR    *hctl = hashp->hctl;
-   uint32      hash_val,
-               bucket;
  
-   hash_val = hashp->hash(k, (int) hctl->keysize);
+/* Convert a hash value to a bucket number */
+static inline uint32
+calc_bucket(HASHHDR *hctl, uint32 hash_val)
+{
+   uint32      bucket;
  
     bucket = hash_val & hctl->high_mask;
     if (bucket > hctl->max_bucket)
@@ -506,11 +500,12 @@ call_hash(HTAB *hashp, void *k)
   */
  void *
  hash_search(HTAB *hashp,
-           void *keyPtr,
+           const void *keyPtr,
             HASHACTION action,
             bool *foundPtr)
  {
     HASHHDR    *hctl = hashp->hctl;
+   uint32      hashvalue = 0;
     uint32      bucket;
     long        segment_num;
     long        segment_ndx;
@@ -545,7 +540,12 @@ hash_search(HTAB *hashp,
     }
     else
     {
-       bucket = call_hash(hashp, keyPtr);
+       HashCompareFunc match;
+       Size        keysize = hctl->keysize;
+
+       hashvalue = hashp->hash(keyPtr, keysize);
+       bucket = calc_bucket(hctl, hashvalue);
+
         segment_num = bucket >> hctl->sshift;
         segment_ndx = MOD(bucket, hctl->ssize);
  
@@ -560,9 +560,11 @@ hash_search(HTAB *hashp,
         /*
          * Follow collision chain looking for matching key
          */
+       match = hashp->match;   /* save one fetch in inner loop */
         while (currBucket != NULL)
         {
-           if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
+           if (currBucket->hashvalue == hashvalue &&
+               match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
                 break;
             prevBucketPtr = &(currBucket->link);
             currBucket = *prevBucketPtr;
@@ -641,6 +643,7 @@ hash_search(HTAB *hashp,
             currBucket->link = NULL;
  
             /* copy key into record */
+           currBucket->hashvalue = hashvalue;
             memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
  
             /* caller is expected to fill the data field on return */
@@ -802,7 +805,7 @@ expand_table(HTAB *hashp)
  
     /*
      * Relocate records to the new bucket.  NOTE: because of the way the
-    * hash masking is done in call_hash, only one old bucket can need to
+    * hash masking is done in calc_bucket, only one old bucket can need to
      * be split at this point.  With a different way of reducing the hash
      * value, that might not be true!
      */
@@ -820,8 +823,7 @@ expand_table(HTAB *hashp)
          currElement = nextElement)
     {
         nextElement = currElement->link;
-       if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
-           == old_bucket)
+       if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
         {
             *oldlink = currElement;
             oldlink = &currElement->link;
diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c

index 835bd007a9746a0ea6ea689f42a1f4778f9a8fd1..3f7a0089075fa1243afe30500dc33940e9d1d211 100644 (file)
--- a/src/backend/utils/hash/hashfn.c
+++ b/src/backend/utils/hash/hashfn.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.18 2003/08/04 02:40:06 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.19 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -22,24 +22,21 @@
  /*
   * string_hash: hash function for keys that are null-terminated strings.
   *
- * NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
- * the key must actually be zero-padded to the specified maximum length
- * to work correctly.  However, if it is known that nothing after the
- * first zero byte is interesting, this is the right hash function to use.
- *
   * NOTE: this is the default hash function if none is specified.
   */
  uint32
-string_hash(void *key, int keysize)
+string_hash(const void *key, Size keysize)
  {
-   return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
+   return DatumGetUInt32(hash_any((const unsigned char *) key,
+                                  (int) strlen((const char *) key)));
  }
  
  /*
   * tag_hash: hash function for fixed-size tag values
   */
  uint32
-tag_hash(void *key, int keysize)
+tag_hash(const void *key, Size keysize)
  {
-   return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
+   return DatumGetUInt32(hash_any((const unsigned char *) key,
+                                  (int) keysize));
  }
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index af2f123d2d6f329e30cf9c1895e2a53227dd9095..88449034feec56d90ff0c0294c8443fab3c9b6e8 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: executor.h,v 1.99 2003/08/08 21:42:44 momjian Exp $
+ * $Id: executor.h,v 1.100 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -71,8 +71,6 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
  extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                      TupleTableSlot *slot,
                      bool *isnew);
-extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
-                  TupleHashIterator *state);
  
  /*
   * prototypes from functions in execJunk.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 3f163b8fdaa9aafa574638c4293c4fb3fe59f7fc..8d180009bfd97cb3f2ce34a123fee8de312f9934 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: execnodes.h,v 1.103 2003/08/08 21:42:47 momjian Exp $
+ * $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,6 +21,7 @@
  #include "nodes/bitmapset.h"
  #include "nodes/params.h"
  #include "nodes/plannodes.h"
+#include "utils/hsearch.h"
  #include "utils/tuplestore.h"
  
  
@@ -344,14 +345,14 @@ typedef struct TupleHashTableData *TupleHashTable;
  
  typedef struct TupleHashEntryData
  {
-   TupleHashEntry next;        /* next entry in same hash bucket */
-   uint32      hashkey;        /* exact hash key of this entry */
+   /* firstTuple must be the first field in this struct! */
     HeapTuple   firstTuple;     /* copy of first tuple in this group */
     /* there may be additional data beyond the end of this struct */
  } TupleHashEntryData;          /* VARIABLE LENGTH STRUCT */
  
  typedef struct TupleHashTableData
  {
+   HTAB       *hashtab;        /* underlying dynahash table */
     int         numCols;        /* number of columns in lookup key */
     AttrNumber *keyColIdx;      /* attr numbers of key columns */
     FmgrInfo   *eqfunctions;    /* lookup data for comparison functions */
@@ -359,19 +360,15 @@ typedef struct TupleHashTableData
     MemoryContext tablecxt;     /* memory context containing table */
     MemoryContext tempcxt;      /* context for function evaluations */
     Size        entrysize;      /* actual size to make each hash entry */
-   int         nbuckets;       /* number of buckets in hash table */
-   TupleHashEntry buckets[1];  /* VARIABLE LENGTH ARRAY */
-} TupleHashTableData;          /* VARIABLE LENGTH STRUCT */
+   TupleDesc   tupdesc;        /* tuple descriptor */
+} TupleHashTableData;
  
-typedef struct
-{
-   TupleHashEntry next_entry;  /* next entry in current chain */
-   int         next_bucket;    /* next chain */
-} TupleHashIterator;
+typedef HASH_SEQ_STATUS TupleHashIterator;
  
-#define ResetTupleHashIterator(iter) \
-   ((iter)->next_entry = NULL, \
-    (iter)->next_bucket = 0)
+#define ResetTupleHashIterator(htable, iter) \
+   hash_seq_init(iter, (htable)->hashtab)
+#define ScanTupleHashTable(iter) \
+   ((TupleHashEntry) hash_seq_search(iter))
  
  
  /* ----------------------------------------------------------------
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h

index 905268badc6199e7cee00cf20bf93553daa4bf81..05d26e9a15092b95965fe1fe774a0bdd0812b8a8 100644 (file)
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: hsearch.h,v 1.28 2003/08/04 02:40:15 momjian Exp $
+ * $Id: hsearch.h,v 1.29 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -15,6 +15,23 @@
  #define HSEARCH_H
  
  
+/*
+ * Hash and comparison functions must have these signatures.  Comparison
+ * functions return zero for match, nonzero for no match.  (The comparison
+ * function definition is designed to allow memcmp() and strncmp() to be
+ * used directly as key comparison functions.)
+ */
+typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
+typedef int (*HashCompareFunc) (const void *key1, const void *key2,
+                               Size keysize);
+
+/*
+ * Space allocation function for a hashtable --- designed to match malloc().
+ * Note: there is no free function API; can't destroy a hashtable unless you
+ * use the default allocator.
+ */
+typedef void *(*HashAllocFunc) (Size request);
+
  /*
   * Constants
   *
@@ -44,6 +61,7 @@
  typedef struct HASHELEMENT
  {
     struct HASHELEMENT *link;   /* link to next entry in same bucket */
+   uint32  hashvalue;          /* hash function result for this entry */
  } HASHELEMENT;
  
  /* A hash bucket is a linked list of HASHELEMENTs */
@@ -64,8 +82,8 @@ typedef struct HASHHDR
     long        ffactor;        /* Fill factor */
     long        nentries;       /* Number of entries in hash table */
     long        nsegs;          /* Number of allocated segments */
-   long        keysize;        /* hash key length in bytes */
-   long        entrysize;      /* total user element size in bytes */
+   Size        keysize;        /* hash key length in bytes */
+   Size        entrysize;      /* total user element size in bytes */
     long        max_dsize;      /* 'dsize' limit if directory is fixed
                                  * size */
     HASHELEMENT *freeList;      /* linked list of free elements */
@@ -83,8 +101,9 @@ typedef struct HTAB
  {
     HASHHDR    *hctl;           /* shared control information */
     HASHSEGMENT *dir;           /* directory of segment starts */
-   uint32      (*hash) (void *key, int keysize);       /* Hash Function */
-   void       *(*alloc) (Size);    /* memory allocator */
+   HashValueFunc hash;         /* hash function */
+   HashCompareFunc match;      /* key comparison function */
+   HashAllocFunc alloc;        /* memory allocator */
     MemoryContext hcxt;         /* memory context if default allocator
                                  * used */
     char       *tabname;        /* table name (for error messages) */
@@ -97,28 +116,30 @@ typedef struct HASHCTL
  {
     long        ssize;          /* Segment Size */
     long        dsize;          /* (initial) Directory Size */
-   long        ffactor;        /* Fill factor */
-   uint32      (*hash) (void *key, int keysize);       /* Hash Function */
-   long        keysize;        /* hash key length in bytes */
-   long        entrysize;      /* total user element size in bytes */
     long        max_dsize;      /* limit to dsize if directory size is
                                  * limited */
-   void       *(*alloc) (Size);    /* memory allocation function */
+   long        ffactor;        /* Fill factor */
+   Size        keysize;        /* hash key length in bytes */
+   Size        entrysize;      /* total user element size in bytes */
+   HashValueFunc hash;         /* hash function */
+   HashCompareFunc match;      /* key comparison function */
+   HashAllocFunc alloc;        /* memory allocator */
     HASHSEGMENT *dir;           /* directory of segment starts */
     HASHHDR    *hctl;           /* location of header in shared mem */
     MemoryContext hcxt;         /* memory context to use for allocations */
  } HASHCTL;
  
  /* Flags to indicate which parameters are supplied */
-#define HASH_SEGMENT   0x002   /* Setting segment size */
-#define HASH_DIRSIZE   0x004   /* Setting directory size */
-#define HASH_FFACTOR   0x008   /* Setting fill factor */
+#define HASH_SEGMENT   0x002   /* Set segment size */
+#define HASH_DIRSIZE   0x004   /* Set directory size */
+#define HASH_FFACTOR   0x008   /* Set fill factor */
  #define HASH_FUNCTION  0x010   /* Set user defined hash function */
-#define HASH_ELEM      0x020   /* Setting key/entry size */
-#define HASH_SHARED_MEM 0x040  /* Setting shared mem const */
+#define HASH_ELEM      0x020   /* Set key/entry size */
+#define HASH_SHARED_MEM 0x040  /* Set shared mem const */
  #define HASH_ATTACH        0x080   /* Do not initialize hctl */
-#define HASH_ALLOC     0x100   /* Setting memory allocator */
-#define HASH_CONTEXT   0x200   /* Setting explicit memory context */
+#define HASH_ALLOC     0x100   /* Set memory allocator */
+#define HASH_CONTEXT   0x200   /* Set explicit memory context */
+#define HASH_COMPARE   0x400   /* Set user defined comparison function */
  
  
  /* max_dsize value to indicate expansible directory */
@@ -151,17 +172,17 @@ extern HTAB *hash_create(const char *tabname, long nelem,
             HASHCTL *info, int flags);
  extern void hash_destroy(HTAB *hashp);
  extern void hash_stats(const char *where, HTAB *hashp);
-extern void *hash_search(HTAB *hashp, void *keyPtr, HASHACTION action,
+extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
             bool *foundPtr);
  extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
  extern void *hash_seq_search(HASH_SEQ_STATUS *status);
-extern long hash_estimate_size(long num_entries, long entrysize);
+extern long hash_estimate_size(long num_entries, Size entrysize);
  extern long hash_select_dirsize(long num_entries);
  
  /*
   * prototypes for functions in hashfn.c
   */
-extern uint32 string_hash(void *key, int keysize);
-extern uint32 tag_hash(void *key, int keysize);
+extern uint32 string_hash(const void *key, Size keysize);
+extern uint32 tag_hash(const void *key, Size keysize);
  
  #endif   /* HSEARCH_H */
diff --git a/src/test/regress/expected/polymorphism.out b/src/test/regress/expected/polymorphism.out

index 04b52738e6e9236b14b30c975463f7958fef3d5f..bc6b9e4d85d9ba425df987a24a2507a6138d05ce 100644 (file)
--- a/src/test/regress/expected/polymorphism.out
+++ b/src/test/regress/expected/polymorphism.out
@@ -350,183 +350,183 @@ select f3, myaggp01a(*) from t group by f3;
   f3 | myaggp01a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp03a(*) from t group by f3;
   f3 | myaggp03a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp03b(*) from t group by f3;
   f3 | myaggp03b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp05a(f1) from t group by f3;
   f3 | myaggp05a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp06a(f1) from t group by f3;
   f3 | myaggp06a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp08a(f1) from t group by f3;
   f3 | myaggp08a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp09a(f1) from t group by f3;
   f3 | myaggp09a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp09b(f1) from t group by f3;
   f3 | myaggp09b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp10a(f1) from t group by f3;
   f3 | myaggp10a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp10b(f1) from t group by f3;
   f3 | myaggp10b 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp20a(f1) from t group by f3;
   f3 | myaggp20a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp20b(f1) from t group by f3;
   f3 | myaggp20b 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggn01a(*) from t group by f3;
   f3 | myaggn01a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn01b(*) from t group by f3;
   f3 | myaggn01b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn03a(*) from t group by f3;
   f3 | myaggn03a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn05a(f1) from t group by f3;
   f3 | myaggn05a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggn05b(f1) from t group by f3;
   f3 | myaggn05b 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggn06a(f1) from t group by f3;
   f3 | myaggn06a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn06b(f1) from t group by f3;
   f3 | myaggn06b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn08a(f1) from t group by f3;
   f3 | myaggn08a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn08b(f1) from t group by f3;
   f3 | myaggn08b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn09a(f1) from t group by f3;
   f3 | myaggn09a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn10a(f1) from t group by f3;
   f3 | myaggn10a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
author	Tom Lane
	Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
committer	Tom Lane
	Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
src/backend/executor/execGrouping.c		patch | blob | blame | history
src/backend/executor/nodeAgg.c		patch | blob | blame | history
src/backend/executor/nodeSubplan.c		patch | blob | blame | history
src/backend/utils/hash/dynahash.c		patch | blob | blame | history
src/backend/utils/hash/hashfn.c		patch | blob | blame | history
src/include/executor/executor.h		patch | blob | blame | history
src/include/nodes/execnodes.h		patch | blob | blame | history
src/include/utils/hsearch.h		patch | blob | blame | history
src/test/regress/expected/polymorphism.out		patch | blob | blame | history