Create a new file executor/execGrouping.c to centralize utility routines

author Tom Lane

Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)

committer Tom Lane

Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
author Tom Lane
Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
committer Tom Lane
Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile

index b875259bc1a6c294301ad982acfa0e8c4ce46f49..7e3f5d2d2c83545b6c2195b91e9934ab6dac772c 100644 (file)
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -4,7 +4,7 @@
  #    Makefile for executor
  #
  # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.19 2002/05/12 23:43:02 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.20 2003/01/10 23:54:24 tgl Exp $
  #
  #-------------------------------------------------------------------------
  
@@ -12,7 +12,7 @@ subdir = src/backend/executor
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global
  
-OBJS = execAmi.o execJunk.o execMain.o \
+OBJS = execAmi.o execGrouping.o execJunk.o execMain.o \
         execProcnode.o execQual.o execScan.o execTuples.o \
         execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o nodeHash.o \
         nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c

new file mode 100644 (file)

index 0000000..e3f7720
--- /dev/null
+++ b/src/backend/executor/execGrouping.c
@@ -0,0 +1,369 @@
+/*-------------------------------------------------------------------------
+ *
+ * execGrouping.c
+ *   executor utility routines for grouping, hashing, and aggregation
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *   $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "executor/executor.h"
+#include "parser/parse_oper.h"
+#include "utils/memutils.h"
+
+
+/*****************************************************************************
+ *     Utility routines for grouping tuples together
+ *
+ * These routines actually implement SQL's notion of "distinct/not distinct".
+ * Two tuples match if they are not distinct in all the compared columns,
+ * i.e., the column values are either both null, or both non-null and equal.
+ *****************************************************************************/
+
+/*
+ * execTuplesMatch
+ *     Return true if two tuples match in all the indicated fields.
+ *     This is used to detect group boundaries in nodeGroup and nodeAgg,
+ *     and to decide whether two tuples are distinct or not in nodeUnique.
+ *     LookupTupleHashEntry also uses it to compare an incoming tuple with
+ *     the first tuple of each candidate hash entry.
+ *
+ * tuple1, tuple2: the tuples to compare
+ * tupdesc: tuple descriptor applying to both tuples
+ * numCols: the number of attributes to be examined
+ * matchColIdx: array of attribute column numbers
+ * eqfunctions: array of fmgr lookup info for the equality functions to use
+ *     (parallel to matchColIdx: eqfunctions[i] compares column
+ *     matchColIdx[i])
+ * evalContext: short-term memory context for executing the functions
+ *
+ * A column matches if both values are null, or if both are non-null and
+ * the column's equality function returns true.  If numCols is zero the
+ * loop body never runs and the result is true.
+ *
+ * NB: evalContext is reset each time!  Nothing the caller still needs may
+ * live in that context.  The memory context that was current on entry is
+ * made current again before returning.
+ */
+bool
+execTuplesMatch(HeapTuple tuple1,
+               HeapTuple tuple2,
+               TupleDesc tupdesc,
+               int numCols,
+               AttrNumber *matchColIdx,
+               FmgrInfo *eqfunctions,
+               MemoryContext evalContext)
+{
+   MemoryContext oldContext;
+   bool        result;
+   int         i;
+
+   /* Reset and switch into the temp context. */
+   MemoryContextReset(evalContext);
+   oldContext = MemoryContextSwitchTo(evalContext);
+
+   /*
+    * We cannot report a match without checking all the fields, but we
+    * can report a non-match as soon as we find unequal fields.  So,
+    * start comparing at the last field (least significant sort key).
+    * That's the most likely to be different if we are dealing with
+    * sorted input.
+    */
+   result = true;
+
+   /* i runs from numCols - 1 down to 0 */
+   for (i = numCols; --i >= 0;)
+   {
+       AttrNumber  att = matchColIdx[i];
+       Datum       attr1,
+                   attr2;
+       bool        isNull1,
+                   isNull2;
+
+       attr1 = heap_getattr(tuple1,
+                            att,
+                            tupdesc,
+                            &isNull1);
+
+       attr2 = heap_getattr(tuple2,
+                            att,
+                            tupdesc,
+                            &isNull2);
+
+       if (isNull1 != isNull2)
+       {
+           result = false;     /* one null and one not; they aren't equal */
+           break;
+       }
+
+       /* the flags are equal here, so testing one of them is enough */
+       if (isNull1)
+           continue;           /* both are null, treat as equal */
+
+       /* Apply the type-specific equality function */
+
+       if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+                                       attr1, attr2)))
+       {
+           result = false;     /* they aren't equal */
+           break;
+       }
+   }
+
+   /* Both the normal exit and the early "break" exits come through here */
+   MemoryContextSwitchTo(oldContext);
+
+   return result;
+}
+
+
+/*
+ * execTuplesMatchPrepare
+ *     Look up the equality functions needed for execTuplesMatch.
+ *     The result is a palloc'd array.
+ *
+ * tupdesc: tuple descriptor supplying the data type of each column
+ * numCols: the number of entries in matchColIdx
+ * matchColIdx: array of attribute column numbers (1-based: column "att"
+ *     is looked up as tupdesc->attrs[att - 1])
+ *
+ * The returned array has numCols entries; entry i holds the fmgr lookup
+ * info for the equality function of column matchColIdx[i], which is the
+ * layout execTuplesMatch expects for its eqfunctions argument.
+ *
+ * NOTE(review): what happens when a column's type has no equality operator
+ * is decided inside equality_oper_funcid(), which is not visible here --
+ * presumably it reports an error; confirm before relying on it.
+ */
+FmgrInfo *
+execTuplesMatchPrepare(TupleDesc tupdesc,
+                      int numCols,
+                      AttrNumber *matchColIdx)
+{
+   FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+   int         i;
+
+   for (i = 0; i < numCols; i++)
+   {
+       AttrNumber  att = matchColIdx[i];
+       Oid         typid = tupdesc->attrs[att - 1]->atttypid;
+       Oid         eq_function;
+
+       /* map the column's type to its equality function, then set up fmgr info */
+       eq_function = equality_oper_funcid(typid);
+       fmgr_info(eq_function, &eqfunctions[i]);
+   }
+
+   return eqfunctions;
+}
+
+
+/*****************************************************************************
+ *     Utility routines for hashing
+ *****************************************************************************/
+
+/*
+ * ComputeHashFunc
+ *
+ *     the hash function for hash joins (also used for hash aggregation)
+ *
+ *     XXX this probably ought to be replaced with datatype-specific
+ *     hash functions, such as those already implemented for hash indexes.
+ *
+ * key: the (non-null) value to hash
+ * typLen: the type's length: > 0 for fixed-width types, -1 for varlena
+ *     types, -2 for null-terminated C strings; it is ignored when byVal
+ *     is true.  Any other value for a by-reference type raises an error.
+ * byVal: true if the type is passed by value in the Datum itself
+ *
+ * Returns the hash_any() hash of the selected bytes, as a uint32.
+ */
+uint32
+ComputeHashFunc(Datum key, int typLen, bool byVal)
+{
+   unsigned char *k;
+
+   if (byVal)
+   {
+       /*
+        * If it's a by-value data type, just hash the whole Datum value.
+        * This assumes that datatypes narrower than Datum are
+        * consistently padded (either zero-extended or sign-extended, but
+        * not random bits) to fill Datum; see the XXXGetDatum macros in
+        * postgres.h. NOTE: it would not work to do hash_any(&key, len)
+        * since this would get the wrong bytes on a big-endian machine.
+        */
+       k = (unsigned char *) &key;
+       typLen = sizeof(Datum);
+   }
+   else
+   {
+       if (typLen > 0)
+       {
+           /* fixed-width pass-by-reference type */
+           k = (unsigned char *) DatumGetPointer(key);
+       }
+       else if (typLen == -1)
+       {
+           /*
+            * It's a varlena type, so 'key' points to a "struct varlena".
+            * NOTE: VARSIZE returns the "real" data length plus the
+            * sizeof the "vl_len" attribute of varlena (the length
+            * information). 'key' points to the beginning of the varlena
+            * struct, so we have to use "VARDATA" to find the beginning
+            * of the "real" data.  Also, we have to be careful to detoast
+            * the datum if it's toasted.  (We don't worry about freeing
+            * the detoasted copy; that happens for free when the
+            * per-tuple memory context is reset in ExecHashGetBucket.
+            * When called from LookupTupleHashEntry, the copy is made
+            * while the hash table's tempcxt is the current context.)
+            */
+           struct varlena *vkey = PG_DETOAST_DATUM(key);
+
+           typLen = VARSIZE(vkey) - VARHDRSZ;
+           k = (unsigned char *) VARDATA(vkey);
+       }
+       else if (typLen == -2)
+       {
+           /* It's a null-terminated C string */
+           /* the "+ 1" makes the terminating null byte part of the hashed data */
+           typLen = strlen(DatumGetCString(key)) + 1;
+           k = (unsigned char *) DatumGetPointer(key);
+       }
+       else
+       {
+           elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
+           k = NULL;           /* keep compiler quiet */
+       }
+   }
+
+   /* at this point typLen holds the number of bytes to hash, starting at k */
+   return DatumGetUInt32(hash_any(k, typLen));
+}
+
+
+/*****************************************************************************
+ *     Utility routines for all-in-memory hash tables
+ *
+ * These routines build hash tables for grouping tuples together (eg, for
+ * hash aggregation).  There is one entry for each not-distinct set of tuples
+ * presented.
+ *****************************************************************************/
+
+/*
+ * Construct an empty TupleHashTable
+ *
+ * numCols, keyColIdx: identify the tuple fields to use as lookup key
+ * eqfunctions: equality comparison functions to use
+ * nbuckets: number of buckets to make (must be > 0)
+ * entrysize: size of each entry (at least sizeof(TupleHashEntryData))
+ * tablecxt: memory context in which to store table and table entries
+ * tempcxt: short-lived context for evaluating hash and comparison functions
+ *
+ * The eqfunctions array may be made with execTuplesMatchPrepare().
+ *
+ * Note that keyColIdx and eqfunctions must be allocated in storage that
+ * will live as long as the hashtable does.  (Only the pointers are stored
+ * in the table; the arrays are not copied.)
+ *
+ * The table is allocated zero-filled in tablecxt, so every bucket starts
+ * out empty (NULL).  Returns the new table.
+ */
+TupleHashTable
+BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+                   FmgrInfo *eqfunctions,
+                   int nbuckets, Size entrysize,
+                   MemoryContext tablecxt, MemoryContext tempcxt)
+{
+   TupleHashTable  hashtable;
+   Size            tabsize;
+
+   Assert(nbuckets > 0);
+   Assert(entrysize >= sizeof(TupleHashEntryData));
+
+   /*
+    * NOTE(review): the "nbuckets - 1" presumably accounts for a one-element
+    * buckets[] array already declared inside TupleHashTableData; the struct
+    * definition is not in this file -- confirm against the header.
+    */
+   tabsize = sizeof(TupleHashTableData) +
+       (nbuckets - 1) * sizeof(TupleHashEntry);
+   hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
+
+   hashtable->numCols = numCols;
+   hashtable->keyColIdx = keyColIdx;
+   hashtable->eqfunctions = eqfunctions;
+   hashtable->tablecxt = tablecxt;
+   hashtable->tempcxt = tempcxt;
+   hashtable->entrysize = entrysize;
+   hashtable->nbuckets = nbuckets;
+
+   return hashtable;
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple.
+ *
+ * hashtable: table made by BuildTupleHashTable
+ * slot: slot holding the tuple to look up, together with its descriptor
+ * isnew: output parameter, see below
+ *
+ * On return, *isnew is true if the entry is newly created, false if it
+ * existed already.  Any extra space in a new entry has been zeroed.
+ *
+ * A new entry, and its private copy of the tuple, are allocated in the
+ * table's tablecxt.  Hashing and comparisons run in the table's tempcxt,
+ * which execTuplesMatch resets whenever it is called.  The memory context
+ * that was current on entry is current again on return.
+ */
+TupleHashEntry
+LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+                    bool *isnew)
+{
+   int         numCols = hashtable->numCols;
+   AttrNumber *keyColIdx = hashtable->keyColIdx;
+   HeapTuple   tuple = slot->val;
+   TupleDesc   tupdesc = slot->ttc_tupleDescriptor;
+   uint32      hashkey = 0;
+   int         i;
+   int         bucketno;
+   TupleHashEntry entry;
+   MemoryContext oldContext;
+
+   /* Need to run the hash function in short-lived context */
+   oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+   /* Combine the per-column hashes of all key columns into one hash key */
+   for (i = 0; i < numCols; i++)
+   {
+       AttrNumber  att = keyColIdx[i];
+       Datum       attr;
+       bool        isNull;
+
+       /* rotate hashkey left 1 bit at each step */
+       /* (this is done for null columns too, since it precedes the null test) */
+       hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+       attr = heap_getattr(tuple, att, tupdesc, &isNull);
+       if (isNull)
+           continue;           /* treat nulls as having hash key 0 */
+       hashkey ^= ComputeHashFunc(attr,
+                                  (int) tupdesc->attrs[att - 1]->attlen,
+                                  tupdesc->attrs[att - 1]->attbyval);
+   }
+   bucketno = hashkey % (uint32) hashtable->nbuckets;
+
+   /* Search the bucket's chain for an entry whose first tuple matches */
+   for (entry = hashtable->buckets[bucketno];
+        entry != NULL;
+        entry = entry->next)
+   {
+       /* Quick check using hashkey */
+       if (entry->hashkey != hashkey)
+           continue;
+       /*
+        * This call resets tempcxt.  That is harmless here: the hash key has
+        * already been computed and is held in a local variable.
+        */
+       if (execTuplesMatch(entry->firstTuple,
+                           tuple,
+                           tupdesc,
+                           numCols, keyColIdx,
+                           hashtable->eqfunctions,
+                           hashtable->tempcxt))
+       {
+           MemoryContextSwitchTo(oldContext);
+           *isnew = false;
+           return entry;
+       }
+   }
+
+   /* Not there, so build a new one */
+   MemoryContextSwitchTo(hashtable->tablecxt);
+
+   /* palloc0 zeroes all entrysize bytes, including any caller-specific part */
+   entry = (TupleHashEntry) palloc0(hashtable->entrysize);
+
+   entry->hashkey = hashkey;
+   entry->firstTuple = heap_copytuple(tuple);
+
+   /* link the new entry in at the head of its bucket's chain */
+   entry->next = hashtable->buckets[bucketno];
+   hashtable->buckets[bucketno] = entry;
+
+   MemoryContextSwitchTo(oldContext);
+
+   *isnew = true;
+
+   return entry;
+}
+
+/*
+ * Walk through all the entries of a hash table, in no special order.
+ * Returns NULL when no more entries remain.
+ *
+ * hashtable: the table to scan
+ * state: iterator state; it records the next entry to return from the
+ *     current bucket chain (next_entry) and the index of the next bucket
+ *     to examine (next_bucket), and is advanced by each call
+ *
+ * Iterator state must be initialized with ResetTupleHashIterator() macro.
+ */
+TupleHashEntry
+ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+{
+   TupleHashEntry  entry;
+
+   /* Continue in the current chain if it has entries left ... */
+   entry = state->next_entry;
+   /* ... otherwise move on through the buckets until a non-empty one is found */
+   while (entry == NULL)
+   {
+       if (state->next_bucket >= hashtable->nbuckets)
+       {
+           /* No more entries in hashtable, so done */
+           return NULL;
+       }
+       entry = hashtable->buckets[state->next_bucket++];
+   }
+   /* Remember where to resume on the next call */
+   state->next_entry = entry->next;
+
+   return entry;
+}
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index 769e88a839779347305416088488ba8e20ce24fa..d8eeae15ad7f5bf7e2f7fdb52f47175accff3458 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.101 2002/12/15 16:17:46 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.102 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -57,8 +57,6 @@
  #include "catalog/pg_operator.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
-#include "executor/nodeGroup.h"
-#include "executor/nodeHash.h"
  #include "miscadmin.h"
  #include "optimizer/clauses.h"
  #include "parser/parse_coerce.h"
@@ -182,21 +180,15 @@ typedef struct AggStatePerGroupData
   * distinct set of GROUP BY column values.  We compute the hash key from
   * the GROUP BY columns.
   */
+typedef struct AggHashEntryData *AggHashEntry;
+
  typedef struct AggHashEntryData
  {
-   AggHashEntry    next;       /* next entry in same hash bucket */
-   uint32      hashkey;        /* exact hash key of this entry */
-   HeapTuple   firstTuple;     /* copy of first tuple in this group */
+   TupleHashEntryData shared;  /* common header for hash table entries */
     /* per-aggregate transition status array - must be last! */
     AggStatePerGroupData pergroup[1];   /* VARIABLE LENGTH ARRAY */
  } AggHashEntryData;                /* VARIABLE LENGTH STRUCT */
  
-typedef struct AggHashTableData
-{
-   int         nbuckets;       /* number of buckets in hash table */
-   AggHashEntry buckets[1];    /* VARIABLE LENGTH ARRAY */
-} AggHashTableData;                /* VARIABLE LENGTH STRUCT */
-
  
  static void initialize_aggregates(AggState *aggstate,
                                   AggStatePerAgg peragg,
@@ -578,18 +570,22 @@ static void
  build_hash_table(AggState *aggstate)
  {
     Agg            *node = (Agg *) aggstate->ss.ps.plan;
-   AggHashTable    hashtable;
-   Size            tabsize;
+   MemoryContext   tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
+   Size            entrysize;
  
     Assert(node->aggstrategy == AGG_HASHED);
     Assert(node->numGroups > 0);
-   tabsize = sizeof(AggHashTableData) +
-       (node->numGroups - 1) * sizeof(AggHashEntry);
-   hashtable = (AggHashTable) MemoryContextAlloc(aggstate->aggcontext,
-                                                 tabsize);
-   MemSet(hashtable, 0, tabsize);
-   hashtable->nbuckets = node->numGroups;
-   aggstate->hashtable = hashtable;
+
+   entrysize = sizeof(AggHashEntryData) +
+       (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
+
+   aggstate->hashtable = BuildTupleHashTable(node->numCols,
+                                             node->grpColIdx,
+                                             aggstate->eqfunctions,
+                                             node->numGroups,
+                                             entrysize,
+                                             aggstate->aggcontext,
+                                             tmpmem);
  }
  
  /*
@@ -601,75 +597,19 @@ build_hash_table(AggState *aggstate)
  static AggHashEntry
  lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
  {
-   Agg        *node = (Agg *) aggstate->ss.ps.plan;
-   AggHashTable hashtable = aggstate->hashtable;
-   MemoryContext   tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
-   HeapTuple   tuple = slot->val;
-   TupleDesc   tupdesc = slot->ttc_tupleDescriptor;
-   uint32      hashkey = 0;
-   int         i;
-   int         bucketno;
-   AggHashEntry    entry;
-   MemoryContext oldContext;
-   Size        entrysize;
-
-   /* Need to run the hash function in short-lived context */
-   oldContext = MemoryContextSwitchTo(tmpmem);
-
-   for (i = 0; i < node->numCols; i++)
-   {
-       AttrNumber  att = node->grpColIdx[i];
-       Datum       attr;
-       bool        isNull;
+   AggHashEntry entry;
+   bool        isnew;
  
-       /* rotate hashkey left 1 bit at each step */
-       hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+   entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
+                                               slot,
+                                               &isnew);
  
-       attr = heap_getattr(tuple, att, tupdesc, &isNull);
-       if (isNull)
-           continue;           /* treat nulls as having hash key 0 */
-       hashkey ^= ComputeHashFunc(attr,
-                                  (int) tupdesc->attrs[att - 1]->attlen,
-                                  tupdesc->attrs[att - 1]->attbyval);
-   }
-   bucketno = hashkey % (uint32) hashtable->nbuckets;
-
-   for (entry = hashtable->buckets[bucketno];
-        entry != NULL;
-        entry = entry->next)
+   if (isnew)
     {
-       /* Quick check using hashkey */
-       if (entry->hashkey != hashkey)
-           continue;
-       if (execTuplesMatch(entry->firstTuple,
-                           tuple,
-                           tupdesc,
-                           node->numCols, node->grpColIdx,
-                           aggstate->eqfunctions,
-                           tmpmem))
-       {
-           MemoryContextSwitchTo(oldContext);
-           return entry;
-       }
+       /* initialize aggregates for new tuple group */
+       initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
     }
  
-   /* Not there, so build a new one */
-   MemoryContextSwitchTo(aggstate->aggcontext);
-   entrysize = sizeof(AggHashEntryData) +
-       (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
-   entry = (AggHashEntry) palloc0(entrysize);
-
-   entry->hashkey = hashkey;
-   entry->firstTuple = heap_copytuple(tuple);
-
-   entry->next = hashtable->buckets[bucketno];
-   hashtable->buckets[bucketno] = entry;
-
-   MemoryContextSwitchTo(oldContext);
-
-   /* initialize aggregates for new tuple group */
-   initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
-
     return entry;
  }
  
@@ -964,8 +904,7 @@ agg_fill_hash_table(AggState *aggstate)
  
     aggstate->table_filled = true;
     /* Initialize to walk the hash table */
-   aggstate->next_hash_entry = NULL;
-   aggstate->next_hash_bucket = 0;
+   ResetTupleHashIterator(&aggstate->hashiter);
  }
  
  /*
@@ -980,7 +919,7 @@ agg_retrieve_hash_table(AggState *aggstate)
     bool       *aggnulls;
     AggStatePerAgg peragg;
     AggStatePerGroup pergroup;
-   AggHashTable    hashtable;
+   TupleHashTable  hashtable;
     AggHashEntry    entry;
     TupleTableSlot *firstSlot;
     TupleTableSlot *resultSlot;
@@ -1010,18 +949,14 @@ agg_retrieve_hash_table(AggState *aggstate)
         /*
          * Find the next entry in the hash table
          */
-       entry = aggstate->next_hash_entry;
-       while (entry == NULL)
+       entry = (AggHashEntry) ScanTupleHashTable(hashtable,
+                                                 &aggstate->hashiter);
+       if (entry == NULL)
         {
-           if (aggstate->next_hash_bucket >= hashtable->nbuckets)
-           {
-               /* No more entries in hashtable, so done */
-               aggstate->agg_done = TRUE;
-               return NULL;
-           }
-           entry = hashtable->buckets[aggstate->next_hash_bucket++];
+           /* No more entries in hashtable, so done */
+           aggstate->agg_done = TRUE;
+           return NULL;
         }
-       aggstate->next_hash_entry = entry->next;
  
         /*
          * Clear the per-output-tuple context for each group
@@ -1032,7 +967,7 @@ agg_retrieve_hash_table(AggState *aggstate)
          * Store the copied first input tuple in the tuple table slot
          * reserved for it, so that it can be used in ExecProject.
          */
-       ExecStoreTuple(entry->firstTuple,
+       ExecStoreTuple(entry->shared.firstTuple,
                        firstSlot,
                        InvalidBuffer,
                        false);
@@ -1187,6 +1122,17 @@ ExecInitAgg(Agg *node, EState *estate)
         numaggs = 1;
     }
  
+   /*
+    * If we are grouping, precompute fmgr lookup data for inner loop
+    */
+   if (node->numCols > 0)
+   {
+       aggstate->eqfunctions =
+           execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
+                                  node->numCols,
+                                  node->grpColIdx);
+   }
+
     /*
      * Set up aggregate-result storage in the output expr context, and also
      * allocate my private per-agg working storage
@@ -1211,17 +1157,6 @@ ExecInitAgg(Agg *node, EState *estate)
         aggstate->pergroup = pergroup;
     }
  
-   /*
-    * If we are grouping, precompute fmgr lookup data for inner loop
-    */
-   if (node->numCols > 0)
-   {
-       aggstate->eqfunctions =
-           execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
-                                  node->numCols,
-                                  node->grpColIdx);
-   }
-
     /*
      * Perform lookups of aggregate function info, and initialize the
      * unchanging fields of the per-agg data
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c

index 58f6c1b34e906d97f156ecb826ce745b571121a7..b480e388a2119aef3ec0beb7b41306236d92c5c0 100644 (file)
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -15,7 +15,7 @@
   *   locate group boundaries.
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.53 2002/12/15 16:17:46 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.54 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -23,13 +23,8 @@
  #include "postgres.h"
  
  #include "access/heapam.h"
-#include "catalog/pg_operator.h"
  #include "executor/executor.h"
  #include "executor/nodeGroup.h"
-#include "parser/parse_oper.h"
-#include "utils/builtins.h"
-#include "utils/lsyscache.h"
-#include "utils/syscache.h"
  
  
  /*
@@ -241,116 +236,3 @@ ExecReScanGroup(GroupState *node, ExprContext *exprCtxt)
         ((PlanState *) node)->lefttree->chgParam == NULL)
         ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
  }
-
-/*****************************************************************************
- *     Code shared with nodeUnique.c and nodeAgg.c
- *****************************************************************************/
-
-/*
- * execTuplesMatch
- *     Return true if two tuples match in all the indicated fields.
- *     This is used to detect group boundaries in nodeGroup and nodeAgg,
- *     and to decide whether two tuples are distinct or not in nodeUnique.
- *
- * tuple1, tuple2: the tuples to compare
- * tupdesc: tuple descriptor applying to both tuples
- * numCols: the number of attributes to be examined
- * matchColIdx: array of attribute column numbers
- * eqFunctions: array of fmgr lookup info for the equality functions to use
- * evalContext: short-term memory context for executing the functions
- *
- * NB: evalContext is reset each time!
- */
-bool
-execTuplesMatch(HeapTuple tuple1,
-               HeapTuple tuple2,
-               TupleDesc tupdesc,
-               int numCols,
-               AttrNumber *matchColIdx,
-               FmgrInfo *eqfunctions,
-               MemoryContext evalContext)
-{
-   MemoryContext oldContext;
-   bool        result;
-   int         i;
-
-   /* Reset and switch into the temp context. */
-   MemoryContextReset(evalContext);
-   oldContext = MemoryContextSwitchTo(evalContext);
-
-   /*
-    * We cannot report a match without checking all the fields, but we
-    * can report a non-match as soon as we find unequal fields.  So,
-    * start comparing at the last field (least significant sort key).
-    * That's the most likely to be different if we are dealing with
-    * sorted input.
-    */
-   result = true;
-
-   for (i = numCols; --i >= 0;)
-   {
-       AttrNumber  att = matchColIdx[i];
-       Datum       attr1,
-                   attr2;
-       bool        isNull1,
-                   isNull2;
-
-       attr1 = heap_getattr(tuple1,
-                            att,
-                            tupdesc,
-                            &isNull1);
-
-       attr2 = heap_getattr(tuple2,
-                            att,
-                            tupdesc,
-                            &isNull2);
-
-       if (isNull1 != isNull2)
-       {
-           result = false;     /* one null and one not; they aren't equal */
-           break;
-       }
-
-       if (isNull1)
-           continue;           /* both are null, treat as equal */
-
-       /* Apply the type-specific equality function */
-
-       if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
-                                       attr1, attr2)))
-       {
-           result = false;     /* they aren't equal */
-           break;
-       }
-   }
-
-   MemoryContextSwitchTo(oldContext);
-
-   return result;
-}
-
-/*
- * execTuplesMatchPrepare
- *     Look up the equality functions needed for execTuplesMatch.
- *     The result is a palloc'd array.
- */
-FmgrInfo *
-execTuplesMatchPrepare(TupleDesc tupdesc,
-                      int numCols,
-                      AttrNumber *matchColIdx)
-{
-   FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
-   int         i;
-
-   for (i = 0; i < numCols; i++)
-   {
-       AttrNumber  att = matchColIdx[i];
-       Oid         typid = tupdesc->attrs[att - 1]->atttypid;
-       Oid         eq_function;
-
-       eq_function = equality_oper_funcid(typid);
-       fmgr_info(eq_function, &eqfunctions[i]);
-   }
-
-   return eqfunctions;
-}
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index bea89630993d372a4bc553a07e001e2aafcebb28..31152a3d8552d770570417c0a3060e16b1caa21d 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.74 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,10 +20,6 @@
   */
  #include "postgres.h"
  
-#include 
-#include 
-
-#include "access/hash.h"
  #include "executor/execdebug.h"
  #include "executor/nodeHash.h"
  #include "executor/nodeHashjoin.h"
@@ -642,74 +638,6 @@ ExecScanHashBucket(HashJoinState *hjstate,
     return NULL;
  }
  
-/* ----------------------------------------------------------------
- *     ComputeHashFunc
- *
- *     the hash function for hash joins (also used for hash aggregation)
- *
- *     XXX this probably ought to be replaced with datatype-specific
- *     hash functions, such as those already implemented for hash indexes.
- * ----------------------------------------------------------------
- */
-uint32
-ComputeHashFunc(Datum key, int typLen, bool byVal)
-{
-   unsigned char *k;
-
-   if (byVal)
-   {
-       /*
-        * If it's a by-value data type, just hash the whole Datum value.
-        * This assumes that datatypes narrower than Datum are
-        * consistently padded (either zero-extended or sign-extended, but
-        * not random bits) to fill Datum; see the XXXGetDatum macros in
-        * postgres.h. NOTE: it would not work to do hash_any(&key, len)
-        * since this would get the wrong bytes on a big-endian machine.
-        */
-       k = (unsigned char *) &key;
-       typLen = sizeof(Datum);
-   }
-   else
-   {
-       if (typLen > 0)
-       {
-           /* fixed-width pass-by-reference type */
-           k = (unsigned char *) DatumGetPointer(key);
-       }
-       else if (typLen == -1)
-       {
-           /*
-            * It's a varlena type, so 'key' points to a "struct varlena".
-            * NOTE: VARSIZE returns the "real" data length plus the
-            * sizeof the "vl_len" attribute of varlena (the length
-            * information). 'key' points to the beginning of the varlena
-            * struct, so we have to use "VARDATA" to find the beginning
-            * of the "real" data.  Also, we have to be careful to detoast
-            * the datum if it's toasted.  (We don't worry about freeing
-            * the detoasted copy; that happens for free when the
-            * per-tuple memory context is reset in ExecHashGetBucket.)
-            */
-           struct varlena *vkey = PG_DETOAST_DATUM(key);
-
-           typLen = VARSIZE(vkey) - VARHDRSZ;
-           k = (unsigned char *) VARDATA(vkey);
-       }
-       else if (typLen == -2)
-       {
-           /* It's a null-terminated C string */
-           typLen = strlen(DatumGetCString(key)) + 1;
-           k = (unsigned char *) DatumGetPointer(key);
-       }
-       else
-       {
-           elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
-           k = NULL;           /* keep compiler quiet */
-       }
-   }
-
-   return DatumGetUInt32(hash_any(k, typLen));
-}
-
  /* ----------------------------------------------------------------
   *     ExecHashTableReset
   *
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c

index 965a2a6466aff80572eba0189e56853e972b1d21..3946cd00246108f2fa2f85345ce83be732c71323 100644 (file)
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -21,7 +21,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.8 2002/12/15 16:17:46 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.9 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,9 +36,9 @@
  
  #include "access/heapam.h"
  #include "executor/executor.h"
-#include "executor/nodeGroup.h"
  #include "executor/nodeSetOp.h"
  
+
  /* ----------------------------------------------------------------
   *     ExecSetOp
   * ----------------------------------------------------------------
diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c

index 415594f92c4ddc2a6d6a9a18142a8fba04769bcd..7a0ccb0b14c8ce01b8bc3f77fc8dd5a4f08051ec 100644 (file)
--- a/src/backend/executor/nodeUnique.c
+++ b/src/backend/executor/nodeUnique.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.36 2002/12/15 16:17:46 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.37 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -27,9 +27,9 @@
  
  #include "access/heapam.h"
  #include "executor/executor.h"
-#include "executor/nodeGroup.h"
  #include "executor/nodeUnique.h"
  
+
  /* ----------------------------------------------------------------
   *     ExecUnique
   *
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index 31cc2107536aa7a096ef948ae4bf3e29f186d248..fb300fc044339990234ecccb70e8ccc57dcda5d5 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: executor.h,v 1.85 2002/12/15 21:01:34 tgl Exp $
+ * $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,6 +36,31 @@ extern void ExecMarkPos(PlanState *node);
  extern void ExecRestrPos(PlanState *node);
  extern bool ExecSupportsMarkRestore(NodeTag plantype);
  
+/*
+ * prototypes from functions in execGrouping.c
+ */
+extern bool execTuplesMatch(HeapTuple tuple1,
+               HeapTuple tuple2,
+               TupleDesc tupdesc,
+               int numCols,
+               AttrNumber *matchColIdx,
+               FmgrInfo *eqfunctions,
+               MemoryContext evalContext);
+extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
+                      int numCols,
+                      AttrNumber *matchColIdx);
+extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
+extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+                                         FmgrInfo *eqfunctions,
+                                         int nbuckets, Size entrysize,
+                                         MemoryContext tablecxt,
+                                         MemoryContext tempcxt);
+extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
+                                          TupleTableSlot *slot,
+                                          bool *isnew);
+extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
+                                        TupleHashIterator *state);
+
  /*
   * prototypes from functions in execJunk.c
   */
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h

index 036d67ccaadf0ba9ef6f5bd8c0f419797204cef2..a2817306da0adc0e8ae20c453e13323139e091a2 100644 (file)
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -1,13 +1,13 @@
  /*-------------------------------------------------------------------------
   *
   * nodeAgg.h
- *
+ *   prototypes for nodeAgg.c
   *
   *
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeAgg.h,v 1.18 2002/12/05 15:50:36 tgl Exp $
+ * $Id: nodeAgg.h,v 1.19 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
diff --git a/src/include/executor/nodeGroup.h b/src/include/executor/nodeGroup.h

index 211e55b6cadfe3a0bb80aef808a0b9a14a5aab6b..2a6b733c9d2a4314856f04f7c9a937b597b6f5c2 100644 (file)
--- a/src/include/executor/nodeGroup.h
+++ b/src/include/executor/nodeGroup.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeGroup.h,v 1.23 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeGroup.h,v 1.24 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -22,15 +22,4 @@ extern TupleTableSlot *ExecGroup(GroupState *node);
  extern void ExecEndGroup(GroupState *node);
  extern void ExecReScanGroup(GroupState *node, ExprContext *exprCtxt);
  
-extern bool execTuplesMatch(HeapTuple tuple1,
-               HeapTuple tuple2,
-               TupleDesc tupdesc,
-               int numCols,
-               AttrNumber *matchColIdx,
-               FmgrInfo *eqfunctions,
-               MemoryContext evalContext);
-extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
-                      int numCols,
-                      AttrNumber *matchColIdx);
-
  #endif   /* NODEGROUP_H */
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h

index 02e56355263b66272a814d93bc2877a976d7f2b2..da1113b32daf13005270432d1c6ad24805b903f9 100644 (file)
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -1,13 +1,13 @@
  /*-------------------------------------------------------------------------
   *
   * nodeHash.h
- *
+ *   prototypes for nodeHash.c
   *
   *
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
+ * $Id: nodeHash.h,v 1.29 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -38,6 +38,5 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
                         int *virtualbuckets,
                         int *physicalbuckets,
                         int *numbatches);
-extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
  
  #endif   /* NODEHASH_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 1ce0635c632c7540c28a6d07b6150e11ab647d7c..9c43660c610ede090e2e6d3e9d92b866ff509101 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: execnodes.h,v 1.89 2003/01/10 21:08:15 tgl Exp $
+ * $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -324,6 +324,46 @@ typedef struct EState
  } EState;
  
  
+/* ----------------------------------------------------------------
+ *              Tuple Hash Tables
+ *
+ * All-in-memory tuple hash tables are used for a number of purposes.
+ * ----------------------------------------------------------------
+ */
+typedef struct TupleHashEntryData *TupleHashEntry;
+typedef struct TupleHashTableData *TupleHashTable;
+
+typedef struct TupleHashEntryData
+{
+   TupleHashEntry next;        /* next entry in same hash bucket */
+   uint32      hashkey;        /* exact hash key of this entry */
+   HeapTuple   firstTuple;     /* copy of first tuple in this group */
+   /* there may be additional data beyond the end of this struct */
+} TupleHashEntryData;          /* VARIABLE LENGTH STRUCT */
+
+typedef struct TupleHashTableData
+{
+   int         numCols;        /* number of columns in lookup key */
+   AttrNumber *keyColIdx;      /* attr numbers of key columns */
+   FmgrInfo   *eqfunctions;    /* lookup data for comparison functions */
+   MemoryContext tablecxt;     /* memory context containing table */
+   MemoryContext tempcxt;      /* context for function evaluations */
+   Size        entrysize;      /* actual size to make each hash entry */
+   int         nbuckets;       /* number of buckets in hash table */
+   TupleHashEntry buckets[1];  /* VARIABLE LENGTH ARRAY */
+} TupleHashTableData;          /* VARIABLE LENGTH STRUCT */
+
+typedef struct
+{
+   TupleHashEntry next_entry;  /* next entry in current chain */
+   int         next_bucket;    /* next chain */
+} TupleHashIterator;
+
+#define ResetTupleHashIterator(iter) \
+   ((iter)->next_entry = NULL, \
+    (iter)->next_bucket = 0)
+
+
  /* ----------------------------------------------------------------
   *              Expression State Trees
   *
@@ -445,9 +485,6 @@ typedef struct BoolExprState
   *     SubPlanState node
   * ----------------
   */
-/* this struct is private in nodeSubplan.c: */
-typedef struct SubPlanHashTableData *SubPlanHashTable;
-
  typedef struct SubPlanState
  {
     ExprState   xprstate;
@@ -458,8 +495,8 @@ typedef struct SubPlanState
     bool        needShutdown;   /* TRUE = need to shutdown subplan */
     HeapTuple   curTuple;       /* copy of most recent tuple from subplan */
     /* these are used when hashing the subselect's output: */
-   SubPlanHashTable hashtable; /* hash table for no-nulls subselect rows */
-   SubPlanHashTable hashnulls; /* hash table for rows with null(s) */
+   TupleHashTable hashtable;   /* hash table for no-nulls subselect rows */
+   TupleHashTable hashnulls;   /* hash table for rows with null(s) */
  } SubPlanState;
  
  /* ----------------
@@ -877,8 +914,6 @@ typedef struct GroupState
  /* these structs are private in nodeAgg.c: */
  typedef struct AggStatePerAggData *AggStatePerAgg;
  typedef struct AggStatePerGroupData *AggStatePerGroup;
-typedef struct AggHashEntryData *AggHashEntry;
-typedef struct AggHashTableData *AggHashTable;
  
  typedef struct AggState
  {
@@ -894,10 +929,9 @@ typedef struct AggState
     AggStatePerGroup pergroup;  /* per-Aggref-per-group working state */
     HeapTuple   grp_firstTuple; /* copy of first tuple of current group */
     /* these fields are used in AGG_HASHED mode: */
-   AggHashTable hashtable;     /* hash table with one entry per group */
+   TupleHashTable hashtable;   /* hash table with one entry per group */
     bool        table_filled;   /* hash table filled yet? */
-   AggHashEntry next_hash_entry; /* next entry in current chain */
-   int         next_hash_bucket; /* next chain */
+   TupleHashIterator hashiter; /* for iterating through hash table */
  } AggState;
  
  /* ----------------
author	Tom Lane
	Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
committer	Tom Lane
	Fri, 10 Jan 2003 23:54:24 +0000 (23:54 +0000)
src/backend/executor/Makefile		patch | blob | blame | history
src/backend/executor/execGrouping.c	[new file with mode: 0644]	patch | blob
src/backend/executor/nodeAgg.c		patch | blob | blame | history
src/backend/executor/nodeGroup.c		patch | blob | blame | history
src/backend/executor/nodeHash.c		patch | blob | blame | history
src/backend/executor/nodeSetOp.c		patch | blob | blame | history
src/backend/executor/nodeUnique.c		patch | blob | blame | history
src/include/executor/executor.h		patch | blob | blame | history
src/include/executor/nodeAgg.h		patch | blob | blame | history
src/include/executor/nodeGroup.h		patch | blob | blame | history
src/include/executor/nodeHash.h		patch | blob | blame | history
src/include/nodes/execnodes.h		patch | blob | blame | history