First cut at implementing IN (and NOT IN) via hashtables. There is

author Tom Lane

Sun, 12 Jan 2003 04:03:34 +0000 (04:03 +0000)

committer Tom Lane

Sun, 12 Jan 2003 04:03:34 +0000 (04:03 +0000)
author Tom Lane
Sun, 12 Jan 2003 04:03:34 +0000 (04:03 +0000)
committer Tom Lane
Sun, 12 Jan 2003 04:03:34 +0000 (04:03 +0000)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c

index e3f7720ca75a35e7a6c4b46c322a126b85414790..0d4d5ed20f38896769bc4a4ff5c6cdb73ddcc8ab 100644 (file)
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.2 2003/01/12 04:03:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -23,17 +23,14 @@
  
  /*****************************************************************************
   *     Utility routines for grouping tuples together
- *
- * These routines actually implement SQL's notion of "distinct/not distinct".
- * Two tuples match if they are not distinct in all the compared columns,
- * i.e., the column values are either both null, or both non-null and equal.
   *****************************************************************************/
  
  /*
   * execTuplesMatch
   *     Return true if two tuples match in all the indicated fields.
- *     This is used to detect group boundaries in nodeGroup and nodeAgg,
- *     and to decide whether two tuples are distinct or not in nodeUnique.
+ *
+ * This actually implements SQL's notion of "not distinct".  Two nulls
+ * match, a null and a not-null don't match.
   *
   * tuple1, tuple2: the tuples to compare
   * tupdesc: tuple descriptor applying to both tuples
@@ -112,11 +109,88 @@ execTuplesMatch(HeapTuple tuple1,
     return result;
  }
  
+/*
+ * execTuplesUnequal
+ *     Return true if two tuples are definitely unequal in the indicated
+ *     fields, i.e. some compared column is non-null in both and differs.
+ *
+ * Nulls are neither equal nor unequal to anything else, so a column that
+ * is null in either tuple can never produce a true result by itself.
+ *
+ * Parameters are identical to execTuplesMatch; evalContext is reset here.
+ */
+bool
+execTuplesUnequal(HeapTuple tuple1,
+                 HeapTuple tuple2,
+                 TupleDesc tupdesc,
+                 int numCols,
+                 AttrNumber *matchColIdx,
+                 FmgrInfo *eqfunctions,
+                 MemoryContext evalContext)
+{
+   MemoryContext oldContext;
+   bool        result;
+   int         i;
+
+   /* Reset and switch into the temp context. */
+   MemoryContextReset(evalContext);
+   oldContext = MemoryContextSwitchTo(evalContext);
+
+   /*
+    * A true result needs only one pair of non-null fields that compare
+    * unequal, whereas a false result requires checking every field.  So,
+    * start comparing at the last field (least significant sort key).
+    * That's the most likely to be different if we are dealing with
+    * sorted input.
+    */
+   result = false;             /* nothing proven unequal so far */
+
+   for (i = numCols; --i >= 0;)
+   {
+       AttrNumber  att = matchColIdx[i];
+       Datum       attr1,
+                   attr2;
+       bool        isNull1,
+                   isNull2;
+
+       attr1 = heap_getattr(tuple1,
+                            att,
+                            tupdesc,
+                            &isNull1);
+
+       if (isNull1)
+           continue;           /* can't prove anything here */
+
+       attr2 = heap_getattr(tuple2,
+                            att,
+                            tupdesc,
+                            &isNull2);
+
+       if (isNull2)
+           continue;           /* can't prove anything here */
+
+       /* Both values are non-null: apply this column's equality function */
+
+       if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+                                       attr1, attr2)))
+       {
+           result = true;      /* they are unequal */
+           break;
+       }
+   }
+
+   MemoryContextSwitchTo(oldContext);  /* restore the caller's context */
+
+   return result;
+}
+
  
  /*
   * execTuplesMatchPrepare
- *     Look up the equality functions needed for execTuplesMatch.
- *     The result is a palloc'd array.
+ *     Look up the equality functions needed for execTuplesMatch or
+ *     execTuplesUnequal.
+ *
+ * The result is a palloc'd array.
   */
  FmgrInfo *
  execTuplesMatchPrepare(TupleDesc tupdesc,
@@ -266,8 +340,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
   * Find or create a hashtable entry for the tuple group containing the
   * given tuple.
   *
- * On return, *isnew is true if the entry is newly created, false if it
- * existed already.  Any extra space in a new entry has been zeroed.
+ * If isnew is NULL, we do not create new entries; we return NULL if no
+ * match is found.
+ *
+ * If isnew isn't NULL, then a new entry is created if no existing entry
+ * matches.  On return, *isnew is true if the entry is newly created,
+ * false if it existed already.  Any extra space in a new entry has been
+ * zeroed.
   */
  TupleHashEntry
  LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
@@ -318,26 +397,30 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                             hashtable->eqfunctions,
                             hashtable->tempcxt))
         {
+           if (isnew)
+               *isnew = false;
             MemoryContextSwitchTo(oldContext);
-           *isnew = false;
             return entry;
         }
     }
  
-   /* Not there, so build a new one */
-   MemoryContextSwitchTo(hashtable->tablecxt);
+   /* Not there, so build a new one if requested */
+   if (isnew)
+   {
+       MemoryContextSwitchTo(hashtable->tablecxt);
  
-   entry = (TupleHashEntry) palloc0(hashtable->entrysize);
+       entry = (TupleHashEntry) palloc0(hashtable->entrysize);
  
-   entry->hashkey = hashkey;
-   entry->firstTuple = heap_copytuple(tuple);
+       entry->hashkey = hashkey;
+       entry->firstTuple = heap_copytuple(tuple);
  
-   entry->next = hashtable->buckets[bucketno];
-   hashtable->buckets[bucketno] = entry;
+       entry->next = hashtable->buckets[bucketno];
+       hashtable->buckets[bucketno] = entry;
  
-   MemoryContextSwitchTo(oldContext);
+       *isnew = true;
+   }
  
-   *isnew = true;
+   MemoryContextSwitchTo(oldContext);
  
     return entry;
  }
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c

index 49986de2748f2952a80f37bd4b8fbf2a499de913..c13e1e1e4d8c078db72b2868f68bbadf7d281583 100644 (file)
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.122 2003/01/10 21:08:07 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.123 2003/01/12 04:03:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -2324,8 +2324,13 @@ ExecCleanTargetListLength(List *targetlist)
  /* ----------------------------------------------------------------
   *     ExecTargetList
   *
- *     Evaluates a targetlist with respect to the current
- *     expression context and return a tuple.
+ *     Evaluates a targetlist with respect to the given
+ *     expression context and returns a tuple.
+ *
+ * The caller must pass workspace for the values and nulls arrays
+ * as well as the itemIsDone array.  This convention saves palloc'ing
+ * workspace on each call, and some callers may find it useful to examine
+ * the values array directly.
   *
   * As with ExecEvalExpr, the caller should pass isDone = NULL if not
   * prepared to deal with sets of result tuples.  Otherwise, a return
@@ -2335,21 +2340,15 @@ ExecCleanTargetListLength(List *targetlist)
   */
  static HeapTuple
  ExecTargetList(List *targetlist,
-              int nodomains,
                TupleDesc targettype,
-              Datum *values,
                ExprContext *econtext,
+              Datum *values,
+              char *nulls,
+              ExprDoneCond *itemIsDone,
                ExprDoneCond *isDone)
  {
     MemoryContext oldContext;
-
-#define NPREALLOCDOMAINS 64
-   char        nullsArray[NPREALLOCDOMAINS];
-   ExprDoneCond itemIsDoneArray[NPREALLOCDOMAINS];
-   char       *nulls;
-   ExprDoneCond *itemIsDone;
     List       *tl;
-   HeapTuple   newTuple;
     bool        isNull;
     bool        haveDoneSets;
     static struct tupleDesc NullTupleDesc;      /* we assume this inits to
@@ -2378,31 +2377,9 @@ ExecTargetList(List *targetlist,
     if (targettype == NULL)
         targettype = &NullTupleDesc;
  
-   /*
-    * allocate an array of char's to hold the "null" information only if
-    * we have a really large targetlist.  otherwise we use the stack.
-    *
-    * We also allocate another array that holds the isDone status for each
-    * targetlist item. The isDone status is needed so that we can iterate,
-    * generating multiple tuples, when one or more tlist items return
-    * sets.  (We expect the caller to call us again if we return
-    * isDone = ExprMultipleResult.)
-    */
-   if (nodomains > NPREALLOCDOMAINS)
-   {
-       nulls = (char *) palloc(nodomains * sizeof(char));
-       itemIsDone = (ExprDoneCond *) palloc(nodomains * sizeof(ExprDoneCond));
-   }
-   else
-   {
-       nulls = nullsArray;
-       itemIsDone = itemIsDoneArray;
-   }
-
     /*
      * evaluate all the expressions in the target list
      */
-
     if (isDone)
         *isDone = ExprSingleResult;     /* until proven otherwise */
  
@@ -2451,8 +2428,7 @@ ExecTargetList(List *targetlist,
              */
             *isDone = ExprEndResult;
             MemoryContextSwitchTo(oldContext);
-           newTuple = NULL;
-           goto exit;
+           return NULL;
         }
         else
         {
@@ -2511,8 +2487,7 @@ ExecTargetList(List *targetlist,
                 }
  
                 MemoryContextSwitchTo(oldContext);
-               newTuple = NULL;
-               goto exit;
+               return NULL;
             }
         }
     }
@@ -2522,20 +2497,7 @@ ExecTargetList(List *targetlist,
      */
     MemoryContextSwitchTo(oldContext);
  
-   newTuple = (HeapTuple) heap_formtuple(targettype, values, nulls);
-
-exit:
-
-   /*
-    * free the status arrays if we palloc'd them
-    */
-   if (nodomains > NPREALLOCDOMAINS)
-   {
-       pfree(nulls);
-       pfree(itemIsDone);
-   }
-
-   return newTuple;
+   return heap_formtuple(targettype, values, nulls);
  }
  
  /* ----------------------------------------------------------------
@@ -2555,11 +2517,7 @@ TupleTableSlot *
  ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
  {
     TupleTableSlot *slot;
-   List       *targetlist;
-   int         len;
     TupleDesc   tupType;
-   Datum      *tupValue;
-   ExprContext *econtext;
     HeapTuple   newTuple;
  
     /*
@@ -2572,21 +2530,17 @@ ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
      * get the projection info we want
      */
     slot = projInfo->pi_slot;
-   targetlist = projInfo->pi_targetlist;
-   len = projInfo->pi_len;
     tupType = slot->ttc_tupleDescriptor;
  
-   tupValue = projInfo->pi_tupValue;
-   econtext = projInfo->pi_exprContext;
-
     /*
      * form a new result tuple (if possible --- result can be NULL)
      */
-   newTuple = ExecTargetList(targetlist,
-                             len,
+   newTuple = ExecTargetList(projInfo->pi_targetlist,
                               tupType,
-                             tupValue,
-                             econtext,
+                             projInfo->pi_exprContext,
+                             projInfo->pi_tupValues,
+                             projInfo->pi_tupNulls,
+                             projInfo->pi_itemIsDone,
                               isDone);
  
     /*
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c

index 054ec703866e7454edb26687a8482131ce4b9358..63eede2280234d613b4cbb1ef595fd4c65e17dea 100644 (file)
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.94 2002/12/18 00:14:47 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.95 2003/01/12 04:03:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -476,28 +476,50 @@ ExecGetResultType(PlanState *planstate)
  }
  
  /* ----------------
- *     ExecAssignProjectionInfo
-         forms the projection information from the node's targetlist
+ *     ExecBuildProjectionInfo
+ *
+ * Build a ProjectionInfo node for evaluating the given tlist in the given
+ * econtext, and storing the result into the tuple slot.  (Caller must have
+ * ensured that tuple slot has a descriptor matching the tlist!)  Note that
+ * the given tlist should be a list of ExprState nodes, not Expr nodes.
   * ----------------
   */
-void
-ExecAssignProjectionInfo(PlanState *planstate)
+ProjectionInfo *
+ExecBuildProjectionInfo(List *targetList,
+                       ExprContext *econtext,
+                       TupleTableSlot *slot)
  {
-   ProjectionInfo *projInfo;
-   List       *targetList;
+   ProjectionInfo *projInfo = makeNode(ProjectionInfo);
     int         len;
  
-   targetList = planstate->targetlist;
     len = ExecTargetListLength(targetList);
  
-   projInfo = makeNode(ProjectionInfo);
     projInfo->pi_targetlist = targetList;
-   projInfo->pi_len = len;
-   projInfo->pi_tupValue = (len <= 0) ? NULL : (Datum *) palloc(sizeof(Datum) * len);
-   projInfo->pi_exprContext = planstate->ps_ExprContext;
-   projInfo->pi_slot = planstate->ps_ResultTupleSlot;
+   projInfo->pi_exprContext = econtext;
+   projInfo->pi_slot = slot;
+   if (len > 0)
+   {
+       projInfo->pi_tupValues = (Datum *) palloc(len * sizeof(Datum));
+       projInfo->pi_tupNulls = (char *) palloc(len * sizeof(char));
+       projInfo->pi_itemIsDone = (ExprDoneCond *) palloc(len * sizeof(ExprDoneCond));
+   }
+
+   return projInfo;
+}
  
-   planstate->ps_ProjInfo = projInfo;
+/* ----------------
+ *     ExecAssignProjectionInfo
+ *
+ * forms the node's ps_ProjInfo from its targetlist, econtext and result slot
+ * ----------------
+ */
+void
+ExecAssignProjectionInfo(PlanState *planstate)
+{
+   planstate->ps_ProjInfo =
+       ExecBuildProjectionInfo(planstate->targetlist,
+                               planstate->ps_ExprContext,
+                               planstate->ps_ResultTupleSlot);
  }
  
  
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c

index 40eca6749ec94f94ad56e9593d5439de3c622826..d3f32913914f6fae8cc2689959be060725f935c4 100644 (file)
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.42 2003/01/10 21:08:08 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.43 2003/01/12 04:03:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -22,11 +22,24 @@
  #include "access/heapam.h"
  #include "executor/executor.h"
  #include "executor/nodeSubplan.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_expr.h"
  #include "tcop/pquery.h"
  
  
+static Datum ExecHashSubPlan(SubPlanState *node,
+                            ExprContext *econtext,
+                            bool *isNull);
+static Datum ExecScanSubPlan(SubPlanState *node,
+                            ExprContext *econtext,
+                            bool *isNull);
+static void buildSubPlanHash(SubPlanState *node);
+static bool findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot);
+static bool tupleAllNulls(HeapTuple tuple);
+
+
  /* ----------------------------------------------------------------
- *     ExecSubPlan(node)
+ *     ExecSubPlan
   * ----------------------------------------------------------------
   */
  Datum
@@ -35,6 +48,155 @@ ExecSubPlan(SubPlanState *node,
             bool *isNull)
  {
     SubPlan    *subplan = (SubPlan *) node->xprstate.expr;
+
+   if (subplan->setParam != NIL)
+       elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
+
+   if (subplan->useHashTable)
+       return ExecHashSubPlan(node, econtext, isNull);
+   else
+       return ExecScanSubPlan(node, econtext, isNull);
+}
+
+/*
+ * ExecHashSubPlan: hash the subselect's output, then probe it with the LHS row
+ */
+static Datum
+ExecHashSubPlan(SubPlanState *node,
+               ExprContext *econtext,
+               bool *isNull)
+{
+   SubPlan    *subplan = (SubPlan *) node->xprstate.expr;
+   PlanState  *planstate = node->planstate;
+   ExprContext *innerecontext = node->innerecontext;
+   TupleTableSlot *slot;
+   HeapTuple   tup;
+
+   /* Shouldn't have any direct correlation Vars */
+   if (subplan->parParam != NIL || node->args != NIL)
+       elog(ERROR, "ExecHashSubPlan: direct correlation not supported");
+
+   /*
+    * If first time through or we need to rescan the subplan, build
+    * the hash table.
+    */
+   if (node->hashtable == NULL || planstate->chgParam != NIL)
+       buildSubPlanHash(node);
+
+   /*
+    * The result for an empty subplan is always FALSE; no need to
+    * evaluate lefthand side.
+    */
+   *isNull = false;            /* set to true below only for UNKNOWN */
+   if (!node->havehashrows && !node->havenullrows)
+       return BoolGetDatum(false);
+
+   /*
+    * Evaluate lefthand expressions and form a projection tuple.
+    * First we have to set the econtext to use (hack alert!).
+    */
+   node->projLeft->pi_exprContext = econtext;
+   slot = ExecProject(node->projLeft, NULL);
+   tup = slot->val;
+
+   /*
+    * Note: because we are typically called in a per-tuple context, we have
+    * to explicitly clear the projected tuple before returning (hence the
+    * ExecClearTuple ahead of each return below).  Otherwise we get a
+    * double-free: the per-tuple context will probably be reset before we're
+    * called again, and then the slot will think it must free the tuple.
+    */
+
+   /*
+    * Since the hashtable routines will use innerecontext's per-tuple
+    * memory as working memory, be sure to reset it for each tuple.
+    */
+   ResetExprContext(innerecontext);
+
+   /*
+    * If the LHS is all non-null, probe for an exact match in the
+    * main hash table.  If we find one, the result is TRUE.
+    * Otherwise, scan the partly-null table to see if there are any
+    * rows that aren't provably unequal to the LHS; if so, the result
+    * is UNKNOWN.  (We skip that part if we don't care about UNKNOWN.)
+    * Otherwise, the result is FALSE.
+    *
+    * Note: the reason we can avoid a full scan of the main hash table
+    * is that the combining operators are assumed never to yield NULL
+    * when both inputs are non-null.  If they were to do so, we might
+    * need to produce UNKNOWN instead of FALSE because of an UNKNOWN
+    * result in comparing the LHS to some main-table entry --- which
+    * is a comparison we will not even make, unless there's a chance
+    * match of hash keys.
+    */
+   if (HeapTupleNoNulls(tup))
+   {
+       if (node->havehashrows &&
+           LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL)
+       {
+           ExecClearTuple(slot);
+           return BoolGetDatum(true);  /* exact match: TRUE */
+       }
+       if (node->havenullrows &&
+           findPartialMatch(node->hashnulls, slot))
+       {
+           ExecClearTuple(slot);
+           *isNull = true;     /* UNKNOWN */
+           return BoolGetDatum(false);
+       }
+       ExecClearTuple(slot);
+       return BoolGetDatum(false); /* no match possible: FALSE */
+   }
+
+   /*
+    * When the LHS is partly or wholly NULL, we can never return TRUE.
+    * If we don't care about UNKNOWN, just return FALSE.  Otherwise,
+    * if the LHS is wholly NULL, immediately return UNKNOWN.  (Since the
+    * combining operators are strict, the result could only be FALSE if the
+    * sub-select were empty, but we already handled that case.)  Otherwise,
+    * we must scan both the main and partly-null tables to see if there are
+    * any rows that aren't provably unequal to the LHS; if so, the result is
+    * UNKNOWN.  Otherwise, the result is FALSE.
+    */
+   if (node->hashnulls == NULL)    /* no null table: UNKNOWN not wanted */
+   {
+       ExecClearTuple(slot);
+       return BoolGetDatum(false);
+   }
+   if (tupleAllNulls(tup))
+   {
+       ExecClearTuple(slot);
+       *isNull = true;         /* UNKNOWN */
+       return BoolGetDatum(false);
+   }
+   /* Scan partly-null table first, since more likely to get a match */
+   if (node->havenullrows &&
+       findPartialMatch(node->hashnulls, slot))
+   {
+       ExecClearTuple(slot);
+       *isNull = true;         /* UNKNOWN */
+       return BoolGetDatum(false);
+   }
+   if (node->havehashrows &&
+       findPartialMatch(node->hashtable, slot))
+   {
+       ExecClearTuple(slot);
+       *isNull = true;         /* UNKNOWN */
+       return BoolGetDatum(false);
+   }
+   ExecClearTuple(slot);
+   return BoolGetDatum(false);     /* every stored row is unequal: FALSE */
+}
+
+/*
+ * ExecScanSubPlan: default case where we have to rescan subplan each time
+ */
+static Datum
+ExecScanSubPlan(SubPlanState *node,
+               ExprContext *econtext,
+               bool *isNull)
+{
+   SubPlan    *subplan = (SubPlan *) node->xprstate.expr;
     PlanState  *planstate = node->planstate;
     SubLinkType subLinkType = subplan->subLinkType;
     bool        useOr = subplan->useOr;
@@ -52,9 +214,6 @@ ExecSubPlan(SubPlanState *node,
      */
     oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
  
-   if (subplan->setParam != NIL)
-       elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
-
     /*
      * Set Params of this plan from parent plan correlation Vars
      */
@@ -267,6 +426,203 @@ ExecSubPlan(SubPlanState *node,
     return result;
  }
  
+/*
+ * buildSubPlanHash: (re)build the hash table(s) by scanning subplan output.
+ */
+static void
+buildSubPlanHash(SubPlanState *node)
+{
+   SubPlan    *subplan = (SubPlan *) node->xprstate.expr;
+   PlanState  *planstate = node->planstate;
+   int         ncols = length(node->exprs);
+   ExprContext *innerecontext = node->innerecontext;
+   MemoryContext tempcxt = innerecontext->ecxt_per_tuple_memory;
+   MemoryContext oldcontext;
+   int         nbuckets;
+   TupleTableSlot *slot;
+
+   Assert(subplan->subLinkType == ANY_SUBLINK);
+   Assert(!subplan->useOr);
+
+   /*
+    * If we already had any hash tables, destroy 'em; then create
+    * empty hash table(s).
+    *
+    * If we need to distinguish accurately between FALSE and UNKNOWN
+    * (i.e., NULL) results of the IN operation, then we have to store
+    * subplan output rows that are partly or wholly NULL.  We store such
+    * rows in a separate hash table that we expect will be much smaller
+    * than the main table.  (We can use hashing to eliminate partly-null
+    * rows that are not distinct.  We keep them separate to minimize the
+    * cost of the inevitable full-table searches; see findPartialMatch.)
+    *
+    * If it's not necessary to distinguish FALSE and UNKNOWN, then we
+    * don't need to store subplan output rows that contain NULL.
+    */
+   MemoryContextReset(node->tablecxt);
+   node->hashtable = NULL;
+   node->hashnulls = NULL;
+   node->havehashrows = false;
+   node->havenullrows = false;
+
+   nbuckets = (int) ceil(planstate->plan->plan_rows);  /* NOTE(review): row estimate is cast to int with no upper clamp -- confirm huge estimates cannot overflow */
+   if (nbuckets < 1)
+       nbuckets = 1;
+
+   node->hashtable = BuildTupleHashTable(ncols,
+                                         node->keyColIdx,
+                                         node->eqfunctions,
+                                         nbuckets,
+                                         sizeof(TupleHashEntryData),
+                                         node->tablecxt,
+                                         tempcxt);
+
+   if (!subplan->unknownEqFalse)
+   {
+       if (ncols == 1)
+           nbuckets = 1;       /* there can only be one entry */
+       else
+       {
+           nbuckets /= 16;     /* null table is expected to be much smaller */
+           if (nbuckets < 1)
+               nbuckets = 1;
+       }
+       node->hashnulls = BuildTupleHashTable(ncols,
+                                             node->keyColIdx,
+                                             node->eqfunctions,
+                                             nbuckets,
+                                             sizeof(TupleHashEntryData),
+                                             node->tablecxt,
+                                             tempcxt);
+   }
+
+   /*
+    * We are probably in a short-lived expression-evaluation context.
+    * Switch to the child plan's per-query context for calling ExecProcNode.
+    */
+   oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
+
+   /*
+    * Reset subplan to start.
+    */
+   ExecReScan(planstate, NULL);
+
+   /*
+    * Scan the subplan and load the hash table(s).  Note that when there are
+    * duplicate rows coming out of the sub-select, only one copy is stored.
+    */
+   for (slot = ExecProcNode(planstate);
+        !TupIsNull(slot);
+        slot = ExecProcNode(planstate))
+   {
+       HeapTuple   tup = slot->val;
+       TupleDesc   tdesc = slot->ttc_tupleDescriptor;
+       int         col = 1;    /* sub-select columns are fetched as 1, 2, ... */
+       List       *plst;
+       bool        isnew;      /* non-NULL isnew requests insertion; value unused */
+
+       /*
+        * Load up the Params representing the raw sub-select outputs,
+        * then form the projection tuple to store in the hashtable.
+        */
+       foreach(plst, subplan->paramIds)
+       {
+           int         paramid = lfirsti(plst);
+           ParamExecData *prmdata;
+
+           prmdata = &(innerecontext->ecxt_param_exec_vals[paramid]);
+           Assert(prmdata->execPlan == NULL);
+           prmdata->value = heap_getattr(tup, col, tdesc,
+                                         &(prmdata->isnull));
+           col++;
+       }
+       slot = ExecProject(node->projRight, NULL);  /* slot/tup now refer to the projected row */
+       tup = slot->val;
+
+       /*
+        * If result contains any nulls, store separately or not at all.
+        * (Since we know the projection tuple has no junk columns, we
+        * can just look at the overall hasnull info bit, instead of
+        * groveling through the columns.)
+        */
+       if (HeapTupleNoNulls(tup))
+       {
+           (void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
+           node->havehashrows = true;
+       }
+       else if (node->hashnulls)
+       {
+           (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
+           node->havenullrows = true;
+       }
+
+       /*
+        * Reset innerecontext after each inner tuple to free any memory
+        * used in hash computation or comparison routines.
+        */
+       ResetExprContext(innerecontext);
+   }
+
+   /*
+    * Since the projected tuples are in the sub-query's context and not
+    * the main context, we'd better clear the tuple slot before there's
+    * any chance of a reset of the sub-query's context.  Else we will
+    * have the potential for a double free attempt.
+    */
+   ExecClearTuple(node->projRight->pi_slot);
+
+   MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * findPartialMatch: does the hashtable contain an entry that is not
+ * provably unequal to the slot's tuple (as judged by execTuplesUnequal)?
+ *
+ * We have to scan the whole hashtable; we can't usefully use hashkeys
+ * to guide probing, since we might get partial matches on tuples with
+ * hashkeys quite unrelated to what we'd get from the given tuple.
+ */
+static bool
+findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
+{
+   int         numCols = hashtable->numCols;
+   AttrNumber *keyColIdx = hashtable->keyColIdx;
+   HeapTuple   tuple = slot->val;
+   TupleDesc   tupdesc = slot->ttc_tupleDescriptor;    /* applied to the stored tuples too */
+   TupleHashIterator hashiter;
+   TupleHashEntry  entry;
+
+   ResetTupleHashIterator(&hashiter);
+   while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
+   {
+       if (!execTuplesUnequal(entry->firstTuple,
+                              tuple,
+                              tupdesc,
+                              numCols, keyColIdx,
+                              hashtable->eqfunctions,
+                              hashtable->tempcxt))
+           return true;        /* this entry can't be proven unequal */
+   }
+   return false;               /* every entry is provably unequal */
+}
+
+/*
+ * tupleAllNulls: is every attribute of the tuple NULL?
+ */
+static bool
+tupleAllNulls(HeapTuple tuple)
+{
+   int     ncols = tuple->t_data->t_natts; /* attribute count recorded in the tuple */
+   int     i;
+
+   for (i = 1; i <= ncols; i++)    /* attributes are probed as 1..ncols */
+   {
+       if (!heap_attisnull(tuple, i))
+           return false;       /* found a non-null attribute */
+   }
+   return true;                /* also the result when ncols is 0 */
+}
+
  /* ----------------------------------------------------------------
   *     ExecInitSubPlan
   * ----------------------------------------------------------------
@@ -289,8 +645,14 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
      */
     node->needShutdown = false;
     node->curTuple = NULL;
+   node->projLeft = NULL;
+   node->projRight = NULL;
     node->hashtable = NULL;
     node->hashnulls = NULL;
+   node->tablecxt = NULL;
+   node->innerecontext = NULL;
+   node->keyColIdx = NULL;
+   node->eqfunctions = NULL;
  
     /*
      * create an EState for the subplan
@@ -343,6 +705,137 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
          * it, for others - it doesn't matter...
          */
     }
+
+   /*
+    * If we are going to hash the subquery output, initialize relevant
+    * stuff.  (We don't create the hashtable until needed, though.)
+    */
+   if (subplan->useHashTable)
+   {
+       int     ncols,
+               i;
+       TupleDesc   tupDesc;
+       TupleTable  tupTable;
+       TupleTableSlot *slot;
+       List       *lefttlist,
+                  *righttlist,
+                  *leftptlist,
+                  *rightptlist,
+                  *lexpr;
+
+       /* We need a memory context to hold the hash table(s) */
+       node->tablecxt =
+           AllocSetContextCreate(CurrentMemoryContext,
+                                 "Subplan HashTable Context",
+                                 ALLOCSET_DEFAULT_MINSIZE,
+                                 ALLOCSET_DEFAULT_INITSIZE,
+                                 ALLOCSET_DEFAULT_MAXSIZE);
+       /* and a short-lived exprcontext for function evaluation */
+       node->innerecontext = CreateExprContext(estate);
+       /* Silly little array of column numbers 1..n */
+       ncols = length(node->exprs);
+       node->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber));
+       for (i = 0; i < ncols; i++)
+           node->keyColIdx[i] = i+1;
+       /*
+        * We use ExecProject to evaluate the lefthand and righthand
+        * expression lists and form tuples.  (You might think that we
+        * could use the sub-select's output tuples directly, but that is
+        * not the case if we had to insert any run-time coercions of the
+        * sub-select's output datatypes; anyway this avoids storing any
+        * resjunk columns that might be in the sub-select's output.)
+        * Run through the combining expressions to build tlists for the
+        * lefthand and righthand sides.  We need both the ExprState list
+        * (for ExecProject) and the underlying parse Exprs (for
+        * ExecTypeFromTL).
+        *
+        * We also extract the combining operators themselves to initialize
+        * the equality functions for the hash tables.
+        */
+       lefttlist = righttlist = NIL;
+       leftptlist = rightptlist = NIL;
+       node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+       i = 1;
+       foreach(lexpr, node->exprs)
+       {
+           FuncExprState  *fstate = (FuncExprState *) lfirst(lexpr);
+           OpExpr     *opexpr = (OpExpr *) fstate->xprstate.expr;
+           ExprState  *exstate;
+           Expr       *expr;
+           TargetEntry *tle;
+           GenericExprState *tlestate;
+
+           Assert(IsA(fstate, FuncExprState));
+           Assert(IsA(opexpr, OpExpr));
+           Assert(length(fstate->args) == 2);
+
+           /* Process lefthand argument */
+           exstate = (ExprState *) lfirst(fstate->args);
+           expr = exstate->expr;
+           tle = makeTargetEntry(makeResdom(i,
+                                            exprType((Node *) expr),
+                                            exprTypmod((Node *) expr),
+                                            NULL,
+                                            false),
+                                 expr);
+           tlestate = makeNode(GenericExprState);
+           tlestate->xprstate.expr = (Expr *) tle;
+           tlestate->arg = exstate;
+           lefttlist = lappend(lefttlist, tlestate);
+           leftptlist = lappend(leftptlist, tle);
+
+           /* Process righthand argument */
+           exstate = (ExprState *) lsecond(fstate->args);
+           expr = exstate->expr;
+           tle = makeTargetEntry(makeResdom(i,
+                                            exprType((Node *) expr),
+                                            exprTypmod((Node *) expr),
+                                            NULL,
+                                            false),
+                                 expr);
+           tlestate = makeNode(GenericExprState);
+           tlestate->xprstate.expr = (Expr *) tle;
+           tlestate->arg = exstate;
+           righttlist = lappend(righttlist, tlestate);
+           rightptlist = lappend(rightptlist, tle);
+
+           /* Lookup the combining function */
+           fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]);
+
+           i++;
+       }
+
+       /*
+        * Create a tupletable to hold these tuples.  (Note: we never bother
+        * to free the tupletable explicitly; that's okay because it will
+        * never store raw disk tuples that might have associated buffer
+        * pins.  The only resource involved is memory, which will be
+        * cleaned up by freeing the query context.)
+        */
+       tupTable = ExecCreateTupleTable(2);
+
+       /*
+        * Construct tupdescs, slots and projection nodes for left and
+        * right sides.  The lefthand expressions will be evaluated in
+        * the parent plan node's exprcontext, which we don't have access
+        * to here.  Fortunately we can just pass NULL for now and fill it
+        * in later (hack alert!).  The righthand expressions will be
+        * evaluated in our own innerecontext.
+        */
+       tupDesc = ExecTypeFromTL(leftptlist, false);
+       slot = ExecAllocTableSlot(tupTable);
+       ExecSetSlotDescriptor(slot, tupDesc, true);
+       node->projLeft = ExecBuildProjectionInfo(lefttlist,
+                                                NULL,
+                                                slot);
+
+       tupDesc = ExecTypeFromTL(rightptlist, false);
+       slot = ExecAllocTableSlot(tupTable);
+       ExecSetSlotDescriptor(slot, tupDesc, true);
+       node->projRight = ExecBuildProjectionInfo(righttlist,
+                                                 node->innerecontext,
+                                                 slot);
+   }
  }
  
  /* ----------------------------------------------------------------
@@ -476,11 +969,6 @@ ExecEndSubPlan(SubPlanState *node)
         node->planstate = NULL;
         node->needShutdown = false;
     }
-   if (node->curTuple)
-   {
-       heap_freetuple(node->curTuple);
-       node->curTuple = NULL;
-   }
  }
  
  void
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index 460d5c388352a10169c9fef8203f4ea0785a91c2..2feaff11f75ea183d75fcb605446ee5c4da266f9 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.63 2003/01/10 21:08:11 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.64 2003/01/12 04:03:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -197,9 +197,9 @@ make_subplan(SubLink *slink, List *lefthand)
      * NOTE: if you change these numbers, also change cost_qual_eval_walker()
      * in path/costsize.c.
      *
-    * XXX If an ALL/ANY subplan is uncorrelated, we may decide to
-    * materialize its result below.  In that case it would've been better
-    * to specify full retrieval.  At present, however, we can only detect
+    * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
+    * materialize its result below.  In that case it would've been better to
+    * specify full retrieval.  At present, however, we can only detect
      * correlation or lack of it after we've made the subplan :-(. Perhaps
      * detection of correlation should be done as a separate step.
      * Meanwhile, we don't want to be too optimistic about the percentage
@@ -525,10 +525,17 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
     if (subquery_size > SortMem * 1024L)
         return false;
     /*
-    * The combining operators must be hashable and strict.  (Without
-    * strictness, behavior in the presence of nulls is too unpredictable.
-    * We actually must assume even more than plain strictness, see
-    * nodeSubplan.c for details.)
+    * The combining operators must be hashable, strict, and self-commutative.
+    * The need for hashability is obvious, since we want to use hashing.
+    * Without strictness, behavior in the presence of nulls is too
+    * unpredictable.  (We actually must assume even more than plain
+    * strictness, see nodeSubplan.c for details.)  And commutativity ensures
+    * that the left and right datatypes are the same; this allows us to
+    * assume that the combining operators are equality for the righthand
+    * datatype, so that they can be used to compare righthand tuples as
+    * well as comparing lefthand to righthand tuples.  (This last restriction
+    * could be relaxed by using two different sets of operators with the
+    * hash table, but there is no obvious usefulness to that at present.)
      */
     foreach(opids, slink->operOids)
     {
@@ -542,7 +549,8 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
         if (!HeapTupleIsValid(tup))
             elog(ERROR, "cache lookup failed for operator %u", opid);
         optup = (Form_pg_operator) GETSTRUCT(tup);
-       if (!optup->oprcanhash || !func_strict(optup->oprcode))
+       if (!optup->oprcanhash || optup->oprcom != opid ||
+           !func_strict(optup->oprcode))
         {
             ReleaseSysCache(tup);
             return false;
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index fb300fc044339990234ecccb70e8ccc57dcda5d5..cd462ac27a03eefd6e3b5ce84a5b3ede0ad95324 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $
+ * $Id: executor.h,v 1.87 2003/01/12 04:03:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -46,6 +46,13 @@ extern bool execTuplesMatch(HeapTuple tuple1,
                 AttrNumber *matchColIdx,
                 FmgrInfo *eqfunctions,
                 MemoryContext evalContext);
+extern bool execTuplesUnequal(HeapTuple tuple1,
+               HeapTuple tuple2,
+               TupleDesc tupdesc,
+               int numCols,
+               AttrNumber *matchColIdx,
+               FmgrInfo *eqfunctions,
+               MemoryContext evalContext);
  extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
                        int numCols,
                        AttrNumber *matchColIdx);
@@ -214,6 +221,9 @@ extern void ExecAssignResultType(PlanState *planstate,
  extern void ExecAssignResultTypeFromOuterPlan(PlanState *planstate);
  extern void ExecAssignResultTypeFromTL(PlanState *planstate);
  extern TupleDesc ExecGetResultType(PlanState *planstate);
+extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList,
+                                              ExprContext *econtext,
+                                              TupleTableSlot *slot);
  extern void ExecAssignProjectionInfo(PlanState *planstate);
  extern void ExecFreeExprContext(PlanState *planstate);
  extern TupleDesc ExecGetScanType(ScanState *scanstate);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 9c43660c610ede090e2e6d3e9d92b866ff509101..2aa672b65ea345f22ce67ecac7afecb292945b85 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $
+ * $Id: execnodes.h,v 1.91 2003/01/12 04:03:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -170,27 +170,34 @@ typedef struct ReturnSetInfo
  /* ----------------
   *     ProjectionInfo node information
   *
- *     This is all the information needed to perform projections
- *     on a tuple.  Nodes which need to do projections create one
- *     of these.  In theory, when a node wants to perform a projection
+ *     This is all the information needed to perform projections ---
+ *     that is, form new tuples by evaluation of targetlist expressions.
+ *     Nodes which need to do projections create one of these.
+ *     In theory, when a node wants to perform a projection
   *     it should just update this information as necessary and then
   *     call ExecProject().  -cim 6/3/91
   *
+ *     ExecProject() evaluates the tlist, forms a tuple, and stores it
+ *     in the given slot.  As a side-effect, the actual datum values and
+ *     null indicators are placed in the work arrays tupValues/tupNulls.
+ *
   *     targetlist      target list for projection
- *     len             length of target list
- *     tupValue        array of pointers to projection results
- *     exprContext     expression context for ExecTargetList
+ *     exprContext     expression context in which to evaluate targetlist
   *     slot            slot to place projection result in
+ *     tupValues       array of computed values
+ *     tupNulls        array of null indicators
+ *     itemIsDone      workspace for ExecProject
   * ----------------
   */
  typedef struct ProjectionInfo
  {
     NodeTag     type;
     List       *pi_targetlist;
-   int         pi_len;
-   Datum      *pi_tupValue;
     ExprContext *pi_exprContext;
     TupleTableSlot *pi_slot;
+   Datum      *pi_tupValues;
+   char       *pi_tupNulls;
+   ExprDoneCond *pi_itemIsDone;
  } ProjectionInfo;
  
  /* ----------------
@@ -495,8 +502,16 @@ typedef struct SubPlanState
     bool        needShutdown;   /* TRUE = need to shutdown subplan */
     HeapTuple   curTuple;       /* copy of most recent tuple from subplan */
     /* these are used when hashing the subselect's output: */
+   ProjectionInfo *projLeft;   /* for projecting lefthand exprs */
+   ProjectionInfo *projRight;  /* for projecting subselect output */
     TupleHashTable hashtable;   /* hash table for no-nulls subselect rows */
     TupleHashTable hashnulls;   /* hash table for rows with null(s) */
+   bool        havehashrows;   /* TRUE if hashtable is not empty */
+   bool        havenullrows;   /* TRUE if hashnulls is not empty */
+   MemoryContext tablecxt;     /* memory context containing tables */
+   ExprContext *innerecontext; /* working context for comparisons */
+   AttrNumber *keyColIdx;      /* control data for hash tables */
+   FmgrInfo   *eqfunctions;    /* comparison functions for hash tables */
  } SubPlanState;
  
  /* ----------------
author	Tom Lane
	Sun, 12 Jan 2003 04:03:34 +0000 (04:03 +0000)
committer	Tom Lane
	Sun, 12 Jan 2003 04:03:34 +0000 (04:03 +0000)
src/backend/executor/execGrouping.c		patch \| blob \| blame \| history
src/backend/executor/execQual.c		patch \| blob \| blame \| history
src/backend/executor/execUtils.c		patch \| blob \| blame \| history
src/backend/executor/nodeSubplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/subselect.c		patch \| blob \| blame \| history
src/include/executor/executor.h		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history