Implement choice between hash-based and sort-based grouping for doing
DISTINCT processing on the output of an IN sub-select.

author: Tom Lane
Wed, 22 Jan 2003 00:07:00 +0000 (00:07 +0000)
committer: Tom Lane
Wed, 22 Jan 2003 00:07:00 +0000 (00:07 +0000)

src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/pathnode.c

index b7b1204e76e30f2ceb77244e39e54e2b48b9edd6..eb7e922d9a1b47bd4ca7058f31b79533ebea8234 100644 (file)
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.132 2003/01/20 18:54:52 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.133 2003/01/22 00:07:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
+#include <limits.h>
 
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -418,6 +419,7 @@ create_unique_plan(Query *root, UniquePath *best_path)
    Plan       *plan;
    Plan       *subplan;
    List       *sub_targetlist;
+   List       *my_tlist;
    List       *l;
 
    subplan = create_plan(root, best_path->subpath);
@@ -474,21 +476,39 @@ create_unique_plan(Query *root, UniquePath *best_path)
            subplan->targetlist = newtlist;
    }
 
+   my_tlist = new_unsorted_tlist(subplan->targetlist);
+
    if (best_path->use_hash)
    {
-       elog(ERROR, "create_unique_plan: hash case not implemented yet");
-       plan = NULL;
+       int     numGroupCols = length(my_tlist);
+       long    numGroups;
+       AttrNumber *groupColIdx;
+       int     i;
+
+       numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
+
+       groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
+       for (i = 0; i < numGroupCols; i++)
+           groupColIdx[i] = i+1;
+
+       plan = (Plan *) make_agg(root,
+                                my_tlist,
+                                NIL,
+                                AGG_HASHED,
+                                numGroupCols,
+                                groupColIdx,
+                                numGroups,
+                                0,
+                                subplan);
    }
    else
    {
-       List       *sort_tlist;
        List       *sortList;
 
-       sort_tlist = new_unsorted_tlist(subplan->targetlist);
-       sortList = addAllTargetsToSortList(NIL, sort_tlist);
-       plan = (Plan *) make_sort_from_sortclauses(root, sort_tlist,
+       sortList = addAllTargetsToSortList(NIL, my_tlist);
+       plan = (Plan *) make_sort_from_sortclauses(root, my_tlist,
                                                   subplan, sortList);
-       plan = (Plan *) make_unique(sort_tlist, plan, sortList);
+       plan = (Plan *) make_unique(my_tlist, plan, sortList);
    }
 
    plan->plan_rows = best_path->rows;
index a5cc94e831b0dacd93caaff9c569206c37382121..3e8d37cb28968212af3920969bcc7d72747722a9 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.84 2003/01/20 18:54:56 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.85 2003/01/22 00:07:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include <math.h>
 
+#include "catalog/pg_operator.h"
 #include "executor/executor.h"
+#include "miscadmin.h"
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
 #include "utils/memutils.h"
 #include "utils/selfuncs.h"
+#include "utils/syscache.h"
+
+
+static bool hash_safe_tlist(List *tlist);
 
 
 /*****************************************************************************
@@ -506,6 +514,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
 {
    UniquePath *pathnode;
    Path        sort_path;      /* dummy for result of cost_sort */
+   Path        agg_path;       /* dummy for result of cost_agg */
    MemoryContext oldcontext;
    List       *sub_targetlist;
    List       *l;
@@ -587,16 +596,80 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
     */
    sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
 
-   pathnode->use_hash = false; /* for now */
+   /*
+    * Is it safe to use a hashed implementation?  If so, estimate and
+    * compare costs.  We only try this if we know the targetlist for
+    * sure (else we can't be sure about the datatypes involved).
+    */
+   pathnode->use_hash = false;
+   if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
+   {
+       /*
+        * Estimate the overhead per hashtable entry at 64 bytes (same
+        * as in planner.c).
+        */
+       int     hashentrysize = rel->width + 64;
 
-   pathnode->path.startup_cost = sort_path.startup_cost;
-   pathnode->path.total_cost = sort_path.total_cost;
+       if (hashentrysize * pathnode->rows <= SortMem * 1024L)
+       {
+           cost_agg(&agg_path, root,
+                    AGG_HASHED, 0,
+                    numCols, pathnode->rows,
+                    subpath->startup_cost,
+                    subpath->total_cost,
+                    rel->rows);
+           if (agg_path.total_cost < sort_path.total_cost)
+               pathnode->use_hash = true;
+       }
+   }
+
+   if (pathnode->use_hash)
+   {
+       pathnode->path.startup_cost = agg_path.startup_cost;
+       pathnode->path.total_cost = agg_path.total_cost;
+   }
+   else
+   {
+       pathnode->path.startup_cost = sort_path.startup_cost;
+       pathnode->path.total_cost = sort_path.total_cost;
+   }
 
    rel->cheapest_unique_path = (Path *) pathnode;
 
    return pathnode;
 }
 
+/*
+ * hash_safe_tlist - can datatypes of given tlist be hashed?
+ *
+ * We assume hashed aggregation will work if the datatype's equality operator
+ * is marked hashjoinable (pg_operator's oprcanhash flag).
+ *
+ * XXX this probably should be somewhere else.  See also hash_safe_grouping
+ * in plan/planner.c.
+ */
+static bool
+hash_safe_tlist(List *tlist)
+{
+   List       *tl;
+
+   foreach(tl, tlist)
+   {
+       Node       *expr = (Node *) lfirst(tl);    /* each list cell is used directly as an expression node */
+       Operator    optup;
+       bool        oprcanhash;
+
+       optup = equality_oper(exprType(expr), true);    /* NOTE(review): second arg presumably suppresses the error on lookup failure -- confirm */
+       if (!optup)
+           return false;    /* no equality operator was returned for this type */
+       oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;    /* copy the flag out before the tuple is released */
+       ReleaseSysCache(optup);
+       if (!oprcanhash)
+           return false;    /* a single unhashable column rules out hashing */
+   }
+   return true;    /* every entry passed (vacuously so for an empty list) */
+}
+
 /*
  * create_subqueryscan_path
  *   Creates a path corresponding to a sequential scan of a subquery,