Fix some bogosities in the code that deals with estimating the fraction of tuples that will be retrieved from a sub-SELECT.

author Tom Lane
Tue, 14 Mar 2000 02:23:15 +0000 (02:23 +0000)
committer Tom Lane
Tue, 14 Mar 2000 02:23:15 +0000 (02:23 +0000)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index c14692d5b97edfe9bbe1ff69ba83a14a92b3f7ea..e70d2a7abee59cf86cbb6f418ea8cf5f23e58c7e 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.52 2000/02/15 20:49:16 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.53 2000/03/14 02:23:14 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -687,8 +687,8 @@ cost_qual_eval_walker(Node *node, Cost *total)
                  * (We assume that sub-selects that can be executed as
                  * InitPlans have already been removed from the expression.)
                  *
-                * NOTE: this logic should agree with make_subplan in
-                * subselect.c. 
+                * NOTE: this logic should agree with the estimates used by
+                * make_subplan() in plan/subselect.c. 
                  */
                 {
                     SubPlan    *subplan = (SubPlan *) expr->oper;
@@ -701,16 +701,18 @@ cost_qual_eval_walker(Node *node, Cost *total)
                         subcost = plan->startup_cost +
                             (plan->total_cost - plan->startup_cost) / plan->plan_rows;
                     }
-                   else if (subplan->sublink->subLinkType == EXPR_SUBLINK)
-                   {
-                       /* assume we need all tuples */
-                       subcost = plan->total_cost;
-                   }
-                   else
+                   else if (subplan->sublink->subLinkType == ALL_SUBLINK ||
+                            subplan->sublink->subLinkType == ANY_SUBLINK)
                     {
                         /* assume we need 50% of the tuples */
                         subcost = plan->startup_cost +
                             0.50 * (plan->total_cost - plan->startup_cost);
+                       /* XXX what if subplan has been materialized? */
+                   }
+                   else
+                   {
+                       /* assume we need all tuples */
+                       subcost = plan->total_cost;
                     }
                     *total += subcost;
                 }
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 21794fe0d386dfdf27d3012411fcc224b2a149e4..3faf0904d393d41dd637efd90b955fb17923ca41 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.76 2000/02/21 01:13:04 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.77 2000/03/14 02:23:15 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -360,11 +360,14 @@ union_planner(Query *parse,
              * In GROUP BY mode, we have the little problem that we don't
              * really know how many input tuples will be needed to make a
              * group, so we can't translate an output LIMIT count into an
-            * input count.  For lack of a better idea, assume 10% of the
+            * input count.  For lack of a better idea, assume 25% of the
              * input data will be processed if there is any output limit.
+            * However, if the caller gave us a fraction rather than an
+            * absolute count, we can keep using that fraction (which amounts
+            * to assuming that all the groups are about the same size).
              */
-           if (tuple_fraction > 0.0)
-               tuple_fraction = 0.10;
+           if (tuple_fraction >= 1.0)
+               tuple_fraction = 0.25;
             /*
              * If both GROUP BY and ORDER BY are specified, we will need
              * two levels of sort --- and, therefore, certainly need to
@@ -386,11 +389,10 @@ union_planner(Query *parse,
         {
             /*
              * SELECT DISTINCT, like GROUP, will absorb an unpredictable
-            * number of input tuples per output tuple.  So, fall back to
-            * our same old 10% default...
+            * number of input tuples per output tuple.  Handle the same way.
              */
-           if (tuple_fraction > 0.0)
-               tuple_fraction = 0.10;
+           if (tuple_fraction >= 1.0)
+               tuple_fraction = 0.25;
         }
  
         /* Generate the (sub) plan */
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index a5df56d92046c51e967d2d8838a1b1c69e622897..16f4a95a7852b15d8f73ee79946cad90ccd5c904 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.30 2000/03/11 23:53:41 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.31 2000/03/14 02:23:15 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -134,25 +134,34 @@ make_subplan(SubLink *slink)
  
     PlannerInitPlan = NULL;
  
-   PlannerQueryLevel++;        /* we becomes child */
+   PlannerQueryLevel++;        /* we become child */
  
     /*
      * For an EXISTS subplan, tell lower-level planner to expect that
-    * only the first tuple will be retrieved.  For ALL, ANY, and MULTIEXPR
-    * subplans, we will be able to stop evaluating if the test condition
-    * fails, so very often not all the tuples will be retrieved; for lack
-    * of a better idea, specify 50% retrieval.  For EXPR_SUBLINK use default
-    * behavior.
+    * only the first tuple will be retrieved.  For ALL and ANY subplans,
+    * we will be able to stop evaluating if the test condition fails,
+    * so very often not all the tuples will be retrieved; for lack of a
+    * better idea, specify 50% retrieval.  For EXPR and MULTIEXPR subplans,
+    * use default behavior (we're only expecting one row out, anyway).
      *
-    * NOTE: if you change these numbers, also change cost_qual_eval_walker
-    * in costsize.c.
+    * NOTE: if you change these numbers, also change cost_qual_eval_walker()
+    * in path/costsize.c.
+    *
+    * XXX If an ALL/ANY subplan is uncorrelated, we may decide to materialize
+    * its result below.  In that case it would've been better to specify
+    * full retrieval.  At present, however, we can only detect correlation
+    * or lack of it after we've made the subplan :-(.  Perhaps detection
+    * of correlation should be done as a separate step.  Meanwhile, we don't
+    * want to be too optimistic about the percentage of tuples retrieved,
+    * for fear of selecting a plan that's bad for the materialization case.
      */
     if (slink->subLinkType == EXISTS_SUBLINK)
         tuple_fraction = 1.0;   /* just like a LIMIT 1 */
-   else if (slink->subLinkType == EXPR_SUBLINK)
-       tuple_fraction = -1.0;  /* default behavior */
-   else
+   else if (slink->subLinkType == ALL_SUBLINK ||
+            slink->subLinkType == ANY_SUBLINK)
         tuple_fraction = 0.5;   /* 50% */
+   else
+       tuple_fraction = -1.0;  /* default behavior */
  
     node->plan = plan = union_planner(subquery, tuple_fraction);
author	Tom Lane
	Tue, 14 Mar 2000 02:23:15 +0000 (02:23 +0000)
committer	Tom Lane
	Tue, 14 Mar 2000 02:23:15 +0000 (02:23 +0000)
Files changed:
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/subselect.c