First cut at unifying regular selectivity estimation with indexscan
author: Tom Lane
Sun, 23 Jan 2000 02:07:00 +0000 (02:07 +0000)
committer: Tom Lane
Sun, 23 Jan 2000 02:07:00 +0000 (02:07 +0000)
selectivity estimation wasn't right.  This is better...

src/backend/optimizer/path/clausesel.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/createplan.c
src/backend/utils/adt/selfuncs.c
src/include/optimizer/cost.h

index a25dd68da3c25a0c1f0a5f7f5d6f95860486f98a..d3a494f9bc9f65b864f135e52bdc9ed5fbe5127e 100644 (file)
@@ -1,13 +1,13 @@
 /*-------------------------------------------------------------------------
  *
  * clausesel.c
- *   Routines to compute and set clause selectivities
+ *   Routines to compute clause selectivities
  *
  * Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.27 2000/01/09 00:26:31 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
  ****************************************************************************/
 
 /*
- * restrictlist_selec -
+ * restrictlist_selectivity -
  *   Compute the selectivity of an implicitly-ANDed list of RestrictInfo
  *   clauses.
  *
- * This is the same as clauselist_selec except for the form of the input.
+ * This is the same as clauselist_selectivity except for the representation
+ * of the clause list.
  */
 Selectivity
-restrictlist_selec(Query *root, List *restrictinfo_list)
+restrictlist_selectivity(Query *root,
+                        List *restrictinfo_list,
+                        int varRelid)
 {
    List       *clauselist = get_actual_clauses(restrictinfo_list);
    Selectivity result;
 
-   result = clauselist_selec(root, clauselist);
+   result = clauselist_selectivity(root, clauselist, varRelid);
    freeList(clauselist);
    return result;
 }
 
 /*
- * clauselist_selec -
+ * clauselist_selectivity -
  *   Compute the selectivity of an implicitly-ANDed list of boolean
- *   expression clauses.
+ *   expression clauses.  The list can be empty, in which case 1.0
+ *   must be returned.
+ *
+ * See clause_selectivity() for the meaning of the varRelid parameter.
  */
 Selectivity
-clauselist_selec(Query *root, List *clauses)
+clauselist_selectivity(Query *root,
+                      List *clauses,
+                      int varRelid)
 {
    Selectivity     s1 = 1.0;
    List           *clause;
 
    /* Use the product of the selectivities of the subclauses.
-    * XXX this is probably too optimistic, since the subclauses
+    * XXX this is too optimistic, since the subclauses
     * are very likely not independent...
     */
    foreach(clause, clauses)
    {
-       Selectivity s2 = compute_clause_selec(root, (Node *) lfirst(clause));
+       Selectivity s2 = clause_selectivity(root,
+                                           (Node *) lfirst(clause),
+                                           varRelid);
        s1 = s1 * s2;
    }
    return s1;
 }
 
 /*
- * compute_clause_selec -
+ * clause_selectivity -
  *   Compute the selectivity of a general boolean expression clause.
+ *
+ * varRelid is either 0 or a rangetable index.
+ *
+ * When varRelid is not 0, only variables belonging to that relation are
+ * considered in computing selectivity; other vars are treated as constants
+ * of unknown values.  This is appropriate for estimating the selectivity of
+ * a join clause that is being used as a restriction clause in a scan of a
+ * nestloop join's inner relation --- varRelid should then be the ID of the
+ * inner relation.
+ *
+ * When varRelid is 0, all variables are treated as variables.  This
+ * is appropriate for ordinary join clauses and restriction clauses.
  */
 Selectivity
-compute_clause_selec(Query *root, Node *clause)
+clause_selectivity(Query *root,
+                  Node *clause,
+                  int varRelid)
 {
    Selectivity     s1 = 1.0;   /* default for any unhandled clause type */
 
@@ -88,13 +112,16 @@ compute_clause_selec(Query *root, Node *clause)
         * didn't want to have to do system cache look ups to find out all
         * of that info.
         */
-       s1 = restriction_selectivity(F_EQSEL,
-                                    BooleanEqualOperator,
-                                    getrelid(((Var *) clause)->varno,
-                                             root->rtable),
-                                    ((Var *) clause)->varattno,
-                                    Int8GetDatum(true),
-                                    SEL_CONSTANT | SEL_RIGHT);
+       Index   varno = ((Var *) clause)->varno;
+
+       if (varRelid == 0 || varRelid == varno)
+           s1 = restriction_selectivity(F_EQSEL,
+                                        BooleanEqualOperator,
+                                        getrelid(varno, root->rtable),
+                                        ((Var *) clause)->varattno,
+                                        Int8GetDatum(true),
+                                        SEL_CONSTANT | SEL_RIGHT);
+       /* an outer-relation bool var is taken as always true... */
    }
    else if (IsA(clause, Param))
    {
@@ -109,12 +136,16 @@ compute_clause_selec(Query *root, Node *clause)
    else if (not_clause(clause))
    {
        /* inverse of the selectivity of the underlying clause */
-       s1 = 1.0 - compute_clause_selec(root,
-                                       (Node *) get_notclausearg((Expr *) clause));
+       s1 = 1.0 - clause_selectivity(root,
+                                     (Node*) get_notclausearg((Expr*) clause),
+                                     varRelid);
    }
    else if (and_clause(clause))
    {
-       s1 = clauselist_selec(root, ((Expr *) clause)->args);
+       /* share code with clauselist_selectivity() */
+       s1 = clauselist_selectivity(root,
+                                   ((Expr *) clause)->args,
+                                   varRelid);
    }
    else if (or_clause(clause))
    {
@@ -127,50 +158,37 @@ compute_clause_selec(Query *root, Node *clause)
        s1 = 0.0;
        foreach(arg, ((Expr *) clause)->args)
        {
-           Selectivity s2 = compute_clause_selec(root, (Node *) lfirst(arg));
+           Selectivity s2 = clause_selectivity(root,
+                                               (Node *) lfirst(arg),
+                                               varRelid);
            s1 = s1 + s2 - s1 * s2;
        }
    }
    else if (is_opclause(clause))
    {
-       if (NumRelids(clause) == 1)
-       {
-           /* The opclause is not a join clause, since there is only one
-            * relid in the clause.  The clause selectivity will be based on
-            * the operator selectivity and operand values.
-            */
-           Oid         opno = ((Oper *) ((Expr *) clause)->oper)->opno;
-           RegProcedure oprrest = get_oprrest(opno);
+       Oid         opno = ((Oper *) ((Expr *) clause)->oper)->opno;
+       bool        is_join_clause;
 
+       if (varRelid != 0)
+       {
            /*
-            * if the oprrest procedure is missing for whatever reason, use a
-            * selectivity of 0.5
+            * If we are considering a nestloop join then all clauses
+            * are restriction clauses, since we are only interested in
+            * the one relation.
             */
-           if (!oprrest)
-               s1 = (Selectivity) 0.5;
-           else
-           {
-               int         relidx;
-               AttrNumber  attno;
-               Datum       constval;
-               int         flag;
-               Oid         reloid;
-
-               get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
-               reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid;
-               s1 = restriction_selectivity(oprrest, opno,
-                                            reloid, attno,
-                                            constval, flag);
-           }
+           is_join_clause = false;
        }
        else
        {
            /*
-            * The clause must be a join clause.  The clause selectivity will
-            * be based on the relations to be scanned and the attributes they
-            * are to be joined on.
+            * Otherwise, it's a join if there's more than one relation used.
             */
-           Oid         opno = ((Oper *) ((Expr *) clause)->oper)->opno;
+           is_join_clause = (NumRelids(clause) > 1);
+       }
+
+       if (is_join_clause)
+       {
+           /* Estimate selectivity for a join clause. */
            RegProcedure oprjoin = get_oprjoin(opno);
 
            /*
@@ -196,6 +214,33 @@ compute_clause_selec(Query *root, Node *clause)
                                      reloid2, attno2);
            }
        }
+       else
+       {
+           /* Estimate selectivity for a restriction clause. */
+           RegProcedure oprrest = get_oprrest(opno);
+
+           /*
+            * if the oprrest procedure is missing for whatever reason, use a
+            * selectivity of 0.5
+            */
+           if (!oprrest)
+               s1 = (Selectivity) 0.5;
+           else
+           {
+               int         relidx;
+               AttrNumber  attno;
+               Datum       constval;
+               int         flag;
+               Oid         reloid;
+
+               get_relattval(clause, varRelid,
+                             &relidx, &attno, &constval, &flag);
+               reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid;
+               s1 = restriction_selectivity(oprrest, opno,
+                                            reloid, attno,
+                                            constval, flag);
+           }
+       }
    }
    else if (is_funcclause(clause))
    {
index 5c0f54a73e2f7894b673e4e567da8416e7c82aed..ca2d5867578f52c054ae3f269325babb180b6840 100644 (file)
@@ -18,7 +18,7 @@
  * Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.48 2000/01/22 23:50:14 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.49 2000/01/23 02:06:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -459,7 +459,10 @@ set_rel_rows_width(Query *root, RelOptInfo *rel)
    /* Should only be applied to base relations */
    Assert(length(rel->relids) == 1);
 
-   rel->rows = rel->tuples * restrictlist_selec(root, rel->restrictinfo);
+   rel->rows = rel->tuples *
+       restrictlist_selectivity(root,
+                                rel->restrictinfo,
+                                lfirsti(rel->relids));
    Assert(rel->rows >= 0);
 
    set_rel_width(root, rel);
@@ -479,8 +482,10 @@ set_joinrel_rows_width(Query *root, RelOptInfo *rel,
    temp = joinpath->outerjoinpath->parent->rows *
        joinpath->innerjoinpath->parent->rows;
 
-   /* apply restrictivity */
-   temp *= restrictlist_selec(root, joinpath->path.parent->restrictinfo);
+   /* apply join restrictivity */
+   temp *= restrictlist_selectivity(root,
+                                    joinpath->path.parent->restrictinfo,
+                                    0);
 
    Assert(temp >= 0);
    rel->rows = temp;
index fd87a89968f6b8ca441588df21c610039ca94d05..da3e74af7109c76293e3ebb90ca9f4da601b957e 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.79 2000/01/15 02:59:30 petere Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.80 2000/01/23 02:07:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -305,6 +305,7 @@ create_indexscan_node(Query *root,
                      List *scan_clauses)
 {
    List       *indxqual = best_path->indexqual;
+   Index       baserelid;
    List       *qpqual;
    List       *fixed_indxqual;
    List       *ixid;
@@ -314,6 +315,7 @@ create_indexscan_node(Query *root,
 
    /* there should be exactly one base rel involved... */
    Assert(length(best_path->path.parent->relids) == 1);
+   baserelid = lfirsti(best_path->path.parent->relids);
 
    /* check to see if any of the indices are lossy */
    foreach(ixid, best_path->indexid)
@@ -382,7 +384,9 @@ create_indexscan_node(Query *root,
        {
            /* recompute output row estimate using all available quals */
            plan_rows = best_path->path.parent->tuples *
-               clauselist_selec(root, lcons(indxqual_expr, qpqual));
+               clauselist_selectivity(root,
+                                      lcons(indxqual_expr, qpqual),
+                                      baserelid);
        }
 
        if (lossy)
@@ -401,7 +405,9 @@ create_indexscan_node(Query *root,
        {
            /* recompute output row estimate using all available quals */
            plan_rows = best_path->path.parent->tuples *
-               clauselist_selec(root, nconc(listCopy(indxqual_list), qpqual));
+               clauselist_selectivity(root,
+                                      nconc(listCopy(indxqual_list), qpqual),
+                                      baserelid);
        }
 
        if (lossy)
@@ -417,7 +423,7 @@ create_indexscan_node(Query *root,
 
    scan_node = make_indexscan(tlist,
                               qpqual,
-                              lfirsti(best_path->path.parent->relids),
+                              baserelid,
                               best_path->indexid,
                               fixed_indxqual,
                               indxqual);
index f1c458b761cf7286252a341b06ab87e089765758..6af241f9a182c7e63c55f195aaaa81f737144426 100644 (file)
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.49 2000/01/22 23:50:20 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.50 2000/01/23 02:06:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -725,7 +725,8 @@ genericcostestimate(Query *root, RelOptInfo *rel,
    double numIndexPages;
 
    /* Estimate the fraction of main-table tuples that will be visited */
-    *indexSelectivity = clauselist_selec(root, indexQuals);
+    *indexSelectivity = clauselist_selectivity(root, indexQuals,
+                                              lfirsti(rel->relids));
 
    /* Estimate the number of index tuples that will be visited */
    numIndexTuples = *indexSelectivity * index->tuples;
index 9c16fc8fea6ff510f3b86105385ddc1cd7930b4d..fd6daee1b1ffef225bb776c44a4a1851add94105 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.26 2000/01/22 23:50:26 tgl Exp $
+ * $Id: cost.h,v 1.27 2000/01/23 02:06:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "nodes/relation.h"
 
+/* defaults for costsize.c's Cost parameters */
+/* NB: cost-estimation code should use the variables, not the constants! */
+#define CPU_PAGE_WEIGHT  0.033
+#define CPU_INDEX_PAGE_WEIGHT  0.017
+
 /* defaults for function attributes used for expensive function calculations */
 #define BYTE_PCT 100
 #define PERBYTE_CPU 0
 #define PERCALL_CPU 0
 #define OUTIN_RATIO 100
-/* defaults for costsize.c's Cost parameters */
-/* NB: cost-estimation code should use the variables, not the constants! */
-#define CPU_PAGE_WEIGHT  0.033
-#define CPU_INDEX_PAGE_WEIGHT  0.017
 
 
 /*
@@ -61,8 +62,14 @@ extern void set_joinrel_rows_width(Query *root, RelOptInfo *rel,
  * prototypes for clausesel.c
  *   routines to compute clause selectivities
  */
-extern Selectivity restrictlist_selec(Query *root, List *restrictinfo_list);
-extern Selectivity clauselist_selec(Query *root, List *clauses);
-extern Selectivity compute_clause_selec(Query *root, Node *clause);
+extern Selectivity restrictlist_selectivity(Query *root,
+                                           List *restrictinfo_list,
+                                           int varRelid);
+extern Selectivity clauselist_selectivity(Query *root,
+                                         List *clauses,
+                                         int varRelid);
+extern Selectivity clause_selectivity(Query *root,
+                                     Node *clause,
+                                     int varRelid);
 
 #endif  /* COST_H */