Extend join-selectivity API (oprjoin interface) so that join type is
passed to join selectivity estimators.  Make use of this in eqjoinsel
to derive non-bogus selectivity for IN clauses.  Further tweaking of
cost estimation for IN.
initdb forced because of pg_proc.h changes.

author: Tom Lane
Tue, 28 Jan 2003 22:13:41 +0000 (22:13 +0000)
committer: Tom Lane
Tue, 28 Jan 2003 22:13:41 +0000 (22:13 +0000)

16 files changed:
doc/src/sgml/indexcost.sgml
src/backend/catalog/pg_operator.c
src/backend/optimizer/path/clausesel.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/plan/subselect.c
src/backend/optimizer/util/plancat.c
src/backend/utils/adt/selfuncs.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/include/optimizer/cost.h
src/include/optimizer/plancat.h
src/include/utils/selfuncs.h
src/test/regress/expected/opr_sanity.out
src/test/regress/expected/subselect.out
src/test/regress/sql/opr_sanity.sql

diff --git a/doc/src/sgml/indexcost.sgml b/doc/src/sgml/indexcost.sgml
index 1211653edd2a34496a01e294de7c1a4a8a5dfe03..6c8c940c100f49e540c661b8cdea5bc8c6746e0a 100644
@@ -1,5 +1,5 @@
 
 
  
@@ -205,7 +205,8 @@ amcostestimate (Query *root,
 
      
 *indexSelectivity = clauselist_selectivity(root, indexQuals,
-                                           lfirsti(rel->relids));
+                                           lfirsti(rel->relids),
+                                           JOIN_INNER);
      
     
    
diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c
index 941212a649ff15c6db0219cd5db69df1af12b15f..4c09a40b1d74a2385d07ee7e05c93e8cfd6f8ab7 100644
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $
  *
  * NOTES
  *   these routines moved here from commands/define.c and somewhat cleaned up.
@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,
        typeId[0] = INTERNALOID;    /* Query */
        typeId[1] = OIDOID;     /* operator OID */
        typeId[2] = INTERNALOID;    /* args list */
+       typeId[3] = INT2OID;    /* jointype */
 
-       joinOid = LookupFuncName(joinName, 3, typeId);
+       joinOid = LookupFuncName(joinName, 4, typeId);
        if (!OidIsValid(joinOid))
-           func_error("OperatorDef", joinName, 3, typeId, NULL);
+           func_error("OperatorDef", joinName, 4, typeId, NULL);
    }
    else
        joinOid = InvalidOid;
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 84041a566d18b3fc7fed3e7d8985143e87475842..9df0a79478230f77bc95e13b30676e05c2e2f6a6 100644
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 Selectivity
 restrictlist_selectivity(Query *root,
                         List *restrictinfo_list,
-                        int varRelid)
+                        int varRelid,
+                        JoinType jointype)
 {
    List       *clauselist = get_actual_clauses(restrictinfo_list);
    Selectivity result;
 
-   result = clauselist_selectivity(root, clauselist, varRelid);
+   result = clauselist_selectivity(root, clauselist, varRelid, jointype);
    freeList(clauselist);
    return result;
 }
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
  *   expression clauses.  The list can be empty, in which case 1.0
  *   must be returned.
  *
- * See clause_selectivity() for the meaning of the varRelid parameter.
+ * See clause_selectivity() for the meaning of the additional parameters.
  *
  * Our basic approach is to take the product of the selectivities of the
  * subclauses. However, that's only right if the subclauses have independent
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
 Selectivity
 clauselist_selectivity(Query *root,
                       List *clauses,
-                      int varRelid)
+                      int varRelid,
+                      JoinType jointype)
 {
    Selectivity s1 = 1.0;
    RangeQueryClause *rqlist = NULL;
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
            }
        }
        /* Not the right form, so treat it generically. */
-       s2 = clause_selectivity(root, clause, varRelid);
+       s2 = clause_selectivity(root, clause, varRelid, jointype);
        s1 = s1 * s2;
    }
 
@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
  *
  * When varRelid is 0, all variables are treated as variables. This
  * is appropriate for ordinary join clauses and restriction clauses.
+ *
+ * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
+ * if the clause isn't a join clause or the context is uncertain.
  */
 Selectivity
 clause_selectivity(Query *root,
                   Node *clause,
-                  int varRelid)
+                  int varRelid,
+                  JoinType jointype)
 {
    Selectivity s1 = 1.0;       /* default for any unhandled clause type */
 
@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
        /* inverse of the selectivity of the underlying clause */
        s1 = 1.0 - clause_selectivity(root,
                              (Node *) get_notclausearg((Expr *) clause),
-                                     varRelid);
+                                     varRelid,
+                                     jointype);
    }
    else if (and_clause(clause))
    {
        /* share code with clauselist_selectivity() */
        s1 = clauselist_selectivity(root,
                                    ((BoolExpr *) clause)->args,
-                                   varRelid);
+                                   varRelid,
+                                   jointype);
    }
    else if (or_clause(clause))
    {
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
        {
            Selectivity s2 = clause_selectivity(root,
                                                (Node *) lfirst(arg),
-                                               varRelid);
+                                               varRelid,
+                                               jointype);
 
            s1 = s1 + s2 - s1 * s2;
        }
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
        {
            /* Estimate selectivity for a join clause. */
            s1 = join_selectivity(root, opno,
-                                 ((OpExpr *) clause)->args);
+                                 ((OpExpr *) clause)->args,
+                                 jointype);
        }
        else
        {
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
        s1 = booltestsel(root,
                         ((BooleanTest *) clause)->booltesttype,
                         (Node *) ((BooleanTest *) clause)->arg,
-                        varRelid);
+                        varRelid,
+                        jointype);
    }
    else if (IsA(clause, RelabelType))
    {
        /* Not sure this case is needed, but it can't hurt */
        s1 = clause_selectivity(root,
                                (Node *) ((RelabelType *) clause)->arg,
-                               varRelid);
+                               varRelid,
+                               jointype);
    }
 
 #ifdef SELECTIVITY_DEBUG
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index d18e29ad6f4872981a91920ba9863c9b62db37db..56282406129f5ab54c30f76b0cc85bc450083fe2 100644
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,7 +104,8 @@ bool        enable_hashjoin = true;
 static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
                                            int nbuckets);
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
-static Selectivity approx_selectivity(Query *root, List *quals);
+static Selectivity approx_selectivity(Query *root, List *quals,
+                                     JoinType jointype);
 static void set_rel_width(Query *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
     */
    if (path->jointype == JOIN_IN)
    {
-       Selectivity qual_selec = approx_selectivity(root, restrictlist);
+       Selectivity qual_selec = approx_selectivity(root, restrictlist,
+                                                   path->jointype);
        double  qptuples;
 
        qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
     * Note: it's probably bogus to use the normal selectivity calculation
     * here when either the outer or inner path is a UniquePath.
     */
-   merge_selec = approx_selectivity(root, mergeclauses);
+   merge_selec = approx_selectivity(root, mergeclauses,
+                                    path->jpath.jointype);
    cost_qual_eval(&merge_qual_cost, mergeclauses);
    qpquals = set_ptrDifference(restrictlist, mergeclauses);
-   qp_selec = approx_selectivity(root, qpquals);
+   qp_selec = approx_selectivity(root, qpquals,
+                                 path->jpath.jointype);
    cost_qual_eval(&qp_qual_cost, qpquals);
    freeList(qpquals);
 
@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
     * Note: it's probably bogus to use the normal selectivity calculation
     * here when either the outer or inner path is a UniquePath.
     */
-   hash_selec = approx_selectivity(root, hashclauses);
+   hash_selec = approx_selectivity(root, hashclauses,
+                                   path->jpath.jointype);
    cost_qual_eval(&hash_qual_cost, hashclauses);
    qpquals = set_ptrDifference(restrictlist, hashclauses);
-   qp_selec = approx_selectivity(root, qpquals);
+   qp_selec = approx_selectivity(root, qpquals,
+                                 path->jpath.jointype);
    cost_qual_eval(&qp_qual_cost, qpquals);
    freeList(qpquals);
 
@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
     * Determine bucketsize fraction for inner relation.  We use the
     * smallest bucketsize estimated for any individual hashclause;
     * this is undoubtedly conservative.
+    *
+    * BUT: if inner relation has been unique-ified, we can assume it's
+    * good for hashing.  This is important both because it's the right
+    * answer, and because we avoid contaminating the cache with a value
+    * that's wrong for non-unique-ified paths.
     */
-   innerbucketsize = 1.0;
-   foreach(hcl, hashclauses)
+   if (IsA(inner_path, UniquePath))
+       innerbucketsize = 1.0 / virtualbuckets;
+   else
    {
-       RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
-       Selectivity thisbucketsize;
+       innerbucketsize = 1.0;
+       foreach(hcl, hashclauses)
+       {
+           RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
+           Selectivity thisbucketsize;
 
-       Assert(IsA(restrictinfo, RestrictInfo));
+           Assert(IsA(restrictinfo, RestrictInfo));
 
-       /*
-        * First we have to figure out which side of the hashjoin clause is the
-        * inner side.
-        *
-        * Since we tend to visit the same clauses over and over when planning
-        * a large query, we cache the bucketsize estimate in the RestrictInfo
-        * node to avoid repeated lookups of statistics.
-        */
-       if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
-       {
-           /* righthand side is inner */
-           thisbucketsize = restrictinfo->right_bucketsize;
-           if (thisbucketsize < 0)
+           /*
+            * First we have to figure out which side of the hashjoin clause
+            * is the inner side.
+            *
+            * Since we tend to visit the same clauses over and over when
+            * planning a large query, we cache the bucketsize estimate in the
+            * RestrictInfo node to avoid repeated lookups of statistics.
+            */
+           if (is_subseti(restrictinfo->right_relids,
+                          inner_path->parent->relids))
            {
-               /* not cached yet */
-               thisbucketsize = estimate_hash_bucketsize(root,
+               /* righthand side is inner */
+               thisbucketsize = restrictinfo->right_bucketsize;
+               if (thisbucketsize < 0)
+               {
+                   /* not cached yet */
+                   thisbucketsize =
+                       estimate_hash_bucketsize(root,
                                    (Var *) get_rightop(restrictinfo->clause),
-                                                         virtualbuckets);
-               restrictinfo->right_bucketsize = thisbucketsize;
+                                                virtualbuckets);
+                   restrictinfo->right_bucketsize = thisbucketsize;
+               }
            }
-       }
-       else
-       {
-           Assert(is_subseti(restrictinfo->left_relids,
-                             inner_path->parent->relids));
-           /* lefthand side is inner */
-           thisbucketsize = restrictinfo->left_bucketsize;
-           if (thisbucketsize < 0)
+           else
            {
-               /* not cached yet */
-               thisbucketsize = estimate_hash_bucketsize(root,
+               Assert(is_subseti(restrictinfo->left_relids,
+                                 inner_path->parent->relids));
+               /* lefthand side is inner */
+               thisbucketsize = restrictinfo->left_bucketsize;
+               if (thisbucketsize < 0)
+               {
+                   /* not cached yet */
+                   thisbucketsize =
+                       estimate_hash_bucketsize(root,
                                    (Var *) get_leftop(restrictinfo->clause),
-                                                         virtualbuckets);
-               restrictinfo->left_bucketsize = thisbucketsize;
+                                                virtualbuckets);
+                   restrictinfo->left_bucketsize = thisbucketsize;
+               }
            }
-       }
 
-       if (innerbucketsize > thisbucketsize)
-           innerbucketsize = thisbucketsize;
+           if (innerbucketsize > thisbucketsize)
+               innerbucketsize = thisbucketsize;
+       }
    }
 
    /*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
  * seems OK to live with the approximation.
  */
 static Selectivity
-approx_selectivity(Query *root, List *quals)
+approx_selectivity(Query *root, List *quals, JoinType jointype)
 {
    Selectivity total = 1.0;
    List       *l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
                restrictinfo->this_selec =
                    clause_selectivity(root,
                                       (Node *) restrictinfo->clause,
-                                      0);
+                                      0,
+                                      jointype);
            selec = restrictinfo->this_selec;
        }
        else
        {
            /* If it's a bare expression, must always do it the hard way */
-           selec = clause_selectivity(root, qual, 0);
+           selec = clause_selectivity(root, qual, 0, jointype);
        }
        total *= selec;
    }
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
    temp = rel->tuples *
        restrictlist_selectivity(root,
                                 rel->baserestrictinfo,
-                                lfirsti(rel->relids));
+                                lfirsti(rel->relids),
+                                JOIN_INNER);
 
    /*
     * Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
     */
    selec = restrictlist_selectivity(root,
                                     restrictlist,
-                                    0);
+                                    0,
+                                    jointype);
 
    /*
     * Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
     * For JOIN_IN and variants, the Cartesian product is figured with
     * respect to a unique-ified input, and then we can clamp to the size
     * of the other input.
-    * XXX it's not at all clear that the ordinary selectivity calculation
-    * is appropriate in this case.
     */
    switch (jointype)
    {
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
    temp = rel->tuples *
        restrictlist_selectivity(root,
                                 rel->baserestrictinfo,
-                                lfirsti(rel->relids));
+                                lfirsti(rel->relids),
+                                JOIN_INNER);
 
    /*
     * Force estimate to be at least one row, to make explain output look
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 443d54c64733ba0ed20edaf1151257e5460e6a49..98e4d59f2df6b4db001036a198d2408be2d4d9ee 100644
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.133 2003/01/24 03:58:34 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.134 2003/01/28 22:13:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1599,12 +1599,16 @@ make_innerjoin_index_path(Query *root,
     * selectivity.  However, since RestrictInfo nodes aren't copied when
     * linking them into different lists, it should be sufficient to use
     * pointer comparison to remove duplicates.)
+    *
+    * Always assume the join type is JOIN_INNER; even if some of the
+    * join clauses come from other contexts, that's not our problem.
     */
    pathnode->rows = rel->tuples *
        restrictlist_selectivity(root,
                                 set_ptrUnion(rel->baserestrictinfo,
                                              clausegroup),
-                                lfirsti(rel->relids));
+                                lfirsti(rel->relids),
+                                JOIN_INNER);
    /* Like costsize.c, force estimate to be at least one row */
    if (pathnode->rows < 1.0)
        pathnode->rows = 1.0;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 5f420f3725029c2372b868ea8f407ab08667e0cd..9f56a9f38d515ca2c977efb74ff2bd5d593078b3 100644
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.69 2003/01/28 22:13:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -351,7 +351,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
 
                        qualsel = clauselist_selectivity(subquery,
                                                         plan->qual,
-                                                        0);
+                                                        0, JOIN_INNER);
                        /* Is 10% selectivity a good threshold?? */
                        use_material = qualsel < 0.10;
                    }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 15120fafcd8ea3ee5a8b628be27c8c3b8ced20af..4a9f63312c3a070c393fbf48f03bbe1d18f800c4 100644
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.75 2002/11/24 21:52:14 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.76 2003/01/28 22:13:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -196,8 +196,7 @@ find_secondary_indexes(Oid relationObjectId)
  * This code executes registered procedures stored in the
  * operator relation, by calling the function manager.
  *
- * varRelid is either 0 or a rangetable index. See clause_selectivity()
- * for details about its meaning.
+ * See clause_selectivity() for the meaning of the additional parameters.
  */
 Selectivity
 restriction_selectivity(Query *root,
@@ -237,7 +236,8 @@ restriction_selectivity(Query *root,
 Selectivity
 join_selectivity(Query *root,
                 Oid operator,
-                List *args)
+                List *args,
+                JoinType jointype)
 {
    RegProcedure oprjoin = get_oprjoin(operator);
    float8      result;
@@ -249,10 +249,11 @@ join_selectivity(Query *root,
    if (!oprjoin)
        return (Selectivity) 0.5;
 
-   result = DatumGetFloat8(OidFunctionCall3(oprjoin,
+   result = DatumGetFloat8(OidFunctionCall4(oprjoin,
                                             PointerGetDatum(root),
                                             ObjectIdGetDatum(operator),
-                                            PointerGetDatum(args)));
+                                            PointerGetDatum(args),
+                                            Int16GetDatum(jointype)));
 
    if (result < 0.0 || result > 1.0)
        elog(ERROR, "join_selectivity: bad value %f", result);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 8fb4e84ad772bf15b1075dec12c9f55a03a13b2f..d099262c46fa9a83ce5e88cd3a367db0c936b77d 100644
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.130 2003/01/27 20:51:54 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.131 2003/01/28 22:13:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
  *     float8 oprrest (internal, oid, internal, int4);
  *
  * The call convention for a join estimator (oprjoin function) is similar
- * except that varRelid is not needed:
+ * except that varRelid is not needed, and instead the join type is
+ * supplied:
  *
  *     Selectivity oprjoin (Query *root,
  *                          Oid operator,
- *                          List *args);
+ *                          List *args,
+ *                          JoinType jointype);
+ *
+ *     float8 oprjoin (internal, oid, internal, int2);
  *
- *     float8 oprjoin (internal, oid, internal);
+ * (We deliberately make the SQL signature different to facilitate
+ * catching errors.)
  *----------
  */
 
@@ -1009,7 +1014,8 @@ icnlikesel(PG_FUNCTION_ARGS)
  *     booltestsel     - Selectivity of BooleanTest Node.
  */
 Selectivity
-booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
+booltestsel(Query *root, BoolTestType booltesttype, Node *arg,
+           int varRelid, JoinType jointype)
 {
    Var        *var;
    Oid         relid;
@@ -1047,11 +1053,13 @@ booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
                break;
            case IS_TRUE:
            case IS_NOT_FALSE:
-               selec = (double) clause_selectivity(root, arg, varRelid);
+               selec = (double) clause_selectivity(root, arg,
+                                                   varRelid, jointype);
                break;
            case IS_FALSE:
            case IS_NOT_TRUE:
-               selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
+               selec = 1.0 - (double) clause_selectivity(root, arg,
+                                                         varRelid, jointype);
                break;
            default:
                elog(ERROR, "booltestsel: unexpected booltesttype %d",
@@ -1321,6 +1329,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
    Query      *root = (Query *) PG_GETARG_POINTER(0);
    Oid         operator = PG_GETARG_OID(1);
    List       *args = (List *) PG_GETARG_POINTER(2);
+   JoinType    jointype = (JoinType) PG_GETARG_INT16(3);
    Var        *var1;
    Var        *var2;
    double      selec;
@@ -1421,6 +1430,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
            FmgrInfo    eqproc;
            bool       *hasmatch1;
            bool       *hasmatch2;
+           double      nullfrac1 = stats1->stanullfrac;
+           double      nullfrac2 = stats2->stanullfrac;
            double      matchprodfreq,
                        matchfreq1,
                        matchfreq2,
@@ -1434,10 +1445,36 @@ eqjoinsel(PG_FUNCTION_ARGS)
                        nmatches;
 
            fmgr_info(get_opcode(operator), &eqproc);
-           hasmatch1 = (bool *) palloc(nvalues1 * sizeof(bool));
-           memset(hasmatch1, 0, nvalues1 * sizeof(bool));
-           hasmatch2 = (bool *) palloc(nvalues2 * sizeof(bool));
-           memset(hasmatch2, 0, nvalues2 * sizeof(bool));
+           hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+           hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+
+           /*
+            * If we are doing any variant of JOIN_IN, pretend all the values
+            * of the righthand relation are unique (ie, act as if it's been
+            * DISTINCT'd).
+            *
+            * NOTE: it might seem that we should unique-ify the lefthand
+            * input when considering JOIN_REVERSE_IN.  But this is not so,
+            * because the join clause we've been handed has not been
+            * commuted from the way the parser originally wrote it.  We know
+            * that the unique side of the IN clause is *always* on the right.
+            *
+            * NOTE: it would be dangerous to try to be smart about JOIN_LEFT
+            * or JOIN_RIGHT here, because we do not have enough information
+            * to determine which var is really on which side of the join.
+            * Perhaps someday we should pass in more information.
+            */
+           if (jointype == JOIN_IN ||
+               jointype == JOIN_REVERSE_IN ||
+               jointype == JOIN_UNIQUE_INNER ||
+               jointype == JOIN_UNIQUE_OUTER)
+           {
+               float4  oneovern = 1.0 / nd2;
+
+               for (i = 0; i < nvalues2; i++)
+                   numbers2[i] = oneovern;
+               nullfrac2 = oneovern;
+           }
 
            /*
             * Note we assume that each MCV will match at most one member
@@ -1496,8 +1533,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
             * Compute total frequency of non-null values that are not in
             * the MCV lists.
             */
-           otherfreq1 = 1.0 - stats1->stanullfrac - matchfreq1 - unmatchfreq1;
-           otherfreq2 = 1.0 - stats2->stanullfrac - matchfreq2 - unmatchfreq2;
+           otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
+           otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
            CLAMP_PROBABILITY(otherfreq1);
            CLAMP_PROBABILITY(otherfreq2);
 
@@ -1585,6 +1622,7 @@ neqjoinsel(PG_FUNCTION_ARGS)
    Query      *root = (Query *) PG_GETARG_POINTER(0);
    Oid         operator = PG_GETARG_OID(1);
    List       *args = (List *) PG_GETARG_POINTER(2);
+   JoinType    jointype = (JoinType) PG_GETARG_INT16(3);
    Oid         eqop;
    float8      result;
 
@@ -1595,11 +1633,11 @@ neqjoinsel(PG_FUNCTION_ARGS)
    eqop = get_negator(operator);
    if (eqop)
    {
-       result = DatumGetFloat8(DirectFunctionCall3(eqjoinsel,
+       result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,
                                                    PointerGetDatum(root),
-                                                 ObjectIdGetDatum(eqop),
-                                                PointerGetDatum(args)));
-
+                                                   ObjectIdGetDatum(eqop),
+                                                   PointerGetDatum(args),
+                                                   Int16GetDatum(jointype)));
    }
    else
    {
@@ -3784,7 +3822,8 @@ genericcostestimate(Query *root, RelOptInfo *rel,
 
    /* Estimate the fraction of main-table tuples that will be visited */
    *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
-                                              lfirsti(rel->relids));
+                                              lfirsti(rel->relids),
+                                              JOIN_INNER);
 
    /*
     * Estimate the number of tuples that will be visited.  We do it in
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index b679fdb5ddc78a45feb9292dfa9163ab81201a70..d234eb3289541aaa7e242862e64d1336df4a735a 100644
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.173 2003/01/23 23:39:04 petere Exp $
+ * $Id: catversion.h,v 1.174 2003/01/28 22:13:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 200301241
+#define CATALOG_VERSION_NO 200301281
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index db907f745fa67e8f37a1099cf23ae2ac63d16ccd..d7b13a762ebbdde928115e96070760ffcda0cd3f 100644
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.281 2003/01/09 00:58:41 tgl Exp $
+ * $Id: pg_proc.h,v 1.282 2003/01/28 22:13:36 tgl Exp $
  *
  * NOTES
  *   The script catalog/genbki.sh reads this file and generates .bki
@@ -218,13 +218,13 @@ DATA(insert OID = 103 (  scalarltsel     PGNSP PGUID 12 f f t f s 4 701 "2281 26
 DESCR("restriction selectivity of < and related operators on scalar datatypes");
 DATA(insert OID = 104 (  scalargtsel      PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  scalargtsel - _null_ ));
 DESCR("restriction selectivity of > and related operators on scalar datatypes");
-DATA(insert OID = 105 (  eqjoinsel        PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  eqjoinsel - _null_ ));
+DATA(insert OID = 105 (  eqjoinsel        PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  eqjoinsel - _null_ ));
 DESCR("join selectivity of = and related operators");
-DATA(insert OID = 106 (  neqjoinsel           PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  neqjoinsel - _null_ ));
+DATA(insert OID = 106 (  neqjoinsel           PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  neqjoinsel - _null_ ));
 DESCR("join selectivity of <> and related operators");
-DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  scalarltjoinsel - _null_ ));
+DATA(insert OID = 107 (  scalarltjoinsel   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  scalarltjoinsel - _null_ ));
 DESCR("join selectivity of < and related operators on scalar datatypes");
-DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  scalargtjoinsel - _null_ ));
+DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  scalargtjoinsel - _null_ ));
 DESCR("join selectivity of > and related operators on scalar datatypes");
 
 DATA(insert OID =  109 (  unknownin           PGNSP PGUID 12 f f t f i 1 705 "2275"    unknownin - _null_ ));
@@ -290,7 +290,7 @@ DATA(insert OID = 138 (  box_center        PGNSP PGUID 12 f f t f i 1 600 "603"  bo
 DESCR("center of");
 DATA(insert OID = 139 (  areasel          PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  areasel - _null_ ));
 DESCR("restriction selectivity for area-comparison operators");
-DATA(insert OID = 140 (  areajoinsel      PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  areajoinsel - _null_ ));
+DATA(insert OID = 140 (  areajoinsel      PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  areajoinsel - _null_ ));
 DESCR("join selectivity for area-comparison operators");
 DATA(insert OID = 141 (  int4mul          PGNSP PGUID 12 f f t f i 2 23 "23 23"    int4mul - _null_ ));
 DESCR("multiply");
@@ -1590,11 +1590,11 @@ DESCR("current transaction time");
 
 DATA(insert OID = 1300 (  positionsel         PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  positionsel - _null_ ));
 DESCR("restriction selectivity for position-comparison operators");
-DATA(insert OID = 1301 (  positionjoinsel     PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  positionjoinsel - _null_ ));
+DATA(insert OID = 1301 (  positionjoinsel     PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  positionjoinsel - _null_ ));
 DESCR("join selectivity for position-comparison operators");
 DATA(insert OID = 1302 (  contsel         PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  contsel - _null_ ));
 DESCR("restriction selectivity for containment comparison operators");
-DATA(insert OID = 1303 (  contjoinsel     PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  contjoinsel - _null_ ));
+DATA(insert OID = 1303 (  contjoinsel     PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  contjoinsel - _null_ ));
 DESCR("join selectivity for containment comparison operators");
 
 DATA(insert OID = 1304 ( overlaps           PGNSP PGUID 12 f f f f i 4 16 "1184 1184 1184 1184"    overlaps_timestamp - _null_ ));
@@ -2545,9 +2545,9 @@ DATA(insert OID = 1814 ( iclikesel            PGNSP PGUID 12 f f t f s 4 701 "2281 26 228
 DESCR("restriction selectivity of ILIKE");
 DATA(insert OID = 1815 ( icnlikesel            PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  icnlikesel - _null_ ));
 DESCR("restriction selectivity of NOT ILIKE");
-DATA(insert OID = 1816 ( iclikejoinsel     PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  iclikejoinsel - _null_ ));
+DATA(insert OID = 1816 ( iclikejoinsel     PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  iclikejoinsel - _null_ ));
 DESCR("join selectivity of ILIKE");
-DATA(insert OID = 1817 ( icnlikejoinsel        PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  icnlikejoinsel - _null_ ));
+DATA(insert OID = 1817 ( icnlikejoinsel        PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  icnlikejoinsel - _null_ ));
 DESCR("join selectivity of NOT ILIKE");
 DATA(insert OID = 1818 ( regexeqsel            PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  regexeqsel - _null_ ));
 DESCR("restriction selectivity of regex match");
@@ -2561,17 +2561,17 @@ DATA(insert OID = 1822 ( nlikesel           PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281
 DESCR("restriction selectivity of NOT LIKE");
 DATA(insert OID = 1823 ( icregexnesel      PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23"  icregexnesel - _null_ ));
 DESCR("restriction selectivity of case-insensitive regex non-match");
-DATA(insert OID = 1824 ( regexeqjoinsel        PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  regexeqjoinsel - _null_ ));
+DATA(insert OID = 1824 ( regexeqjoinsel        PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  regexeqjoinsel - _null_ ));
 DESCR("join selectivity of regex match");
-DATA(insert OID = 1825 ( likejoinsel       PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  likejoinsel - _null_ ));
+DATA(insert OID = 1825 ( likejoinsel       PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  likejoinsel - _null_ ));
 DESCR("join selectivity of LIKE");
-DATA(insert OID = 1826 ( icregexeqjoinsel  PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  icregexeqjoinsel - _null_ ));
+DATA(insert OID = 1826 ( icregexeqjoinsel  PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  icregexeqjoinsel - _null_ ));
 DESCR("join selectivity of case-insensitive regex match");
-DATA(insert OID = 1827 ( regexnejoinsel        PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  regexnejoinsel - _null_ ));
+DATA(insert OID = 1827 ( regexnejoinsel        PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  regexnejoinsel - _null_ ));
 DESCR("join selectivity of regex non-match");
-DATA(insert OID = 1828 ( nlikejoinsel      PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  nlikejoinsel - _null_ ));
+DATA(insert OID = 1828 ( nlikejoinsel      PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  nlikejoinsel - _null_ ));
 DESCR("join selectivity of NOT LIKE");
-DATA(insert OID = 1829 ( icregexnejoinsel  PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281"  icregexnejoinsel - _null_ ));
+DATA(insert OID = 1829 ( icregexnejoinsel  PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21"  icregexnejoinsel - _null_ ));
 DESCR("join selectivity of case-insensitive regex non-match");
 
 /* Aggregate-related functions */
index aca6097bc1cc89800221d85394a7d2719774fd09..0feb56dd7c926833023cf2de84126e82c669fced 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.51 2003/01/27 20:51:54 tgl Exp $
+ * $Id: cost.h,v 1.52 2003/01/28 22:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -88,13 +88,16 @@ extern void set_function_size_estimates(Query *root, RelOptInfo *rel);
  *   routines to compute clause selectivities
  */
 extern Selectivity restrictlist_selectivity(Query *root,
-                        List *restrictinfo_list,
-                        int varRelid);
+                                           List *restrictinfo_list,
+                                           int varRelid,
+                                           JoinType jointype);
 extern Selectivity clauselist_selectivity(Query *root,
-                      List *clauses,
-                      int varRelid);
+                                         List *clauses,
+                                         int varRelid,
+                                         JoinType jointype);
 extern Selectivity clause_selectivity(Query *root,
-                  Node *clause,
-                  int varRelid);
+                                     Node *clause,
+                                     int varRelid,
+                                     JoinType jointype);
 
 #endif   /* COST_H */
index abd09871feb13fa7e518020e7bd0d936931e05fc..255d196d7d713cb1f0629d7231f254b7288bc43a 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: plancat.h,v 1.27 2002/06/20 20:29:51 momjian Exp $
+ * $Id: plancat.h,v 1.28 2003/01/28 22:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,7 +34,8 @@ extern Selectivity restriction_selectivity(Query *root,
                        int varRelid);
 
 extern Selectivity join_selectivity(Query *root,
-                Oid operator,
-                List *args);
+                                   Oid operator,
+                                   List *args,
+                                   JoinType jointype);
 
 #endif   /* PLANCAT_H */
index 037c2b2f5e3833afb78bfde398b2db503eaaef24..757c0e1e1ac7eb54bfea83cff1afa235e84747a9 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: selfuncs.h,v 1.11 2003/01/20 18:55:07 tgl Exp $
+ * $Id: selfuncs.h,v 1.12 2003/01/28 22:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,7 +67,7 @@ extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
 extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
 
 extern Selectivity booltestsel(Query *root, BoolTestType booltesttype,
-                              Node *arg, int varRelid);
+                              Node *arg, int varRelid, JoinType jointype);
 extern Selectivity nulltestsel(Query *root, NullTestType nulltesttype,
                               Node *arg, int varRelid);
 
index 7ef807a95db1375103f19a0f1d55ad210dc8c48c..dcf295919c9f58ab08099dbbbf8fc92c71e6509f 100644 (file)
@@ -530,16 +530,17 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2)
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 3 OR
+     p2.pronargs != 4 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
-     p2.proargtypes[2] != 'internal'::regtype);
+     p2.proargtypes[2] != 'internal'::regtype OR
+     p2.proargtypes[3] != 'int2'::regtype);
  oid | oprname | oid | proname 
 -----+---------+-----+---------
 (0 rows)
index 8d7597863fcfd9c7ff24591d2c58798f93984619..5a2ef11c21b9cb294b74e89b8a9fdf1638f059d1 100644 (file)
@@ -134,10 +134,10 @@ SELECT '' AS five, f1 AS "Correlated Field"
                      WHERE f3 IS NOT NULL);
  five | Correlated Field 
 ------+------------------
-      |                1
-      |                2
       |                2
       |                3
+      |                1
+      |                2
       |                3
 (5 rows)
 
index 650073cccc1dd128cd6930dd50d4b51261ad9a2d..8d543932a7c30a5d540555fc6c6bcaca19e02467 100644 (file)
@@ -444,17 +444,18 @@ WHERE p1.oprrest = p2.oid AND
 -- If oprjoin is set, the operator must be a binary boolean op,
 -- and it must link to a proc with the right signature
 -- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2)
 
 SELECT p1.oid, p1.oprname, p2.oid, p2.proname
 FROM pg_operator AS p1, pg_proc AS p2
 WHERE p1.oprjoin = p2.oid AND
     (p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
      p2.prorettype != 'float8'::regtype OR p2.proretset OR
-     p2.pronargs != 3 OR
+     p2.pronargs != 4 OR
      p2.proargtypes[0] != 'internal'::regtype OR
      p2.proargtypes[1] != 'oid'::regtype OR
-     p2.proargtypes[2] != 'internal'::regtype);
+     p2.proargtypes[2] != 'internal'::regtype OR
+     p2.proargtypes[3] != 'int2'::regtype);
 
 -- **************** pg_aggregate ****************