Generate EquivalenceClass members for partitionwise child join rels.
authorTom Lane
Tue, 5 Nov 2019 16:42:25 +0000 (11:42 -0500)
committerTom Lane
Tue, 5 Nov 2019 16:42:25 +0000 (11:42 -0500)
Commit d25ea0127 got rid of what I thought were entirely unnecessary
derived child expressions in EquivalenceClasses for EC members that
mention multiple baserels.  But it turns out that some of the child
expressions that code created are necessary for partitionwise joins,
else we fail to find matching pathkeys for Sort nodes.  (This happens
only for certain shapes of the resulting plan; it may be that
partitionwise aggregation is also necessary to show the failure,
though I'm not sure of that.)

Reverting that commit entirely would be quite painful performance-wise
for large partition sets.  So instead, add code that explicitly
generates child expressions that match only partitionwise child join
rels we have actually generated.

Per report from Justin Pryzby.  (Amit Langote noticed the problem
earlier, though it's not clear if he recognized then that it could
result in a planner error, not merely failure to exploit partitionwise
join, in the code as-committed.)  Back-patch to v12 where commit
d25ea0127 came in.

Amit Langote, with lots of kibitzing from me

Discussion: https://postgr.es/m/CA+HiwqG2WVUGmLJqtR0tPFhniO=H=9qQ+Z3L_ZC+Y3-EVQHFGg@mail.gmail.com
Discussion: https://postgr.es/m/20191011143703[email protected]

src/backend/optimizer/path/equivclass.c
src/backend/optimizer/util/relnode.c
src/include/optimizer/paths.h
src/test/regress/expected/partition_join.out
src/test/regress/sql/partition_join.sql

index 688d9b07075235b370b12add36d2777fce94fbc4..9264d8191095e35c1389769d92c0fc6b6e02f57e 100644 (file)
@@ -2103,7 +2103,7 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
 
 /*
  * add_child_rel_equivalences
- *   Search for EC members that reference the parent_rel, and
+ *   Search for EC members that reference the root parent of child_rel, and
  *   add transformed members referencing the child_rel.
  *
  * Note that this function won't be called at all unless we have at least some
@@ -2111,6 +2111,12 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
  *
  * parent_rel and child_rel could be derived from appinfo, but since the
  * caller has already computed them, we might as well just pass them in.
+ *
+ * The passed-in AppendRelInfo is not used when the parent_rel is not a
+ * top-level baserel, since it shows the mapping from the parent_rel but
+ * we need to translate EC expressions that refer to the top-level parent.
+ * Using it is faster than using adjust_appendrel_attrs_multilevel(), though,
+ * so we prefer it when we can.
  */
 void
 add_child_rel_equivalences(PlannerInfo *root,
@@ -2118,8 +2124,12 @@ add_child_rel_equivalences(PlannerInfo *root,
                           RelOptInfo *parent_rel,
                           RelOptInfo *child_rel)
 {
+   Relids      top_parent_relids = child_rel->top_parent_relids;
+   Relids      child_relids = child_rel->relids;
    ListCell   *lc1;
 
+   Assert(IS_SIMPLE_REL(parent_rel));
+
    foreach(lc1, root->eq_classes)
    {
        EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
@@ -2137,7 +2147,7 @@ add_child_rel_equivalences(PlannerInfo *root,
         * No point in searching if child's topmost parent rel is not
         * mentioned in eclass.
         */
-       if (!bms_is_subset(child_rel->top_parent_relids, cur_ec->ec_relids))
+       if (!bms_is_subset(top_parent_relids, cur_ec->ec_relids))
            continue;
 
        foreach(lc2, cur_ec->ec_members)
@@ -2152,13 +2162,14 @@ add_child_rel_equivalences(PlannerInfo *root,
             * already-transformed child members.  Otherwise, if some original
             * member expression references more than one appendrel, we'd get
             * an O(N^2) explosion of useless derived expressions for
-            * combinations of children.
+            * combinations of children.  (But add_child_join_rel_equivalences
+            * may add targeted combinations for partitionwise-join purposes.)
             */
            if (cur_em->em_is_child)
                continue;       /* ignore children here */
 
            /* Does this member reference child's topmost parent rel? */
-           if (bms_overlap(cur_em->em_relids, child_rel->top_parent_relids))
+           if (bms_overlap(cur_em->em_relids, top_parent_relids))
            {
                /* Yes, generate transformed child version */
                Expr       *child_expr;
@@ -2179,8 +2190,8 @@ add_child_rel_equivalences(PlannerInfo *root,
                    child_expr = (Expr *)
                        adjust_appendrel_attrs_multilevel(root,
                                                          (Node *) cur_em->em_expr,
-                                                         child_rel->relids,
-                                                         child_rel->top_parent_relids);
+                                                         child_relids,
+                                                         top_parent_relids);
                }
 
                /*
@@ -2190,22 +2201,141 @@ add_child_rel_equivalences(PlannerInfo *root,
                 * don't want the child member to be marked as constant.
                 */
                new_relids = bms_difference(cur_em->em_relids,
-                                           child_rel->top_parent_relids);
-               new_relids = bms_add_members(new_relids, child_rel->relids);
+                                           top_parent_relids);
+               new_relids = bms_add_members(new_relids, child_relids);
 
                /*
                 * And likewise for nullable_relids.  Note this code assumes
                 * parent and child relids are singletons.
                 */
                new_nullable_relids = cur_em->em_nullable_relids;
-               if (bms_overlap(new_nullable_relids,
-                               child_rel->top_parent_relids))
+               if (bms_overlap(new_nullable_relids, top_parent_relids))
                {
                    new_nullable_relids = bms_difference(new_nullable_relids,
-                                                        child_rel->top_parent_relids);
+                                                        top_parent_relids);
                    new_nullable_relids = bms_add_members(new_nullable_relids,
-                                                         child_rel->relids);
+                                                         child_relids);
+               }
+
+               (void) add_eq_member(cur_ec, child_expr,
+                                    new_relids, new_nullable_relids,
+                                    true, cur_em->em_datatype);
+           }
+       }
+   }
+}
+
+/*
+ * add_child_join_rel_equivalences
+ *   Like add_child_rel_equivalences(), but for joinrels
+ *
+ * Here we find the ECs relevant to the top parent joinrel and add transformed
+ * member expressions that refer to this child joinrel.
+ *
+ * Note that this function won't be called at all unless we have at least some
+ * reason to believe that the EC members it generates will be useful.
+ */
+void
+add_child_join_rel_equivalences(PlannerInfo *root,
+                               int nappinfos, AppendRelInfo **appinfos,
+                               RelOptInfo *parent_joinrel,
+                               RelOptInfo *child_joinrel)
+{
+   Relids      top_parent_relids = child_joinrel->top_parent_relids;
+   Relids      child_relids = child_joinrel->relids;
+   ListCell   *lc1;
+
+   Assert(IS_JOIN_REL(child_joinrel) && IS_JOIN_REL(parent_joinrel));
+
+   foreach(lc1, root->eq_classes)
+   {
+       EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+       ListCell   *lc2;
+
+       /*
+        * If this EC contains a volatile expression, then generating child
+        * EMs would be downright dangerous, so skip it.  We rely on a
+        * volatile EC having only one EM.
+        */
+       if (cur_ec->ec_has_volatile)
+           continue;
+
+       /*
+        * No point in searching if child's topmost parent rel is not
+        * mentioned in eclass.
+        */
+       if (!bms_overlap(top_parent_relids, cur_ec->ec_relids))
+           continue;
+
+       foreach(lc2, cur_ec->ec_members)
+       {
+           EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+           if (cur_em->em_is_const)
+               continue;       /* ignore consts here */
+
+           /*
+            * We consider only original EC members here, not
+            * already-transformed child members.
+            */
+           if (cur_em->em_is_child)
+               continue;       /* ignore children here */
+
+           /*
+            * We may ignore expressions that reference a single baserel,
+            * because add_child_rel_equivalences should have handled them.
+            */
+           if (bms_membership(cur_em->em_relids) != BMS_MULTIPLE)
+               continue;
+
+           /* Does this member reference child's topmost parent rel? */
+           if (bms_overlap(cur_em->em_relids, top_parent_relids))
+           {
+               /* Yes, generate transformed child version */
+               Expr       *child_expr;
+               Relids      new_relids;
+               Relids      new_nullable_relids;
+
+               if (parent_joinrel->reloptkind == RELOPT_JOINREL)
+               {
+                   /* Simple single-level transformation */
+                   child_expr = (Expr *)
+                       adjust_appendrel_attrs(root,
+                                              (Node *) cur_em->em_expr,
+                                              nappinfos, appinfos);
                }
+               else
+               {
+                   /* Must do multi-level transformation */
+                   Assert(parent_joinrel->reloptkind == RELOPT_OTHER_JOINREL);
+                   child_expr = (Expr *)
+                       adjust_appendrel_attrs_multilevel(root,
+                                                         (Node *) cur_em->em_expr,
+                                                         child_relids,
+                                                         top_parent_relids);
+               }
+
+               /*
+                * Transform em_relids to match.  Note we do *not* do
+                * pull_varnos(child_expr) here, as for example the
+                * transformation might have substituted a constant, but we
+                * don't want the child member to be marked as constant.
+                */
+               new_relids = bms_difference(cur_em->em_relids,
+                                           top_parent_relids);
+               new_relids = bms_add_members(new_relids, child_relids);
+
+               /*
+                * For nullable_relids, we must selectively replace parent
+                * nullable relids with child ones.
+                */
+               new_nullable_relids = cur_em->em_nullable_relids;
+               if (bms_overlap(new_nullable_relids, top_parent_relids))
+                   new_nullable_relids =
+                       adjust_child_relids_multilevel(root,
+                                                      new_nullable_relids,
+                                                      child_relids,
+                                                      top_parent_relids);
 
                (void) add_eq_member(cur_ec, child_expr,
                                     new_relids, new_nullable_relids,
index 6054bd2b5353d96fe5b8ad3829ea8061e23d1da6..cefa4105edd6cf0af60dfb0c98b0304e37e0b36f 100644 (file)
@@ -837,6 +837,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
    /* Compute information relevant to foreign relations. */
    set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
 
+   /* Compute information needed for mapping Vars to the child rel */
    appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
 
    /* Set up reltarget struct */
@@ -848,7 +849,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
                                                        (Node *) parent_joinrel->joininfo,
                                                        nappinfos,
                                                        appinfos);
-   pfree(appinfos);
 
    /*
     * Lateral relids referred in child join will be same as that referred in
@@ -883,6 +883,19 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
    /* Add the relation to the PlannerInfo. */
    add_join_rel(root, joinrel);
 
+   /*
+    * We might need EquivalenceClass members corresponding to the child join,
+    * so that we can represent sort pathkeys for it.  As with children of
+    * baserels, we shouldn't need this unless there are relevant eclass joins
+    * (implying that a merge join might be possible) or pathkeys to sort by.
+    */
+   if (joinrel->has_eclass_joins || has_useful_pathkeys(root, parent_joinrel))
+       add_child_join_rel_equivalences(root,
+                                       nappinfos, appinfos,
+                                       parent_joinrel, joinrel);
+
+   pfree(appinfos);
+
    return joinrel;
 }
 
index 7345137d1da4e0f4df71ae9e101c99d11e742edc..c6c34630c2828563a4dd1666e3e35861f47580af 100644 (file)
@@ -153,6 +153,11 @@ extern void add_child_rel_equivalences(PlannerInfo *root,
                                       AppendRelInfo *appinfo,
                                       RelOptInfo *parent_rel,
                                       RelOptInfo *child_rel);
+extern void add_child_join_rel_equivalences(PlannerInfo *root,
+                                           int nappinfos,
+                                           AppendRelInfo **appinfos,
+                                           RelOptInfo *parent_rel,
+                                           RelOptInfo *child_rel);
 extern List *generate_implied_equalities_for_column(PlannerInfo *root,
                                                    RelOptInfo *rel,
                                                    ec_matches_callback_type callback,
index cad8dd591aa6b2d3b5c50ee49ef2a51d7d0b3849..975bf6765ca5f79a227dc83936846ea91c69969c 100644 (file)
@@ -459,6 +459,83 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
  550 |     | 
 (12 rows)
 
+-- bug with inadequate sort key representation
+SET enable_partitionwise_aggregate TO true;
+SET enable_hashjoin TO false;
+EXPLAIN (COSTS OFF)
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+                                                    QUERY PLAN                                                     
+-------------------------------------------------------------------------------------------------------------------
+ Group
+   Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+   ->  Merge Append
+         Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+         ->  Group
+               Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_p1.a = p2.a) AND (prt1_p1.b = p2.b))
+                           Filter: ((COALESCE(prt1_p1.a, p2.a) >= 490) AND (COALESCE(prt1_p1.a, p2.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_p1.a, prt1_p1.b
+                                 ->  Seq Scan on prt1_p1
+                           ->  Sort
+                                 Sort Key: p2.a, p2.b
+                                 ->  Seq Scan on prt2_p1 p2
+         ->  Group
+               Group Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_p2.a = p2_1.a) AND (prt1_p2.b = p2_1.b))
+                           Filter: ((COALESCE(prt1_p2.a, p2_1.a) >= 490) AND (COALESCE(prt1_p2.a, p2_1.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_p2.a, prt1_p2.b
+                                 ->  Seq Scan on prt1_p2
+                           ->  Sort
+                                 Sort Key: p2_1.a, p2_1.b
+                                 ->  Seq Scan on prt2_p2 p2_1
+         ->  Group
+               Group Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
+               ->  Sort
+                     Sort Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
+                     ->  Merge Full Join
+                           Merge Cond: ((prt1_p3.a = p2_2.a) AND (prt1_p3.b = p2_2.b))
+                           Filter: ((COALESCE(prt1_p3.a, p2_2.a) >= 490) AND (COALESCE(prt1_p3.a, p2_2.a) <= 510))
+                           ->  Sort
+                                 Sort Key: prt1_p3.a, prt1_p3.b
+                                 ->  Seq Scan on prt1_p3
+                           ->  Sort
+                                 Sort Key: p2_2.a, p2_2.b
+                                 ->  Seq Scan on prt2_p3 p2_2
+(43 rows)
+
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+  a  | b  
+-----+----
+ 490 | 15
+ 492 | 17
+ 494 | 19
+ 495 | 20
+ 496 | 21
+ 498 | 23
+ 500 |  0
+ 501 |  1
+ 502 |  2
+ 504 |  4
+ 506 |  6
+ 507 |  7
+ 508 |  8
+ 510 | 10
+(14 rows)
+
+RESET enable_partitionwise_aggregate;
+RESET enable_hashjoin;
 --
 -- partitioned by expression
 --
index fb3ba18a26ff617cd13443c5073e429f1307ee76..92994b479bba982f58bc72b1b08e9d7362cdadc7 100644 (file)
@@ -91,6 +91,21 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
              (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
              ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
 
+-- bug with inadequate sort key representation
+SET enable_partitionwise_aggregate TO true;
+SET enable_hashjoin TO false;
+
+EXPLAIN (COSTS OFF)
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
+  WHERE a BETWEEN 490 AND 510
+  GROUP BY 1, 2 ORDER BY 1, 2;
+
+RESET enable_partitionwise_aggregate;
+RESET enable_hashjoin;
+
 --
 -- partitioned by expression
 --