Remove planner's have_dangerous_phv() join-order restriction.

author Tom Lane

Fri, 20 Jun 2025 19:55:12 +0000 (15:55 -0400)

committer Tom Lane

Fri, 20 Jun 2025 19:55:12 +0000 (15:55 -0400)
author Tom Lane
Fri, 20 Jun 2025 19:55:12 +0000 (15:55 -0400)
committer Tom Lane
Fri, 20 Jun 2025 19:55:12 +0000 (15:55 -0400)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index 26f0336f1e40972918cd1f980464bf94200af8a8..7aa8f5d799cac8652736004a1b09956b30b05ef0 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -876,16 +876,13 @@ try_nestloop_path(PlannerInfo *root,
     /*
      * Check to see if proposed path is still parameterized, and reject if the
      * parameterization wouldn't be sensible --- unless allow_star_schema_join
-    * says to allow it anyway.  Also, we must reject if have_dangerous_phv
-    * doesn't like the look of it, which could only happen if the nestloop is
-    * still parameterized.
+    * says to allow it anyway.
      */
     required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
                                                   innerrelids, inner_paramrels);
     if (required_outer &&
-       ((!bms_overlap(required_outer, extra->param_source_rels) &&
-         !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
-        have_dangerous_phv(root, outerrelids, inner_paramrels)))
+       !bms_overlap(required_outer, extra->param_source_rels) &&
+       !allow_star_schema_join(root, outerrelids, inner_paramrels))
     {
         /* Waste no memory when we reject a path here */
         bms_free(required_outer);
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c

index 60d65762b5d5e03bd1e328593f7215d5980bdbbf..aad41b940091db693e2b570199c3db3ca3d9d3ea 100644 (file)
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -565,9 +565,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
          * Also, if the lateral reference is only indirect, we should reject
          * the join; whatever rel(s) the reference chain goes through must be
          * joined to first.
-        *
-        * Another case that might keep us from building a valid plan is the
-        * implementation restriction described by have_dangerous_phv().
          */
         lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
         lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
@@ -584,9 +581,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
             /* check there is a direct reference from rel2 to rel1 */
             if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
                 return false;   /* only indirect refs, so reject */
-           /* check we won't have a dangerous PHV */
-           if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
-               return false;   /* might be unable to handle required PHV */
         }
         else if (lateral_rev)
         {
@@ -599,9 +593,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
             /* check there is a direct reference from rel1 to rel2 */
             if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
                 return false;   /* only indirect refs, so reject */
-           /* check we won't have a dangerous PHV */
-           if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
-               return false;   /* might be unable to handle required PHV */
         }
  
         /*
@@ -1278,57 +1269,6 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
  }
  
  
-/*
- * There's a pitfall for creating parameterized nestloops: suppose the inner
- * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
- * minimum eval_at set includes the outer rel (B) and some third rel (C).
- * We might think we could create a B/A nestloop join that's parameterized by
- * C.  But we would end up with a plan in which the PHV's expression has to be
- * evaluated as a nestloop parameter at the B/A join; and the executor is only
- * set up to handle simple Vars as NestLoopParams.  Rather than add complexity
- * and overhead to the executor for such corner cases, it seems better to
- * forbid the join.  (Note that we can still make use of A's parameterized
- * path with pre-joined B+C as the outer rel.  have_join_order_restriction()
- * ensures that we will consider making such a join even if there are not
- * other reasons to do so.)
- *
- * So we check whether any PHVs used in the query could pose such a hazard.
- * We don't have any simple way of checking whether a risky PHV would actually
- * be used in the inner plan, and the case is so unusual that it doesn't seem
- * worth working very hard on it.
- *
- * This needs to be checked in two places.  If the inner rel's minimum
- * parameterization would trigger the restriction, then join_is_legal() should
- * reject the join altogether, because there will be no workable paths for it.
- * But joinpath.c has to check again for every proposed nestloop path, because
- * the inner path might have more than the minimum parameterization, causing
- * some PHV to be dangerous for it that otherwise wouldn't be.
- */
-bool
-have_dangerous_phv(PlannerInfo *root,
-                  Relids outer_relids, Relids inner_params)
-{
-   ListCell   *lc;
-
-   foreach(lc, root->placeholder_list)
-   {
-       PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
-
-       if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
-           continue;           /* ignore, could not be a nestloop param */
-       if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
-           continue;           /* ignore, not relevant to this join */
-       if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
-           continue;           /* safe, it can be eval'd within outerrel */
-       /* Otherwise, it's potentially unsafe, so reject the join */
-       return true;
-   }
-
-   /* OK to perform the join */
-   return false;
-}
-
-
  /*
   * is_dummy_rel --- has relation been proven empty?
   */
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 4ad30b7627e6ec6713ce30af32b830fc128288e1..8baf36ba4b7914a2738898a4e135f8825afe3a0e 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4348,9 +4348,11 @@ create_nestloop_plan(PlannerInfo *root,
     List       *joinrestrictclauses = best_path->jpath.joinrestrictinfo;
     List       *joinclauses;
     List       *otherclauses;
-   Relids      outerrelids;
     List       *nestParams;
+   List       *outer_tlist;
+   bool        outer_parallel_safe;
     Relids      saveOuterRels = root->curOuterRels;
+   ListCell   *lc;
  
     /*
      * If the inner path is parameterized by the topmost parent of the outer
@@ -4412,9 +4414,47 @@ create_nestloop_plan(PlannerInfo *root,
      * Identify any nestloop parameters that should be supplied by this join
      * node, and remove them from root->curOuterParams.
      */
-   outerrelids = best_path->jpath.outerjoinpath->parent->relids;
-   nestParams = identify_current_nestloop_params(root, outerrelids);
+   nestParams = identify_current_nestloop_params(root,
+                                                 best_path->jpath.outerjoinpath);
+
+   /*
+    * While nestloop parameters that are Vars had better be available from
+    * the outer_plan already, there are edge cases where nestloop parameters
+    * that are PHVs won't be.  In such cases we must add them to the
+    * outer_plan's tlist, since the executor's NestLoopParam machinery
+    * requires the params to be simple outer-Var references to that tlist.
+    */
+   outer_tlist = outer_plan->targetlist;
+   outer_parallel_safe = outer_plan->parallel_safe;
+   foreach(lc, nestParams)
+   {
+       NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
+       TargetEntry *tle;
+
+       if (IsA(nlp->paramval, Var))
+           continue;           /* nothing to do for simple Vars */
+       if (tlist_member((Expr *) nlp->paramval, outer_tlist))
+           continue;           /* already available */
+
+       /* Make a shallow copy of outer_tlist, if we didn't already */
+       if (outer_tlist == outer_plan->targetlist)
+           outer_tlist = list_copy(outer_tlist);
+       /* ... and add the needed expression */
+       tle = makeTargetEntry((Expr *) copyObject(nlp->paramval),
+                             list_length(outer_tlist) + 1,
+                             NULL,
+                             true);
+       outer_tlist = lappend(outer_tlist, tle);
+       /* ... and track whether tlist is (still) parallel-safe */
+       if (outer_parallel_safe)
+           outer_parallel_safe = is_parallel_safe(root,
+                                                  (Node *) nlp->paramval);
+   }
+   if (outer_tlist != outer_plan->targetlist)
+       outer_plan = change_plan_targetlist(outer_plan, outer_tlist,
+                                           outer_parallel_safe);
  
+   /* And finally, we can build the join plan node */
     join_plan = make_nestloop(tlist,
                               joinclauses,
                               otherclauses,
diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c

index 3bd3ce37c8fce20f11965f766058106175211c3d..9836abf947995d4e01eda6afee08a0d22b0c802f 100644 (file)
--- a/src/backend/optimizer/util/paramassign.c
+++ b/src/backend/optimizer/util/paramassign.c
@@ -600,7 +600,7 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
  
  /*
   * Identify any NestLoopParams that should be supplied by a NestLoop plan
- * node with the specified lefthand rels.  Remove them from the active
+ * node with the specified lefthand input path.  Remove them from the active
   * root->curOuterParams list and return them as the result list.
   *
   * XXX Here we also hack up the returned Vars and PHVs so that they do not
@@ -626,11 +626,26 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
   * subquery, which'd be unduly expensive.
   */
  List *
-identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
+identify_current_nestloop_params(PlannerInfo *root, Path *leftpath)
  {
     List       *result;
+   Relids      leftrelids = leftpath->parent->relids;
+   Relids      outerrelids = PATH_REQ_OUTER(leftpath);
+   Relids      allleftrelids;
     ListCell   *cell;
  
+   /*
+    * We'll be able to evaluate a PHV in the lefthand path if it uses the
+    * lefthand rels plus any available required-outer rels.  But don't do so
+    * if it uses *only* required-outer rels; in that case it should be
+    * evaluated higher in the tree.  For Vars, no such hair-splitting is
+    * necessary since they depend on only one relid.
+    */
+   if (outerrelids)
+       allleftrelids = bms_union(leftrelids, outerrelids);
+   else
+       allleftrelids = leftrelids;
+
     result = NIL;
     foreach(cell, root->curOuterParams)
     {
@@ -653,18 +668,20 @@ identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
                                                 leftrelids);
             result = lappend(result, nlp);
         }
-       else if (IsA(nlp->paramval, PlaceHolderVar) &&
-                bms_is_subset(find_placeholder_info(root,
-                                                    (PlaceHolderVar *) nlp->paramval)->ph_eval_at,
-                              leftrelids))
+       else if (IsA(nlp->paramval, PlaceHolderVar))
         {
             PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval;
+           Relids      eval_at = find_placeholder_info(root, phv)->ph_eval_at;
  
-           root->curOuterParams = foreach_delete_current(root->curOuterParams,
-                                                         cell);
-           phv->phnullingrels = bms_intersect(phv->phnullingrels,
-                                              leftrelids);
-           result = lappend(result, nlp);
+           if (bms_is_subset(eval_at, allleftrelids) &&
+               bms_overlap(eval_at, leftrelids))
+           {
+               root->curOuterParams = foreach_delete_current(root->curOuterParams,
+                                                             cell);
+               phv->phnullingrels = bms_intersect(phv->phnullingrels,
+                                                  leftrelids);
+               result = lappend(result, nlp);
+           }
         }
     }
     return result;
diff --git a/src/include/optimizer/paramassign.h b/src/include/optimizer/paramassign.h

index 59dcb1ff0539931c5391d9aa429cc23f28ee4f2c..d30d20de299229b21a8454e0652a29924ef45c13 100644 (file)
--- a/src/include/optimizer/paramassign.h
+++ b/src/include/optimizer/paramassign.h
@@ -30,7 +30,7 @@ extern Param *replace_nestloop_param_placeholdervar(PlannerInfo *root,
  extern void process_subquery_nestloop_params(PlannerInfo *root,
                                              List *subplan_params);
  extern List *identify_current_nestloop_params(PlannerInfo *root,
-                                             Relids leftrelids);
+                                             Path *leftpath);
  extern Param *generate_new_exec_param(PlannerInfo *root, Oid paramtype,
                                       int32 paramtypmod, Oid paramcollation);
  extern int assign_special_exec_param(PlannerInfo *root);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h

index a48c972179712867c78f8c736e238d761e904d34..8410531f2d640d4c3c236f18536d23c72d96e338 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -109,8 +109,6 @@ extern Relids add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids,
                                         List **pushed_down_joins);
  extern bool have_join_order_restriction(PlannerInfo *root,
                                         RelOptInfo *rel1, RelOptInfo *rel2);
-extern bool have_dangerous_phv(PlannerInfo *root,
-                              Relids outer_relids, Relids inner_params);
  extern void mark_dummy_rel(RelOptInfo *rel);
  extern void init_dummy_sjinfo(SpecialJoinInfo *sjinfo, Relids left_relids,
                               Relids right_relids);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index f35a0b18c37cbbcaad246647cb34e8ebd3bf875f..c292f04fdbaab50cb155f16659532ab613367438 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3946,6 +3946,59 @@ where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
  (1 row)
  
  -- variant that isn't quite a star-schema case
+explain (verbose, costs off)
+select ss1.d1 from
+  tenk1 as t1
+  inner join tenk1 as t2
+  on t1.tenthous = t2.ten
+  inner join
+    int8_tbl as i8
+    left join int4_tbl as i4
+      inner join (select 64::information_schema.cardinal_number as d1
+                  from tenk1 t3,
+                       lateral (select abs(t3.unique1) + random()) ss0(x)
+                  where t3.fivethous < 0) as ss1
+      on i4.f1 = ss1.d1
+    on i8.q1 = i4.f1
+  on t1.tenthous = ss1.d1
+where t1.unique1 < i4.f1;
+                                                                                                             QUERY PLAN                                                                                                              
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Nested Loop
+   Output: (64)::information_schema.cardinal_number
+   Join Filter: (t1.tenthous = ((64)::information_schema.cardinal_number)::integer)
+   ->  Seq Scan on public.tenk1 t3
+         Output: t3.unique1, t3.unique2, t3.two, t3.four, t3.ten, t3.twenty, t3.hundred, t3.thousand, t3.twothousand, t3.fivethous, t3.tenthous, t3.odd, t3.even, t3.stringu1, t3.stringu2, t3.string4
+         Filter: (t3.fivethous < 0)
+   ->  Nested Loop
+         Output: t1.tenthous, t2.ten
+         ->  Nested Loop
+               Output: t1.tenthous, t2.ten, i4.f1
+               Join Filter: (t1.unique1 < i4.f1)
+               ->  Hash Join
+                     Output: t1.tenthous, t1.unique1, t2.ten
+                     Hash Cond: (t2.ten = t1.tenthous)
+                     ->  Seq Scan on public.tenk1 t2
+                           Output: t2.unique1, t2.unique2, t2.two, t2.four, t2.ten, t2.twenty, t2.hundred, t2.thousand, t2.twothousand, t2.fivethous, t2.tenthous, t2.odd, t2.even, t2.stringu1, t2.stringu2, t2.string4
+                     ->  Hash
+                           Output: t1.tenthous, t1.unique1
+                           ->  Nested Loop
+                                 Output: t1.tenthous, t1.unique1
+                                 ->  Subquery Scan on ss0
+                                       Output: ss0.x, (64)::information_schema.cardinal_number
+                                       ->  Result
+                                             Output: ((abs(t3.unique1))::double precision + random())
+                                 ->  Index Scan using tenk1_thous_tenthous on public.tenk1 t1
+                                       Output: t1.unique1, t1.unique2, t1.two, t1.four, t1.ten, t1.twenty, t1.hundred, t1.thousand, t1.twothousand, t1.fivethous, t1.tenthous, t1.odd, t1.even, t1.stringu1, t1.stringu2, t1.string4
+                                       Index Cond: (t1.tenthous = (((64)::information_schema.cardinal_number))::integer)
+               ->  Seq Scan on public.int4_tbl i4
+                     Output: i4.f1
+                     Filter: (i4.f1 = ((64)::information_schema.cardinal_number)::integer)
+         ->  Seq Scan on public.int8_tbl i8
+               Output: i8.q1, i8.q2
+               Filter: (i8.q1 = ((64)::information_schema.cardinal_number)::integer)
+(33 rows)
+
  select ss1.d1 from
    tenk1 as t1
    inner join tenk1 as t2
@@ -4035,6 +4088,37 @@ select * from
   1 | 2 | 2
  (1 row)
  
+-- This example demonstrates the folly of our old "have_dangerous_phv" logic
+begin;
+set local from_collapse_limit to 2;
+explain (verbose, costs off)
+select * from int8_tbl t1
+  left join
+    (select coalesce(t2.q1 + x, 0) from int8_tbl t2,
+       lateral (select t3.q1 as x from int8_tbl t3,
+                  lateral (select t2.q1, t3.q1 offset 0) s))
+  on true;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Nested Loop Left Join
+   Output: t1.q1, t1.q2, (COALESCE((t2.q1 + t3.q1), '0'::bigint))
+   ->  Seq Scan on public.int8_tbl t1
+         Output: t1.q1, t1.q2
+   ->  Materialize
+         Output: (COALESCE((t2.q1 + t3.q1), '0'::bigint))
+         ->  Nested Loop
+               Output: COALESCE((t2.q1 + t3.q1), '0'::bigint)
+               ->  Seq Scan on public.int8_tbl t2
+                     Output: t2.q1, t2.q2
+               ->  Nested Loop
+                     Output: t3.q1
+                     ->  Seq Scan on public.int8_tbl t3
+                           Output: t3.q1, t3.q2
+                     ->  Result
+                           Output: NULL::bigint, NULL::bigint
+(16 rows)
+
+rollback;
  -- Test proper handling of appendrel PHVs during useless-RTE removal
  explain (costs off)
  select * from
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index cc5128add4df0271213b0fa75a276eb0850526df..88d2204e4471d271e0a22cb47ff28ecb0b546017 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1277,6 +1277,23 @@ where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
  
  -- variant that isn't quite a star-schema case
  
+explain (verbose, costs off)
+select ss1.d1 from
+  tenk1 as t1
+  inner join tenk1 as t2
+  on t1.tenthous = t2.ten
+  inner join
+    int8_tbl as i8
+    left join int4_tbl as i4
+      inner join (select 64::information_schema.cardinal_number as d1
+                  from tenk1 t3,
+                       lateral (select abs(t3.unique1) + random()) ss0(x)
+                  where t3.fivethous < 0) as ss1
+      on i4.f1 = ss1.d1
+    on i8.q1 = i4.f1
+  on t1.tenthous = ss1.d1
+where t1.unique1 < i4.f1;
+
  select ss1.d1 from
    tenk1 as t1
    inner join tenk1 as t2
@@ -1332,6 +1349,18 @@ select * from
    (select 1 as x) ss1 left join (select 2 as y) ss2 on (true),
    lateral (select ss2.y as z limit 1) ss3;
  
+-- This example demonstrates the folly of our old "have_dangerous_phv" logic
+begin;
+set local from_collapse_limit to 2;
+explain (verbose, costs off)
+select * from int8_tbl t1
+  left join
+    (select coalesce(t2.q1 + x, 0) from int8_tbl t2,
+       lateral (select t3.q1 as x from int8_tbl t3,
+                  lateral (select t2.q1, t3.q1 offset 0) s))
+  on true;
+rollback;
+
  -- Test proper handling of appendrel PHVs during useless-RTE removal
  explain (costs off)
  select * from
author	Tom Lane
	Fri, 20 Jun 2025 19:55:12 +0000 (15:55 -0400)
committer	Tom Lane
	Fri, 20 Jun 2025 19:55:12 +0000 (15:55 -0400)
src/backend/optimizer/path/joinpath.c		patch \| blob \| blame \| history
src/backend/optimizer/path/joinrels.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/util/paramassign.c		patch \| blob \| blame \| history
src/include/optimizer/paramassign.h		patch \| blob \| blame \| history
src/include/optimizer/paths.h		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/sql/join.sql		patch \| blob \| blame \| history