Fix join removal logic to clean up sub-RestrictInfos of OR clauses.
authorTom Lane
Fri, 10 Feb 2023 19:52:36 +0000 (14:52 -0500)
committerTom Lane
Fri, 10 Feb 2023 19:52:36 +0000 (14:52 -0500)
analyzejoins.c took care to clean out removed relids from the
clause_relids and required_relids of RestrictInfos associated with
the doomed rel ... but it paid no attention to the fact that if such a
RestrictInfo contains an OR clause, there will be sub-RestrictInfos
containing similar fields.

I'm more than a bit surprised that this oversight hasn't caused
visible problems before.  In any case, it's certainly broken now,
so add logic to clean out the sub-RestrictInfos recursively.
We might need to back-patch this someday.

Per bug #17786 from Robins Tharakan.

Discussion: https://postgr.es/m/17786-f1ea7fbdab97daec@postgresql.org

src/backend/optimizer/plan/analyzejoins.c
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index 283539e53c404eb5748de0e244c98ec6f35739aa..7fd11df9afb2297f6dc19b7f67aca9be26eec3ed 100644 (file)
@@ -29,6 +29,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
 
@@ -36,6 +37,8 @@
 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
 static void remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
                                  Relids joinrelids);
+static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
+                                        int relid, int ojrelid);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
 static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
@@ -470,24 +473,12 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
        {
            /*
             * There might be references to relid or ojrelid in the
-            * clause_relids as a consequence of PHVs having ph_eval_at sets
+            * RestrictInfo, as a consequence of PHVs having ph_eval_at sets
             * that include those.  We already checked above that any such PHV
             * is safe, so we can just drop those references.
-            *
-            * The clause_relids probably aren't shared with anything else,
-            * but let's copy them just to be sure.
             */
-           rinfo->clause_relids = bms_copy(rinfo->clause_relids);
-           rinfo->clause_relids = bms_del_member(rinfo->clause_relids,
-                                                 relid);
-           rinfo->clause_relids = bms_del_member(rinfo->clause_relids,
-                                                 ojrelid);
-           /* Likewise for required_relids */
-           rinfo->required_relids = bms_copy(rinfo->required_relids);
-           rinfo->required_relids = bms_del_member(rinfo->required_relids,
-                                                   relid);
-           rinfo->required_relids = bms_del_member(rinfo->required_relids,
-                                                   ojrelid);
+           remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
+           /* Now throw it back into the joininfo lists */
            distribute_restrictinfo_to_rels(root, rinfo);
        }
    }
@@ -498,6 +489,62 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
     */
 }
 
+/*
+ * Remove any references to relid or ojrelid from the RestrictInfo.
+ *
+ * We only bother to clean out bits in clause_relids and required_relids,
+ * not nullingrel bits in contained Vars and PHVs.  (This might have to be
+ * improved sometime.)  However, if the RestrictInfo contains an OR clause
+ * we have to also clean up the sub-clauses.
+ */
+static void
+remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
+{
+   /*
+    * The clause_relids probably aren't shared with anything else, but let's
+    * copy them just to be sure.
+    */
+   rinfo->clause_relids = bms_copy(rinfo->clause_relids);
+   rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
+   rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
+   /* Likewise for required_relids */
+   rinfo->required_relids = bms_copy(rinfo->required_relids);
+   rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
+   rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
+
+   /* If it's an OR, recurse to clean up sub-clauses */
+   if (restriction_is_or_clause(rinfo))
+   {
+       ListCell   *lc;
+
+       Assert(is_orclause(rinfo->orclause));
+       foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
+       {
+           Node       *orarg = (Node *) lfirst(lc);
+
+           /* OR arguments should be ANDs or sub-RestrictInfos */
+           if (is_andclause(orarg))
+           {
+               List       *andargs = ((BoolExpr *) orarg)->args;
+               ListCell   *lc2;
+
+               foreach(lc2, andargs)
+               {
+                   RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
+
+                   remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
+               }
+           }
+           else
+           {
+               RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
+
+               remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
+           }
+       }
+   }
+}
+
 /*
  * Remove any occurrences of the target relid from a joinlist structure.
  *
index 98d611412dfb562205d1fb26cf845bc0931a49a3..2f1f8b8dbe6730e1a1255cbedd0d1afc19a084c0 100644 (file)
@@ -5344,6 +5344,26 @@ SELECT q2 FROM
          One-Time Filter: false
 (9 rows)
 
+-- join removal bug #17786: check that OR conditions are cleaned up
+EXPLAIN (COSTS OFF)
+SELECT f1, x
+FROM int4_tbl
+     JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1
+           RIGHT JOIN tenk1 ON NULL)
+        ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x;
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on int4_tbl
+   ->  Materialize
+         ->  Nested Loop Left Join
+               Join Filter: NULL::boolean
+               Filter: ((tenk1.unique1 = (42)) OR (tenk1.unique2 = (42)))
+               ->  Seq Scan on tenk1
+               ->  Result
+                     One-Time Filter: false
+(9 rows)
+
 rollback;
 -- another join removal bug: we must clean up correctly when removing a PHV
 begin;
index e50a769606938719791eeaf4e6aaaf416bc1b4bd..400c16958f31465576e5004f9604af852ec120e4 100644 (file)
@@ -1955,6 +1955,14 @@ SELECT q2 FROM
   RIGHT JOIN int4_tbl ON NULL
  WHERE x >= x;
 
+-- join removal bug #17786: check that OR conditions are cleaned up
+EXPLAIN (COSTS OFF)
+SELECT f1, x
+FROM int4_tbl
+     JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1
+           RIGHT JOIN tenk1 ON NULL)
+        ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x;
+
 rollback;
 
 -- another join removal bug: we must clean up correctly when removing a PHV