Don't allow LIMIT/OFFSET clause within sub-selects to be pushed to workers.
author: Amit Kapila
Fri, 14 Sep 2018 04:47:31 +0000 (10:17 +0530)
committer: Amit Kapila
Fri, 14 Sep 2018 04:47:31 +0000 (10:17 +0530)
Allowing a sub-select containing LIMIT/OFFSET to run in workers can lead
to inconsistent results at the top level, as there is no guarantee that
the row order will be fully deterministic.  The fix is to prohibit
pushing sub-selects that contain LIMIT/OFFSET to workers.

Reported-by: Andrew Fletcher
Bug: 15324
Author: Amit Kapila
Reviewed-by: Dilip Kumar
Backpatch-through: 9.6
Discussion: https://postgr.es/m/153417684333.10284.11356259990921828616@wrigleys.postgresql.org

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/plan/planner.c
src/include/optimizer/planner.h
src/test/regress/expected/select_parallel.out
src/test/regress/sql/select_parallel.sql

index b4a1e8ba21427d4c19ef8b95afef476a09e9f2e2..3f99e56818837c82a184f1dc34f99e8df9865e9a 100644 (file)
@@ -585,7 +585,20 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
             * the SubqueryScanPath as not parallel-safe.  (Note that
             * set_subquery_pathlist() might push some of these quals down
             * into the subquery itself, but that doesn't change anything.)
+            *
+            * We can't push sub-select containing LIMIT/OFFSET to workers as
+            * there is no guarantee that the row order will be fully
+            * deterministic, and applying LIMIT/OFFSET will lead to
+            * inconsistent results at the top-level.  (In some cases, where
+            * the result is ordered, we could relax this restriction.  But it
+            * doesn't currently seem worth expending extra effort to do so.)
             */
+           {
+               Query      *subquery = castNode(Query, rte->subquery);
+
+               if (limit_needed(subquery))
+                   return;
+           }
            break;
 
        case RTE_JOIN:
index 73eb307ff0b12defefd45bedaa67e777fbc388f1..f34849328b5cb56f5b21f69dc4d11923e686b1d3 100644 (file)
@@ -98,7 +98,6 @@ static void preprocess_rowmarks(PlannerInfo *root);
 static double preprocess_limit(PlannerInfo *root,
                 double tuple_fraction,
                 int64 *offset_est, int64 *count_est);
-static bool limit_needed(Query *parse);
 static void remove_useless_groupby_columns(PlannerInfo *root);
 static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
@@ -2492,7 +2491,7 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
  * in preprocess_limit it's good enough to consider estimated values.
  */
-static bool
+bool
 limit_needed(Query *parse)
 {
    Node       *node;
index d9790d7a970bdf7bacf7299e3c02a3ee0a361e27..b4546b11367c4171eeda8c6fb3155d5b6144df73 100644 (file)
@@ -46,6 +46,8 @@ extern bool is_dummy_plan(Plan *plan);
 extern RowMarkType select_rowmark_type(RangeTblEntry *rte,
                    LockClauseStrength strength);
 
+extern bool limit_needed(Query *parse);
+
 extern void mark_partial_aggref(Aggref *agg, AggSplit aggsplit);
 
 extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
index 926202192b3a123634b7b0e5cb459b7ff57b330e..07fd37e720e892bb6ea763558ab53d1af4e5319d 100644 (file)
@@ -205,6 +205,25 @@ explain (costs off, verbose)
                            Output: b.unique1
 (15 rows)
 
+-- LIMIT/OFFSET within sub-selects can't be pushed to workers.
+explain (costs off)
+  select * from tenk1 a where two in
+    (select two from tenk1 b where stringu1 like '%AAAA' limit 3);
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (a.two = b.two)
+   ->  Gather
+         Workers Planned: 4
+         ->  Parallel Seq Scan on tenk1 a
+   ->  Hash
+         ->  Limit
+               ->  Gather
+                     Workers Planned: 4
+                     ->  Parallel Seq Scan on tenk1 b
+                           Filter: (stringu1 ~~ '%AAAA'::text)
+(11 rows)
+
 explain (costs off)
   select stringu1::int2 from tenk1 where unique1 = 1;
                   QUERY PLAN                   
index 266d0dd64646dd2f19f44ca85696f74ca4efd527..a7e2a61cffdf5527f042d1d944524ebbb421847d 100644 (file)
@@ -88,6 +88,11 @@ explain (costs off, verbose)
   select count(*) from tenk1 a where (unique1, two) in
     (select unique1, row_number() over() from tenk1 b);
 
+-- LIMIT/OFFSET within sub-selects can't be pushed to workers.
+explain (costs off)
+  select * from tenk1 a where two in
+    (select two from tenk1 b where stringu1 like '%AAAA' limit 3);
+
 explain (costs off)
   select stringu1::int2 from tenk1 where unique1 = 1;