Consider explicit incremental sort for Append and MergeAppend
author Richard Guo
Tue, 8 Jul 2025 01:21:44 +0000 (10:21 +0900)
committer Richard Guo
Tue, 8 Jul 2025 01:21:44 +0000 (10:21 +0900)
For an ordered Append or MergeAppend, we need to inject an explicit
sort into any subpath that is not already well enough ordered.
Currently, only explicit full sorts are considered; incremental sorts
are not yet taken into account.

In this patch, for subpaths of an ordered Append or MergeAppend, we
choose to use explicit incremental sort if it is enabled and there are
presorted keys.

The rationale is based on the assumption that incremental sort is
always faster than full sort when there are presorted keys, a premise
that has been applied in various parts of the code.  In addition, the
current cost model tends to favor incremental sort as being cheaper
than full sort in the presence of presorted keys, making it reasonable
not to consider full sort in such cases.

No backpatch as this could result in plan changes.

Author: Richard Guo 
Reviewed-by: Andrei Lepikhov
Reviewed-by: Robert Haas
Discussion: https://postgr.es/m/CAMbWs4_V7a2enTR+T3pOY_YZ-FU8ZsFYym2swOz4jNMqmSgyuw@mail.gmail.com

src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/pathnode.c
src/include/optimizer/cost.h
src/test/regress/expected/incremental_sort.out
src/test/regress/expected/inherit.out
src/test/regress/sql/incremental_sort.sql

index 3d44815ed5adf577ecfae8eaa064a140ca333368..1f04a2c182ca993d78e3175d6ab9e112318bf5a3 100644 (file)
@@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *   Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
    ListCell   *l;
 
@@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath)
            foreach(l, apath->subpaths)
            {
                Path       *subpath = (Path *) lfirst(l);
-               Path        sort_path;  /* dummy for result of cost_sort */
+               int         presorted_keys;
+               Path        sort_path;  /* dummy for result of
+                                        * cost_sort/cost_incremental_sort */
 
-               if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+               if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+                                                &presorted_keys))
                {
                    /*
                     * We'll need to insert a Sort node, so include costs for
-                    * that.  We can use the parent's LIMIT if any, since we
+                    * that.  We choose to use incremental sort if it is
+                    * enabled and there are presorted keys; otherwise we use
+                    * full sort.
+                    *
+                    * We can use the parent's LIMIT if any, since we
                     * certainly won't pull more than that many tuples from
                     * any child.
                     */
-                   cost_sort(&sort_path,
-                             NULL, /* doesn't currently need root */
-                             pathkeys,
-                             subpath->disabled_nodes,
-                             subpath->total_cost,
-                             subpath->rows,
-                             subpath->pathtarget->width,
-                             0.0,
-                             work_mem,
-                             apath->limit_tuples);
+                   if (enable_incremental_sort && presorted_keys > 0)
+                   {
+                       cost_incremental_sort(&sort_path,
+                                             root,
+                                             pathkeys,
+                                             presorted_keys,
+                                             subpath->disabled_nodes,
+                                             subpath->startup_cost,
+                                             subpath->total_cost,
+                                             subpath->rows,
+                                             subpath->pathtarget->width,
+                                             0.0,
+                                             work_mem,
+                                             apath->limit_tuples);
+                   }
+                   else
+                   {
+                       cost_sort(&sort_path,
+                                 root,
+                                 pathkeys,
+                                 subpath->disabled_nodes,
+                                 subpath->total_cost,
+                                 subpath->rows,
+                                 subpath->pathtarget->width,
+                                 0.0,
+                                 work_mem,
+                                 apath->limit_tuples);
+                   }
+
                    subpath = &sort_path;
                }
 
index 0b61aef962c6d2739384f3686de357ed4fd0d621..8a9f1d7a943a8bd0c34080f9e5708c6026eec006 100644 (file)
@@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
            Oid        *sortOperators;
            Oid        *collations;
            bool       *nullsFirst;
+           int         presorted_keys;
 
            /*
             * Compute sort column info, and adjust subplan's tlist as needed.
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
                          numsortkeys * sizeof(bool)) == 0);
 
            /* Now, insert a Sort node if subplan isn't sufficiently ordered */
-           if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+           if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+                                            &presorted_keys))
            {
-               Sort       *sort = make_sort(subplan, numsortkeys,
+               Plan       *sort_plan;
+
+               /*
+                * We choose to use incremental sort if it is enabled and
+                * there are presorted keys; otherwise we use full sort.
+                */
+               if (enable_incremental_sort && presorted_keys > 0)
+               {
+                   sort_plan = (Plan *)
+                       make_incrementalsort(subplan, numsortkeys, presorted_keys,
                                             sortColIdx, sortOperators,
                                             collations, nullsFirst);
 
-               label_sort_with_costsize(root, sort, best_path->limit_tuples);
-               subplan = (Plan *) sort;
+                   label_incrementalsort_with_costsize(root,
+                                                       (IncrementalSort *) sort_plan,
+                                                       pathkeys,
+                                                       best_path->limit_tuples);
+               }
+               else
+               {
+                   sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+                                                  sortColIdx, sortOperators,
+                                                  collations, nullsFirst);
+
+                   label_sort_with_costsize(root, (Sort *) sort_plan,
+                                            best_path->limit_tuples);
+               }
+
+               subplan = sort_plan;
            }
        }
 
@@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
        Oid        *sortOperators;
        Oid        *collations;
        bool       *nullsFirst;
+       int         presorted_keys;
 
        /* Build the child plan */
        /* Must insist that all children return the same tlist */
@@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
                      numsortkeys * sizeof(bool)) == 0);
 
        /* Now, insert a Sort node if subplan isn't sufficiently ordered */
-       if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+       if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+                                        &presorted_keys))
        {
-           Sort       *sort = make_sort(subplan, numsortkeys,
+           Plan       *sort_plan;
+
+           /*
+            * We choose to use incremental sort if it is enabled and there
+            * are presorted keys; otherwise we use full sort.
+            */
+           if (enable_incremental_sort && presorted_keys > 0)
+           {
+               sort_plan = (Plan *)
+                   make_incrementalsort(subplan, numsortkeys, presorted_keys,
                                         sortColIdx, sortOperators,
                                         collations, nullsFirst);
 
-           label_sort_with_costsize(root, sort, best_path->limit_tuples);
-           subplan = (Plan *) sort;
+               label_incrementalsort_with_costsize(root,
+                                                   (IncrementalSort *) sort_plan,
+                                                   pathkeys,
+                                                   best_path->limit_tuples);
+           }
+           else
+           {
+               sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+                                              sortColIdx, sortOperators,
+                                              collations, nullsFirst);
+
+               label_sort_with_costsize(root, (Sort *) sort_plan,
+                                        best_path->limit_tuples);
+           }
+
+           subplan = sort_plan;
        }
 
        subplans = lappend(subplans, subplan);
index e0192d4a491d25e693027d51c83e1dd068c18844..9cc602788eaae54e6c8d20276a32b04f892a6fb1 100644 (file)
@@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root,
            pathnode->path.total_cost = child->total_cost;
        }
        else
-           cost_append(pathnode);
+           cost_append(pathnode, root);
        /* Must do this last, else cost_append complains */
        pathnode->path.pathkeys = child->pathkeys;
    }
    else
-       cost_append(pathnode);
+       cost_append(pathnode, root);
 
    /* If the caller provided a row estimate, override the computed value. */
    if (rows >= 0)
@@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root,
    foreach(l, subpaths)
    {
        Path       *subpath = (Path *) lfirst(l);
+       int         presorted_keys;
+       Path        sort_path;  /* dummy for result of
+                                * cost_sort/cost_incremental_sort */
 
        /* All child paths should be unparameterized */
        Assert(bms_is_empty(PATH_REQ_OUTER(subpath)));
@@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root,
        pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
            subpath->parallel_safe;
 
-       if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+       if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+                                        &presorted_keys))
        {
-           /* Subpath is adequately ordered, we won't need to sort it */
-           input_disabled_nodes += subpath->disabled_nodes;
-           input_startup_cost += subpath->startup_cost;
-           input_total_cost += subpath->total_cost;
-       }
-       else
-       {
-           /* We'll need to insert a Sort node, so include cost for that */
-           Path        sort_path;  /* dummy for result of cost_sort */
+           /*
+            * We'll need to insert a Sort node, so include costs for that. We
+            * choose to use incremental sort if it is enabled and there are
+            * presorted keys; otherwise we use full sort.
+            *
+            * We can use the parent's LIMIT if any, since we certainly won't
+            * pull more than that many tuples from any child.
+            */
+           if (enable_incremental_sort && presorted_keys > 0)
+           {
+               cost_incremental_sort(&sort_path,
+                                     root,
+                                     pathkeys,
+                                     presorted_keys,
+                                     subpath->disabled_nodes,
+                                     subpath->startup_cost,
+                                     subpath->total_cost,
+                                     subpath->rows,
+                                     subpath->pathtarget->width,
+                                     0.0,
+                                     work_mem,
+                                     pathnode->limit_tuples);
+           }
+           else
+           {
+               cost_sort(&sort_path,
+                         root,
+                         pathkeys,
+                         subpath->disabled_nodes,
+                         subpath->total_cost,
+                         subpath->rows,
+                         subpath->pathtarget->width,
+                         0.0,
+                         work_mem,
+                         pathnode->limit_tuples);
+           }
 
-           cost_sort(&sort_path,
-                     root,
-                     pathkeys,
-                     subpath->disabled_nodes,
-                     subpath->total_cost,
-                     subpath->rows,
-                     subpath->pathtarget->width,
-                     0.0,
-                     work_mem,
-                     pathnode->limit_tuples);
-           input_disabled_nodes += sort_path.disabled_nodes;
-           input_startup_cost += sort_path.startup_cost;
-           input_total_cost += sort_path.total_cost;
+           subpath = &sort_path;
        }
+
+       input_disabled_nodes += subpath->disabled_nodes;
+       input_startup_cost += subpath->startup_cost;
+       input_total_cost += subpath->total_cost;
    }
 
    /*
index d397fe27dc1e1638e23cf941aea9164023ec93b9..b523bcda8f3d0d44628ea24f622f61f12923e694 100644 (file)
@@ -118,7 +118,7 @@ extern void cost_incremental_sort(Path *path,
                                  Cost input_startup_cost, Cost input_total_cost,
                                  double input_tuples, int width, Cost comparison_cost, int sort_mem,
                                  double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern void cost_append(AppendPath *apath, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
                              List *pathkeys, int n_streams,
                              int input_disabled_nodes,
index b00219643b9ad2dc325d1dde3c914f25011dd54b..5a1dd9fc02270f8c40f5e33eb70902d46840d2a5 100644 (file)
@@ -1722,3 +1722,43 @@ order by t1.four, t1.two limit 1;
                ->  Seq Scan on tenk1 t2
 (12 rows)
 
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Append
+   ->  Incremental Sort
+         Sort Key: prt_tbl_1.a, prt_tbl_1.b
+         Presorted Key: prt_tbl_1.a
+         ->  Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+   ->  Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(6 rows)
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Merge Append
+   Sort Key: prt_tbl_1.a, prt_tbl_1.b
+   ->  Incremental Sort
+         Sort Key: prt_tbl_1.a, prt_tbl_1.b
+         Presorted Key: prt_tbl_1.a
+         ->  Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+   ->  Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(7 rows)
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;
index 78dead65325e9812f1c9a8bac0ddd7be7d4242c4..5b5055babdcb081a463bfe8795910110ab752b39 100644 (file)
@@ -1898,10 +1898,11 @@ ORDER BY thousand, tenthous;
  Merge Append
    Sort Key: tenk1.thousand, tenk1.tenthous
    ->  Index Only Scan using tenk1_thous_tenthous on tenk1
-   ->  Sort
+   ->  Incremental Sort
          Sort Key: tenk1_1.thousand, tenk1_1.thousand
+         Presorted Key: tenk1_1.thousand
          ->  Index Only Scan using tenk1_thous_tenthous on tenk1 tenk1_1
-(6 rows)
+(7 rows)
 
 explain (costs off)
 SELECT thousand, tenthous, thousand+tenthous AS x FROM tenk1
@@ -1982,10 +1983,11 @@ ORDER BY x, y;
  Merge Append
    Sort Key: a.thousand, a.tenthous
    ->  Index Only Scan using tenk1_thous_tenthous on tenk1 a
-   ->  Sort
+   ->  Incremental Sort
          Sort Key: b.unique2, b.unique2
+         Presorted Key: b.unique2
          ->  Index Only Scan using tenk1_unique2 on tenk1 b
-(6 rows)
+(7 rows)
 
 -- exercise rescan code path via a repeatedly-evaluated subquery
 explain (costs off)
index f1f8fae56549ac5423ffed0492ad7bcc79f878b6..bbe658a7588c959d359f3357d14ef615780cb939 100644 (file)
@@ -298,3 +298,27 @@ explain (costs off)
 select * from
   (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
 order by t1.four, t1.two limit 1;
+
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;