#include "optimizer/tlist.h"
#include "parser/analyze.h"
#include "parser/parse_agg.h"
+#include "parser/parse_clause.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "partitioning/partdesc.h"
{
List *activeWindows; /* active windows, if any */
grouping_sets_data *gset_data; /* grouping sets data, if any */
+ SetOperationStmt *setop; /* parent set operation or NULL if not a
+ * subquery belonging to a set operation */
} standard_qp_extra;
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
-static void grouping_planner(PlannerInfo *root, double tuple_fraction);
+static void grouping_planner(PlannerInfo *root, double tuple_fraction,
+ SetOperationStmt *setops);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
List *targetList,
List *groupClause);
static int common_prefix_cmp(const void *a, const void *b);
+static List *generate_setop_child_grouplist(SetOperationStmt *op,
+ List *targetlist);
/*****************************************************************************
}
/* primary planning entry point (may recurse for subqueries) */
- root = subquery_planner(glob, parse, NULL,
- false, tuple_fraction);
+ root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
/* Select best Path and turn it into a Plan */
final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
* hasRecursion is true if this is a recursive WITH query.
* tuple_fraction is the fraction of tuples we expect will be retrieved.
* tuple_fraction is interpreted as explained for grouping_planner, below.
+ * setops is used for set operation subqueries to provide the subquery with
+ * the context in which it's being used so that Paths correctly sorted for the
+ * set operation can be generated. NULL when not planning a set operation
+ * child.
*
* Basically, this routine does the stuff that should only be done once
* per Query object. It then calls grouping_planner. At one time,
*--------------------
*/
PlannerInfo *
-subquery_planner(PlannerGlobal *glob, Query *parse,
- PlannerInfo *parent_root,
- bool hasRecursion, double tuple_fraction)
+subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
+ bool hasRecursion, double tuple_fraction,
+ SetOperationStmt *setops)
{
PlannerInfo *root;
List *newWithCheckOptions;
/*
* Do the main planning.
*/
- grouping_planner(root, tuple_fraction);
+ grouping_planner(root, tuple_fraction, setops);
/*
* Capture the set of outer-level param IDs we have access to, for use in
* 0 < tuple_fraction < 1: expect the given fraction of tuples available
* from the plan to be retrieved
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
- * expected to be retrieved (ie, a LIMIT specification)
+ * expected to be retrieved (ie, a LIMIT specification).
+ * setops is used for set operation subqueries to provide the subquery with
+ * the context in which it's being used so that Paths correctly sorted for the
+ * set operation can be generated. NULL when not planning a set operation
+ * child.
*
* Returns nothing; the useful output is in the Paths we attach to the
* (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
*--------------------
*/
static void
-grouping_planner(PlannerInfo *root, double tuple_fraction)
+grouping_planner(PlannerInfo *root, double tuple_fraction,
+ SetOperationStmt *setops)
{
Query *parse = root->parse;
int64 offset_est = 0;
if (parse->setOperations)
{
- /*
- * If there's a top-level ORDER BY, assume we have to fetch all the
- * tuples. This might be too simplistic given all the hackery below
- * to possibly avoid the sort; but the odds of accurate estimates here
- * are pretty low anyway. XXX try to get rid of this in favor of
- * letting plan_set_operations generate both fast-start and
- * cheapest-total paths.
- */
- if (parse->sortClause)
- root->tuple_fraction = 0.0;
-
/*
* Construct Paths for set operations. The results will not need any
* work except perhaps a top-level sort and/or LIMIT. Note that any
qp_extra.activeWindows = activeWindows;
qp_extra.gset_data = gset_data;
+ /*
+ * If we're a subquery for a set operation, store the SetOperationStmt
+ * in qp_extra.
+ */
+ qp_extra.setop = setops;
+
/*
* Generate the best unsorted and presorted paths for the scan/join
* portion of this Query, ie the processing represented by the
parse->sortClause,
tlist);
+ /* setting setop_pathkeys might be useful to the union planner */
+ if (qp_extra->setop != NULL &&
+ set_operation_ordered_results_useful(qp_extra->setop))
+ {
+ List *groupClauses;
+ bool sortable;
+
+ groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
+
+ root->setop_pathkeys =
+ make_pathkeys_for_sortclauses_extended(root,
+ &groupClauses,
+ tlist,
+ false,
+ &sortable);
+ if (!sortable)
+ root->setop_pathkeys = NIL;
+ }
+ else
+ root->setop_pathkeys = NIL;
+
/*
* Figure out whether we want a sorted result from query_planner.
*
* sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
* we try to produce output that's sufficiently well sorted for the
* DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
- * by the ORDER BY clause.
+ * by the ORDER BY clause. Otherwise, if we're a subquery being planned
+ * for a set operation which can benefit from presorted results and have a
+ * sortable targetlist, we want to sort by the target list.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
* of GROUP BY, it would be tempting to request sort by ORDER BY --- but
root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
+ else if (root->setop_pathkeys != NIL)
+ root->query_pathkeys = root->setop_pathkeys;
else
root->query_pathkeys = NIL;
}
return true;
}
+
+/*
+ * generate_setop_child_grouplist
+ * Build a SortGroupClause list defining the sort/grouping properties
+ * of the child of a set operation.
+ *
+ * This is similar to generate_setop_grouplist() but differs as the setop
+ * child query's targetlist entries may already have a tleSortGroupRef
+ * assigned for other purposes, such as GROUP BYs. Here we keep the
+ * SortGroupClause list in the same order as 'op' groupClauses and just adjust
+ * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
+ *
+ * 'op' is the parent set operation; its groupClauses list is run through
+ * copyObject() and it is the copy which is adjusted and returned.
+ * 'targetlist' is the child query's targetlist. Its non-resjunk entries are
+ * paired, in order, with the copied clauses; resjunk entries are skipped
+ * without consuming a clause.
+ *
+ * The Asserts below require the number of non-resjunk targetlist entries to
+ * match the number of clauses in op->groupClauses exactly.
+ *
+ * NOTE(review): assignSortGroupRef() presumably stamps a new ressortgroupref
+ * onto a TargetEntry which has none, in which case 'targetlist' is modified
+ * as a side effect -- confirm against its definition.
+ */
+static List *
+generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
+{
+ List *grouplist = copyObject(op->groupClauses);
+ ListCell *lg;
+ ListCell *lt;
+
+ lg = list_head(grouplist);
+ foreach(lt, targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lt);
+ SortGroupClause *sgc;
+
+ /* resjunk columns could have sortgrouprefs. Leave these alone */
+ if (tle->resjunk)
+ continue;
+
+ /*
+ * we expect every non-resjunk target to have a SortGroupClause; 'lg'
+ * walks the copied clause list in step with the non-resjunk targets
+ */
+ Assert(lg != NULL);
+ sgc = (SortGroupClause *) lfirst(lg);
+ lg = lnext(grouplist, lg);
+
+ /* assign a tleSortGroupRef, or reuse the existing one */
+ sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
+ }
+ Assert(lg == NULL); /* no clause may be left without a matching target */
+ return grouplist;
+}
bool junkOK,
int flag, List *refnames_tlist,
List **pTargetList,
- double *pNumGroups);
+ bool *istrivial_tlist);
static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
PlannerInfo *root,
List *refnames_tlist,
List **pTargetList);
+static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
+ bool trivial_tlist, List *child_tlist,
+ List *interesting_pathkeys,
+ double *pNumGroups);
static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
List *refnames_tlist,
List **pTargetList);
static List *plan_union_children(PlannerInfo *root,
SetOperationStmt *top_union,
List *refnames_tlist,
- List **tlist_list);
-static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
- PlannerInfo *root);
+ List **tlist_list,
+ List **istrivial_tlist);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
Path *input_path,
Assert(parse->distinctClause == NIL);
/*
- * In the outer query level, we won't have any true equivalences to deal
- * with; but we do want to be able to make pathkeys, which will require
- * single-member EquivalenceClasses. Indicate that EC merging is complete
- * so that pathkeys.c won't complain.
+ * In the outer query level, equivalence classes are limited to classes
+ * which define that the top-level target entry is equivalent to the
+ * corresponding child target entry. There won't be any equivalence class
+ * merging. Mark that merging is complete to allow us to make pathkeys.
*/
Assert(root->eq_classes == NIL);
root->ec_merging_done = true;
}
else
{
+ bool trivial_tlist;
+
/*
* Recurse on setOperations tree to generate paths for set ops. The
* final output paths should have just the column types shown as the
true, -1,
leftmostQuery->targetList,
&top_tlist,
- NULL);
+ &trivial_tlist);
}
/* Must return the built tlist into root->processed_tlist. */
return setop_rel;
}
+/*
+ * set_operation_ordered_results_useful
+ * Return true if the given SetOperationStmt can be executed by utilizing
+ * paths that provide sorted input according to the setop's targetlist.
+ * Returns false when sorted paths are not any more useful than unsorted
+ * ones.
+ *
+ * Only the 'op' and 'all' fields of 'setop' are examined; at present the
+ * answer is true solely for a UNION without ALL.
+ */
+bool
+set_operation_ordered_results_useful(SetOperationStmt *setop)
+{
+ /*
+ * Paths sorted by the targetlist are useful for UNION as we can opt to
+ * MergeAppend the sorted paths then Unique them. Ordered paths are no
+ * more useful than unordered ones for UNION ALL.
+ */
+ if (!setop->all && setop->op == SETOP_UNION)
+ return true;
+
+ /*
+ * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
+ * correctly sorted input paths.
+ */
+ return false;
+}
+
/*
* recurse_set_operations
* Recursively handle one step in a tree of set operations
*
* Returns a RelOptInfo for the subtree, as well as these output parameters:
* *pTargetList: receives the fully-fledged tlist for the subtree's top plan
- * *pNumGroups: if not NULL, we estimate the number of distinct groups
- * in the result, and store it there
+ * *istrivial_tlist: true if, and only if, datatypes between parent and child
+ * match.
*
* The pTargetList output parameter is mostly redundant with the pathtarget
* of the returned RelOptInfo, but for the moment we need it because much of
bool junkOK,
int flag, List *refnames_tlist,
List **pTargetList,
- double *pNumGroups)
+ bool *istrivial_tlist)
{
- RelOptInfo *rel = NULL; /* keep compiler quiet */
+ RelOptInfo *rel;
+
+ *istrivial_tlist = true; /* for now */
/* Guard against stack overflow due to overly complex setop nests */
check_stack_depth();
{
RangeTblRef *rtr = (RangeTblRef *) setOp;
RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
+ SetOperationStmt *setops;
Query *subquery = rte->subquery;
PlannerInfo *subroot;
- RelOptInfo *final_rel;
- Path *subpath;
- Path *path;
List *tlist;
bool trivial_tlist;
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
+ /*
+ * Pass the set operation details to the subquery_planner to have it
+ * consider generating Paths correctly ordered for the set operation.
+ */
+ setops = castNode(SetOperationStmt, root->parse->setOperations);
+
/* Generate a subroot and Paths for the subquery */
- subroot = rel->subroot = subquery_planner(root->glob, subquery,
- root,
- false,
- root->tuple_fraction);
+ subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
+ false, root->tuple_fraction,
+ setops);
/*
* It should not be possible for the primitive query to contain any
/* Return the fully-fledged tlist to caller, too */
*pTargetList = tlist;
-
- /*
- * Mark rel with estimated output rows, width, etc. Note that we have
- * to do this before generating outer-query paths, else
- * cost_subqueryscan is not happy.
- */
- set_subquery_size_estimates(root, rel);
-
- /*
- * Since we may want to add a partial path to this relation, we must
- * set its consider_parallel flag correctly.
- */
- final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
- rel->consider_parallel = final_rel->consider_parallel;
-
- /*
- * For the moment, we consider only a single Path for the subquery.
- * This should change soon (make it look more like
- * set_subquery_pathlist).
- */
- subpath = get_cheapest_fractional_path(final_rel,
- root->tuple_fraction);
-
- /*
- * Stick a SubqueryScanPath atop that.
- *
- * We don't bother to determine the subquery's output ordering since
- * it won't be reflected in the set-op result anyhow; so just label
- * the SubqueryScanPath with nil pathkeys. (XXX that should change
- * soon too, likely.)
- */
- path = (Path *) create_subqueryscan_path(root, rel, subpath,
- trivial_tlist,
- NIL, NULL);
-
- add_path(rel, path);
-
- /*
- * If we have a partial path for the child relation, we can use that
- * to build a partial path for this relation. But there's no point in
- * considering any path but the cheapest.
- */
- if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
- final_rel->partial_pathlist != NIL)
- {
- Path *partial_subpath;
- Path *partial_path;
-
- partial_subpath = linitial(final_rel->partial_pathlist);
- partial_path = (Path *)
- create_subqueryscan_path(root, rel, partial_subpath,
- trivial_tlist,
- NIL, NULL);
- add_partial_path(rel, partial_path);
- }
-
- /*
- * Estimate number of groups if caller wants it. If the subquery used
- * grouping or aggregation, its output is probably mostly unique
- * anyway; otherwise do statistical estimation.
- *
- * XXX you don't really want to know about this: we do the estimation
- * using the subquery's original targetlist expressions, not the
- * subroot->processed_tlist which might seem more appropriate. The
- * reason is that if the subquery is itself a setop, it may return a
- * processed_tlist containing "varno 0" Vars generated by
- * generate_append_tlist, and those would confuse estimate_num_groups
- * mightily. We ought to get rid of the "varno 0" hack, but that
- * requires a redesign of the parsetree representation of setops, so
- * that there can be an RTE corresponding to each setop's output.
- */
- if (pNumGroups)
- {
- if (subquery->groupClause || subquery->groupingSets ||
- subquery->distinctClause ||
- subroot->hasHavingQual || subquery->hasAggs)
- *pNumGroups = subpath->rows;
- else
- *pNumGroups = estimate_num_groups(subroot,
- get_tlist_exprs(subquery->targetList, false),
- subpath->rows,
- NULL,
- NULL);
- }
+ *istrivial_tlist = trivial_tlist;
}
else if (IsA(setOp, SetOperationStmt))
{
rel = generate_nonunion_paths(op, root,
refnames_tlist,
pTargetList);
- if (pNumGroups)
- *pNumGroups = rel->rows;
/*
* If necessary, add a Result node to project the caller-requested
*pTargetList,
refnames_tlist,
&trivial_tlist);
+ *istrivial_tlist = trivial_tlist;
target = create_pathtarget(root, *pTargetList);
/* Apply projection to each path */
lfirst(lc) = path;
}
}
+ postprocess_setop_rel(root, rel);
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(setOp));
*pTargetList = NIL;
+ rel = NULL; /* keep compiler quiet */
}
- postprocess_setop_rel(root, rel);
-
return rel;
}
Path *lpath;
Path *rpath;
List *lpath_tlist;
+ bool lpath_trivial_tlist;
List *rpath_tlist;
+ bool rpath_trivial_tlist;
List *tlist;
List *groupList;
double dNumGroups;
false, -1,
refnames_tlist,
&lpath_tlist,
- NULL);
+ &lpath_trivial_tlist);
+ if (lrel->rtekind == RTE_SUBQUERY)
+ build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
+ NIL, NULL);
lpath = lrel->cheapest_total_path;
/* The right path will want to look at the left one ... */
root->non_recursive_path = lpath;
false, -1,
refnames_tlist,
&rpath_tlist,
- NULL);
+ &rpath_trivial_tlist);
+ if (rrel->rtekind == RTE_SUBQUERY)
+ build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
+ NIL, NULL);
rpath = rrel->cheapest_total_path;
root->non_recursive_path = NULL;
return result_rel;
}
+/*
+ * build_setop_child_paths
+ * Build paths for the set op child relation denoted by 'rel'.
+ *
+ * 'rel' must be an RTE_SUBQUERY rel (asserted below). Paths are taken from
+ * the UPPERREL_FINAL rel of rel->subroot, wrapped in SubqueryScan paths and
+ * added to 'rel'. Nothing is returned; the output is the paths added to
+ * 'rel' and, optionally, *pNumGroups.
+ *
+ * trivial_tlist: passed unchanged to create_subqueryscan_path() for every
+ * path generated here.
+ * child_tlist: the child's targetlist; only used, together with
+ * interesting_pathkeys, in the call to add_setop_child_rel_equivalences().
+ * interesting_pathkeys: if not NIL, also include paths that suit these
+ * pathkeys, sorting any unsorted paths as required. Note that within this
+ * function the list only acts as an on/off switch and as input for the
+ * equivalence setup; the sorting itself is checked against, and performed
+ * with, the subroot's setop_pathkeys.
+ * *pNumGroups: if not NULL, we estimate the number of distinct groups
+ * in the result, and store it there
+ */
+static void
+build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
+ bool trivial_tlist, List *child_tlist,
+ List *interesting_pathkeys, double *pNumGroups)
+{
+ RelOptInfo *final_rel;
+ List *setop_pathkeys = rel->subroot->setop_pathkeys;
+ ListCell *lc;
+
+ /* it can't be a set op child rel if it's not a subquery */
+ Assert(rel->rtekind == RTE_SUBQUERY);
+
+ /* when sorting is needed, add child rel equivalences */
+ if (interesting_pathkeys != NIL)
+ add_setop_child_rel_equivalences(root,
+ rel,
+ child_tlist,
+ interesting_pathkeys);
+
+ /*
+ * Mark rel with estimated output rows, width, etc. Note that we have to
+ * do this before generating outer-query paths, else cost_subqueryscan is
+ * not happy.
+ */
+ set_subquery_size_estimates(root, rel);
+
+ /*
+ * Since we may want to add a partial path to this relation, we must set
+ * its consider_parallel flag correctly.
+ */
+ final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
+ rel->consider_parallel = final_rel->consider_parallel;
+
+ /* Generate subquery scan paths for any interesting path in final_rel */
+ foreach(lc, final_rel->pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ List *pathkeys;
+ Path *cheapest_input_path = final_rel->cheapest_total_path;
+ bool is_sorted;
+ int presorted_keys;
+
+ /*
+ * Include the cheapest path as-is so that the set operation can be
+ * cheaply implemented using a method which does not require the input
+ * to be sorted.
+ */
+ if (subpath == cheapest_input_path)
+ {
+ /* Convert subpath's pathkeys to outer representation */
+ pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
+ make_tlist_from_pathtarget(subpath->pathtarget));
+
+ /* Generate outer path using this subpath */
+ add_path(rel, (Path *) create_subqueryscan_path(root,
+ rel,
+ subpath,
+ trivial_tlist,
+ pathkeys,
+ NULL));
+ }
+
+ /* skip dealing with sorted paths if the setop doesn't need them */
+ if (interesting_pathkeys == NIL)
+ continue;
+
+ /*
+ * Create paths to suit final sort order required for setop_pathkeys.
+ * Here we'll sort the cheapest input path (if not sorted already) and
+ * incremental sort any paths which are partially sorted.
+ */
+ is_sorted = pathkeys_count_contained_in(setop_pathkeys,
+ subpath->pathkeys,
+ &presorted_keys);
+
+ if (!is_sorted)
+ {
+ double limittuples = rel->subroot->limit_tuples;
+
+ /*
+ * Try at least sorting the cheapest path and also try
+ * incrementally sorting any path which is partially sorted
+ * already (no need to deal with paths which have presorted keys
+ * when incremental sort is disabled unless it's the cheapest
+ * input path).
+ */
+ if (subpath != cheapest_input_path &&
+ (presorted_keys == 0 || !enable_incremental_sort))
+ continue;
+
+ /*
+ * We've no need to consider both a sort and incremental sort.
+ * We'll just do a sort if there are no presorted keys and an
+ * incremental sort when there are presorted keys.
+ */
+ if (presorted_keys == 0 || !enable_incremental_sort)
+ subpath = (Path *) create_sort_path(rel->subroot,
+ final_rel,
+ subpath,
+ setop_pathkeys,
+ limittuples);
+ else
+ subpath = (Path *) create_incremental_sort_path(rel->subroot,
+ final_rel,
+ subpath,
+ setop_pathkeys,
+ presorted_keys,
+ limittuples);
+ }
+
+ /*
+ * subpath is now sorted, so add it to the pathlist. We already added
+ * the cheapest_input_path above, so don't add it again unless we just
+ * sorted it. (If we did sort it, 'subpath' now points at the new sort
+ * path and so no longer compares equal to cheapest_input_path.)
+ */
+ if (subpath != cheapest_input_path)
+ {
+ /* Convert subpath's pathkeys to outer representation */
+ pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
+ make_tlist_from_pathtarget(subpath->pathtarget));
+
+ /* Generate outer path using this subpath */
+ add_path(rel, (Path *) create_subqueryscan_path(root,
+ rel,
+ subpath,
+ trivial_tlist,
+ pathkeys,
+ NULL));
+ }
+ }
+
+ /* if consider_parallel is false, there should be no partial paths */
+ Assert(final_rel->consider_parallel ||
+ final_rel->partial_pathlist == NIL);
+
+ /*
+ * If we have a partial path for the child relation, we can use that to
+ * build a partial path for this relation. But there's no point in
+ * considering any path but the cheapest.
+ */
+ if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
+ final_rel->partial_pathlist != NIL)
+ {
+ Path *partial_subpath;
+ Path *partial_path;
+
+ partial_subpath = linitial(final_rel->partial_pathlist);
+ partial_path = (Path *)
+ create_subqueryscan_path(root, rel, partial_subpath,
+ trivial_tlist,
+ NIL, NULL);
+ add_partial_path(rel, partial_path);
+ }
+
+ postprocess_setop_rel(root, rel); /* NOTE(review): presumably what sets rel->cheapest_total_path, read below -- confirm */
+
+ /*
+ * Estimate number of groups if caller wants it. If the subquery used
+ * grouping or aggregation, its output is probably mostly unique anyway;
+ * otherwise do statistical estimation.
+ *
+ * XXX you don't really want to know about this: we do the estimation
+ * using the subquery's original targetlist expressions, not the
+ * subroot->processed_tlist which might seem more appropriate. The reason
+ * is that if the subquery is itself a setop, it may return a
+ * processed_tlist containing "varno 0" Vars generated by
+ * generate_append_tlist, and those would confuse estimate_num_groups
+ * mightily. We ought to get rid of the "varno 0" hack, but that requires
+ * a redesign of the parsetree representation of setops, so that there can
+ * be an RTE corresponding to each setop's output.
+ */
+ if (pNumGroups)
+ {
+ PlannerInfo *subroot = rel->subroot;
+ Query *subquery = subroot->parse;
+
+ if (subquery->groupClause || subquery->groupingSets ||
+ subquery->distinctClause || subroot->hasHavingQual ||
+ subquery->hasAggs)
+ *pNumGroups = rel->cheapest_total_path->rows;
+ else
+ *pNumGroups = estimate_num_groups(subroot,
+ get_tlist_exprs(subquery->targetList, false),
+ rel->cheapest_total_path->rows,
+ NULL,
+ NULL);
+ }
+}
+
/*
* Generate paths for a UNION or UNION ALL node
*/
{
Relids relids = NULL;
RelOptInfo *result_rel;
- double save_fraction = root->tuple_fraction;
ListCell *lc;
- List *pathlist = NIL;
+ ListCell *lc2;
+ ListCell *lc3;
+ List *cheapest_pathlist = NIL;
+ List *ordered_pathlist = NIL;
List *partial_pathlist = NIL;
bool partial_paths_valid = true;
bool consider_parallel = true;
List *rellist;
List *tlist_list;
+ List *trivial_tlist_list;
List *tlist;
- Path *path;
-
- /*
- * If plain UNION, tell children to fetch all tuples.
- *
- * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
- * each arm of the UNION ALL. One could make a case for reducing the
- * tuple fraction for later arms (discounting by the expected size of the
- * earlier arms' results) but it seems not worth the trouble. The normal
- * case where tuple_fraction isn't already zero is a LIMIT at top level,
- * and passing it down as-is is usually enough to get the desired result
- * of preferring fast-start plans.
- */
- if (!op->all)
- root->tuple_fraction = 0.0;
+ List *groupList = NIL;
+ Path *apath;
+ Path *gpath = NULL;
+ bool try_sorted;
+ List *union_pathkeys = NIL;
/*
* If any of my children are identical UNION nodes (same op, all-flag, and
* colTypes) then they can be merged into this node so that we generate
- * only one Append and unique-ification for the lot. Recurse to find such
- * nodes and compute their children's paths.
+ * only one Append/MergeAppend and unique-ification for the lot. Recurse
+ * to find such nodes.
*/
- rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
+ rellist = plan_union_children(root,
+ op,
+ refnames_tlist,
+ &tlist_list,
+ &trivial_tlist_list);
/*
- * Generate tlist for Append plan node.
+ * Generate tlist for Append/MergeAppend plan node.
*
* The tlist for an Append plan isn't important as far as the Append is
* concerned, but we must make it look real anyway for the benefit of the
*/
tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
tlist_list, refnames_tlist);
-
*pTargetList = tlist;
+ /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
+ try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
+
+ if (try_sorted)
+ {
+ /* Identify the grouping semantics */
+ groupList = generate_setop_grouplist(op, tlist);
+
+ /* Determine the pathkeys for sorting by the whole target list */
+ union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
+
+ root->query_pathkeys = union_pathkeys;
+ }
+
+ /*
+ * Now that we've got the append target list, we can build the union child
+ * paths.
+ */
+ forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
+ {
+ RelOptInfo *rel = lfirst(lc);
+ bool trivial_tlist = lfirst_int(lc2);
+ List *child_tlist = lfirst_node(List, lc3);
+
+ /* only build paths for the union children */
+ if (rel->rtekind == RTE_SUBQUERY)
+ build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
+ union_pathkeys, NULL);
+ }
+
/* Build path lists and relid set. */
foreach(lc, rellist)
{
RelOptInfo *rel = lfirst(lc);
+ Path *ordered_path;
- pathlist = lappend(pathlist, rel->cheapest_total_path);
+ cheapest_pathlist = lappend(cheapest_pathlist,
+ rel->cheapest_total_path);
+
+ if (try_sorted)
+ {
+ ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
+ union_pathkeys,
+ NULL,
+ TOTAL_COST,
+ false);
+
+ if (ordered_path != NULL)
+ ordered_pathlist = lappend(ordered_pathlist, ordered_path);
+ else
+ {
+ /*
+ * If we can't find a sorted path, just give up trying to
+ * generate a list of correctly sorted child paths. This can
+ * happen when type coercion was added to the targetlist due
+ * to mismatching types from the union children.
+ */
+ try_sorted = false;
+ }
+ }
if (consider_parallel)
{
result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
result_rel->reltarget = create_pathtarget(root, tlist);
result_rel->consider_parallel = consider_parallel;
+ result_rel->consider_startup = (root->tuple_fraction > 0);
/*
- * Append the child results together.
- */
- path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
- NIL, NULL, 0, false, -1);
-
- /*
- * For UNION ALL, we just need the Append path. For UNION, need to add
- * node(s) to remove duplicates.
+ * Append the child results together using the cheapest paths from each
+ * union child.
*/
- if (!op->all)
- path = make_union_unique(op, path, tlist, root);
-
- add_path(result_rel, path);
+ apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
+ NIL, NIL, NULL, 0, false, -1);
/*
* Estimate number of groups. For now we just assume the output is unique
* --- this is certainly true for the UNION case, and we want worst-case
* estimates anyway.
*/
- result_rel->rows = path->rows;
+ result_rel->rows = apath->rows;
/*
* Now consider doing the same thing using the partial paths plus Append
*/
if (partial_paths_valid)
{
- Path *ppath;
+ Path *papath;
int parallel_workers = 0;
/* Find the highest number of workers requested for any subpath. */
}
Assert(parallel_workers > 0);
- ppath = (Path *)
+ papath = (Path *)
create_append_path(root, result_rel, NIL, partial_pathlist,
- NIL, NULL,
- parallel_workers, enable_parallel_append,
- -1);
- ppath = (Path *)
- create_gather_path(root, result_rel, ppath,
+ NIL, NULL, parallel_workers,
+ enable_parallel_append, -1);
+ gpath = (Path *)
+ create_gather_path(root, result_rel, papath,
result_rel->reltarget, NULL, NULL);
- if (!op->all)
- ppath = make_union_unique(op, ppath, tlist, root);
- add_path(result_rel, ppath);
}
- /* Undo effects of possibly forcing tuple_fraction to 0 */
- root->tuple_fraction = save_fraction;
+ if (!op->all)
+ {
+ double dNumGroups;
+ bool can_sort = grouping_is_sortable(groupList);
+ bool can_hash = grouping_is_hashable(groupList);
+
+ /*
+ * XXX for the moment, take the number of distinct groups as equal to
+ * the total input size, i.e., the worst case. This is too
+ * conservative, but it's not clear how to get a decent estimate of
+ * the true size. One should note as well the propensity of novices
+ * to write UNION rather than UNION ALL even when they don't expect
+ * any duplicates...
+ */
+ dNumGroups = apath->rows;
+
+ if (can_hash)
+ {
+ Path *path;
+
+ /*
+ * Try a hash aggregate plan on 'apath'. This is the cheapest
+ * available path containing each append child.
+ */
+ path = (Path *) create_agg_path(root,
+ result_rel,
+ apath,
+ create_pathtarget(root, tlist),
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ groupList,
+ NIL,
+ NULL,
+ dNumGroups);
+ add_path(result_rel, path);
+
+ /* Try hash aggregate on the Gather path, if valid */
+ if (gpath != NULL)
+ {
+ /* Hashed aggregate plan --- no sort needed */
+ path = (Path *) create_agg_path(root,
+ result_rel,
+ gpath,
+ create_pathtarget(root, tlist),
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ groupList,
+ NIL,
+ NULL,
+ dNumGroups);
+ add_path(result_rel, path);
+ }
+ }
+
+ if (can_sort)
+ {
+ Path *path = apath;
+
+ /* Try Sort -> Unique on the Append path */
+ if (groupList != NIL)
+ path = (Path *) create_sort_path(root, result_rel, path,
+ make_pathkeys_for_sortclauses(root, groupList, tlist),
+ -1.0);
+
+ path = (Path *) create_upper_unique_path(root,
+ result_rel,
+ path,
+ list_length(path->pathkeys),
+ dNumGroups);
+
+ add_path(result_rel, path);
+
+ /* Try Sort -> Unique on the Gather path, if set */
+ if (gpath != NULL)
+ {
+ path = gpath;
+
+ path = (Path *) create_sort_path(root, result_rel, path,
+ make_pathkeys_for_sortclauses(root, groupList, tlist),
+ -1.0);
+
+ path = (Path *) create_upper_unique_path(root,
+ result_rel,
+ path,
+ list_length(path->pathkeys),
+ dNumGroups);
+ add_path(result_rel, path);
+ }
+ }
+
+ /*
+ * Try making a MergeAppend path if we managed to find a path with the
+ * correct pathkeys in each union child query.
+ */
+ if (try_sorted && groupList != NIL)
+ {
+ Path *path;
+
+ path = (Path *) create_merge_append_path(root,
+ result_rel,
+ ordered_pathlist,
+ union_pathkeys,
+ NULL);
+
+ /* and make the MergeAppend unique */
+ path = (Path *) create_upper_unique_path(root,
+ result_rel,
+ path,
+ list_length(tlist),
+ dNumGroups);
+
+ add_path(result_rel, path);
+ }
+ }
+ else
+ {
+ /* UNION ALL */
+ add_path(result_rel, apath);
+
+ if (gpath != NULL)
+ add_path(result_rel, gpath);
+ }
return result_rel;
}
*tlist,
*groupList,
*pathlist;
+ bool lpath_trivial_tlist,
+ rpath_trivial_tlist;
double dLeftGroups,
dRightGroups,
dNumGroups,
false, 0,
refnames_tlist,
&lpath_tlist,
- &dLeftGroups);
+ &lpath_trivial_tlist);
+ if (lrel->rtekind == RTE_SUBQUERY)
+ build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
+ NIL, &dLeftGroups);
+ else
+ dLeftGroups = lrel->rows;
+
lpath = lrel->cheapest_total_path;
rrel = recurse_set_operations(op->rarg, root,
op->colTypes, op->colCollations,
false, 1,
refnames_tlist,
&rpath_tlist,
- &dRightGroups);
+ &rpath_trivial_tlist);
+ if (rrel->rtekind == RTE_SUBQUERY)
+ build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
+ NIL, &dRightGroups);
+ else
+ dRightGroups = rrel->rows;
+
rpath = rrel->cheapest_total_path;
/* Undo effects of forcing tuple_fraction to 0 */
plan_union_children(PlannerInfo *root,
SetOperationStmt *top_union,
List *refnames_tlist,
- List **tlist_list)
+ List **tlist_list,
+ List **istrivial_tlist)
{
List *pending_rels = list_make1(top_union);
List *result = NIL;
List *child_tlist;
+ bool trivial_tlist;
*tlist_list = NIL;
+ *istrivial_tlist = NIL;
while (pending_rels != NIL)
{
false, -1,
refnames_tlist,
&child_tlist,
- NULL));
+ &trivial_tlist));
*tlist_list = lappend(*tlist_list, child_tlist);
+ *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
}
return result;
}
-/*
- * Add nodes to the given path tree to unique-ify the result of a UNION.
- */
-static Path *
-make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
- PlannerInfo *root)
-{
- RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
- List *groupList;
- double dNumGroups;
-
- /* Identify the grouping semantics */
- groupList = generate_setop_grouplist(op, tlist);
-
- /*
- * XXX for the moment, take the number of distinct groups as equal to the
- * total input size, ie, the worst case. This is too conservative, but
- * it's not clear how to get a decent estimate of the true size. One
- * should note as well the propensity of novices to write UNION rather
- * than UNION ALL even when they don't expect any duplicates...
- */
- dNumGroups = path->rows;
-
- /* Decide whether to hash or sort */
- if (choose_hashed_setop(root, groupList, path,
- dNumGroups, dNumGroups,
- "UNION"))
- {
- /* Hashed aggregate plan --- no sort needed */
- path = (Path *) create_agg_path(root,
- result_rel,
- path,
- create_pathtarget(root, tlist),
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- groupList,
- NIL,
- NULL,
- dNumGroups);
- }
- else
- {
- /* Sort and Unique */
- if (groupList)
- path = (Path *)
- create_sort_path(root,
- result_rel,
- path,
- make_pathkeys_for_sortclauses(root,
- groupList,
- tlist),
- -1.0);
- path = (Path *) create_upper_unique_path(root,
- result_rel,
- path,
- list_length(path->pathkeys),
- dNumGroups);
- }
-
- return path;
-}
-
/*
* postprocess_setop_rel - perform steps required after adding paths
*/