Selectivity *rest_selec);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
- Oid vartype, Oid opfamily, Const *prefixcon);
+ Oid vartype, Oid opfamily,
+ Oid collation,
+ Const *prefixcon);
static Selectivity like_selectivity(const char *patt, int pattlen,
bool case_insensitive);
static Selectivity regex_selectivity(const char *patt, int pattlen,
if (eqopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
- result = var_eq_const(&vardata, eqopr, prefix->constvalue,
- false, true, false);
+ result = var_eq_const_ext(&vardata, eqopr, collation,
+ prefix->constvalue, false, true, false);
}
else
{
opfuncid = get_opcode(oprid);
fmgr_info(opfuncid, &opproc);
- selec = histogram_selectivity(&vardata, &opproc, constval, true,
- 10, 1, &hist_size);
+ selec = histogram_selectivity_ext(&vardata, &opproc, collation,
+ constval, true,
+ 10, 1, &hist_size);
/* If not at least 100 entries, use the heuristic method */
if (hist_size < 100)
if (pstatus == Pattern_Prefix_Partial)
prefixsel = prefix_selectivity(root, &vardata, vartype,
- opfamily, prefix);
+ opfamily, collation, prefix);
else
prefixsel = 1.0;
heursel = prefixsel * rest_selec;
* directly to the result selectivity. Also add up the total fraction
* represented by MCV entries.
*/
- mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
- &sumcommon);
+ mcv_selec = mcv_selectivity_ext(&vardata, &opproc, collation,
+ constval, true,
+ &sumcommon);
/*
* Now merge the results from the MCV and histogram calculations,
*/
static Selectivity
prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
- Oid vartype, Oid opfamily, Const *prefixcon)
+ Oid vartype, Oid opfamily,
+ Oid collation,
+ Const *prefixcon)
{
Selectivity prefixsel;
Oid cmpopr;
FmgrInfo opproc;
- AttStatsSlot sslot;
Const *greaterstrcon;
Selectivity eq_sel;
elog(ERROR, "no >= operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc);
- prefixsel = ineq_histogram_selectivity(root, vardata,
- &opproc, true, true,
- prefixcon->constvalue,
- prefixcon->consttype);
+ prefixsel = ineq_histogram_selectivity_ext(root, vardata,
+ &opproc, true, true,
+ collation,
+ prefixcon->constvalue,
+ prefixcon->consttype);
if (prefixsel < 0.0)
{
return DEFAULT_MATCH_SEL;
}
- /*-------
- * If we can create a string larger than the prefix, say
- * "x < greaterstr". We try to generate the string referencing the
- * collation of the var's statistics, but if that's not available,
- * use DEFAULT_COLLATION_OID.
- *-------
+ /*
+ * If we can create a string larger than the prefix, say "x < greaterstr".
*/
- if (HeapTupleIsValid(vardata->statsTuple) &&
- get_attstatsslot(&sslot, vardata->statsTuple,
- STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
- /* sslot.stacoll is set up */ ;
- else
- sslot.stacoll = DEFAULT_COLLATION_OID;
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
BTLessStrategyNumber);
if (cmpopr == InvalidOid)
elog(ERROR, "no < operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc);
- greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
+ greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
if (greaterstrcon)
{
Selectivity topsel;
- topsel = ineq_histogram_selectivity(root, vardata,
- &opproc, false, false,
- greaterstrcon->constvalue,
- greaterstrcon->consttype);
+ topsel = ineq_histogram_selectivity_ext(root, vardata,
+ &opproc, false, false,
+ collation,
+ greaterstrcon->constvalue,
+ greaterstrcon->consttype);
/* ineq_histogram_selectivity worked before, it shouldn't fail now */
Assert(topsel >= 0.0);
BTEqualStrategyNumber);
if (cmpopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
- eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
- false, true, false);
+ eq_sel = var_eq_const_ext(vardata, cmpopr, collation, prefixcon->constvalue,
+ false, true, false);
prefixsel = Max(prefixsel, eq_sel);
* (if any) is passed using the standard fmgr mechanism, so that the estimator
* function can fetch it with PG_GET_COLLATION(). Note, however, that all
* statistics in pg_statistic are currently built using the relevant column's
- * collation. Thus, in most cases where we are looking at statistics, we
- * should ignore the operator collation and use the stats entry's collation.
- * We expect that the error induced by doing this is usually not large enough
- * to justify complicating matters. In any case, doing otherwise would yield
- * entirely garbage results for ordered stats data such as histograms.
+ * collation.
*----------
*/
get_index_stats_hook_type get_index_stats_hook = NULL;
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
-static double eqjoinsel_inner(Oid opfuncoid,
+static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2);
-static double eqjoinsel_semi(Oid opfuncoid,
+static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
static void examine_simple_variable(PlannerInfo *root, Var *var,
VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
- Oid sortop, Datum *min, Datum *max);
+ Oid sortop, Oid collation,
+ Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
- Oid sortop,
+ Oid sortop, Oid collation,
Datum *min, Datum *max);
static bool get_actual_variable_endpoint(Relation heapRel,
Relation indexRel,
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
* in the query.)
*/
if (IsA(other, Const))
- selec = var_eq_const(&vardata, operator,
- ((Const *) other)->constvalue,
- ((Const *) other)->constisnull,
- varonleft, negate);
+ selec = var_eq_const_ext(&vardata, operator, collation,
+ ((Const *) other)->constvalue,
+ ((Const *) other)->constisnull,
+ varonleft, negate);
else
selec = var_eq_non_const(&vardata, operator, other,
varonleft, negate);
var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
bool varonleft, bool negate)
+{
+ return var_eq_const_ext(vardata, operator, DEFAULT_COLLATION_OID,
+ constval, constisnull,
+ varonleft, negate);
+}
+
+double
+var_eq_const_ext(VariableStatData *vardata, Oid operator, Oid collation,
+ Datum constval, bool constisnull,
+ bool varonleft, bool negate)
{
double selec;
double nullfrac = 0.0;
/* be careful to apply operator right way 'round */
if (varonleft)
match = DatumGetBool(FunctionCall2Coll(&eqproc,
- sslot.stacoll,
+ collation,
sslot.values[i],
constval));
else
match = DatumGetBool(FunctionCall2Coll(&eqproc,
- sslot.stacoll,
+ collation,
constval,
sslot.values[i]));
if (match)
*/
static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
+ Oid collation,
VariableStatData *vardata, Datum constval, Oid consttype)
{
Form_pg_statistic stats;
* to the result selectivity. Also add up the total fraction represented
* by MCV entries.
*/
- mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
- &sumcommon);
+ mcv_selec = mcv_selectivity_ext(vardata, &opproc, collation, constval, true,
+ &sumcommon);
/*
* If there is a histogram, determine which bin the constant falls in, and
* compute the resulting contribution to selectivity.
*/
- hist_selec = ineq_histogram_selectivity(root, vardata,
- &opproc, isgt, iseq,
- constval, consttype);
+ hist_selec = ineq_histogram_selectivity_ext(root, vardata,
+ &opproc, isgt, iseq,
+ collation,
+ constval, consttype);
/*
* Now merge the results from the MCV and histogram calculations,
mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
double *sumcommonp)
+{
+ return mcv_selectivity_ext(vardata, opproc, DEFAULT_COLLATION_OID,
+ constval, varonleft, sumcommonp);
+}
+
+double
+mcv_selectivity_ext(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
+ Datum constval, bool varonleft,
+ double *sumcommonp)
{
double mcv_selec,
sumcommon;
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
- sslot.stacoll,
+ collation,
sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
- sslot.stacoll,
+ collation,
constval,
sslot.values[i])))
mcv_selec += sslot.numbers[i];
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size)
+{
+ return histogram_selectivity_ext(vardata,
+ opproc, DEFAULT_COLLATION_OID,
+ constval, varonleft,
+ min_hist_size, n_skip,
+ hist_size);
+}
+
+double
+histogram_selectivity_ext(VariableStatData *vardata,
+ FmgrInfo *opproc, Oid collation,
+ Datum constval, bool varonleft,
+ int min_hist_size, int n_skip,
+ int *hist_size)
{
double result;
AttStatsSlot sslot;
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
- sslot.stacoll,
+ collation,
sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
- sslot.stacoll,
+ collation,
constval,
sslot.values[i])))
nmatch++;
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, bool iseq,
Datum constval, Oid consttype)
+{
+ return ineq_histogram_selectivity_ext(root, vardata,
+ opproc, isgt, iseq,
+ DEFAULT_COLLATION_OID,
+ constval, consttype);
+}
+
+double
+ineq_histogram_selectivity_ext(PlannerInfo *root,
+ VariableStatData *vardata,
+ FmgrInfo *opproc, bool isgt, bool iseq,
+ Oid collation,
+ Datum constval, Oid consttype)
{
double hist_selec;
AttStatsSlot sslot;
* column type. However, to make that work we will need to figure out
* which staop to search for --- it's not necessarily the one we have at
* hand! (For example, we might have a '<=' operator rather than the '<'
- * operator that will appear in staop.) For now, assume that whatever
- * appears in pg_statistic is sorted the same way our operator sorts, or
- * the reverse way if isgt is true.
+ * operator that will appear in staop.) The collation might not agree
+ * either. For now, just assume that whatever appears in pg_statistic is
+ * sorted the same way our operator sorts, or the reverse way if isgt is
+ * true. This could result in a bogus estimate, but it still seems better
+ * than falling back to the default estimate.
*/
if (HeapTupleIsValid(vardata->statsTuple) &&
statistic_proc_security_check(vardata, opproc->fn_oid) &&
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
+ collation,
&sslot.values[0],
&sslot.values[1]);
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
+ collation,
&sslot.values[0],
NULL);
else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
+ collation,
NULL,
&sslot.values[probe]);
ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
- sslot.stacoll,
+ collation,
sslot.values[probe],
constval));
if (isgt)
* values to a uniform comparison scale, and do a linear
* interpolation within this bin.
*/
- if (convert_to_scalar(constval, consttype, sslot.stacoll,
+ if (convert_to_scalar(constval, consttype, collation,
&val,
sslot.values[i - 1], sslot.values[i],
vardata->vartype,
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
}
/* The rest of the work is done by scalarineqsel(). */
- selec = scalarineqsel(root, operator, isgt, iseq,
+ selec = scalarineqsel(root, operator, isgt, iseq, collation,
&vardata, constval, consttype);
ReleaseVariableStats(vardata);
* A boolean variable V is equivalent to the clause V = 't', so we
* compute the selectivity as if that is what we have.
*/
- selec = var_eq_const(&vardata, BooleanEqualOperator,
- BoolGetDatum(true), false, true, false);
+ selec = var_eq_const_ext(&vardata, BooleanEqualOperator, InvalidOid,
+ BoolGetDatum(true), false, true, false);
}
else
{
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+ Oid collation = PG_GET_COLLATION();
double selec;
double selec_inner;
VariableStatData vardata1;
}
/* We need to compute the inner-join selectivity in all cases */
- selec_inner = eqjoinsel_inner(opfuncoid,
+ selec_inner = eqjoinsel_inner(opfuncoid, collation,
&vardata1, &vardata2,
nd1, nd2,
isdefault1, isdefault2,
inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
if (!join_is_reversed)
- selec = eqjoinsel_semi(opfuncoid,
+ selec = eqjoinsel_semi(opfuncoid, collation,
&vardata1, &vardata2,
nd1, nd2,
isdefault1, isdefault2,
Oid commop = get_commutator(operator);
Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
- selec = eqjoinsel_semi(commopfuncoid,
+ selec = eqjoinsel_semi(commopfuncoid, collation,
&vardata2, &vardata1,
nd2, nd1,
isdefault2, isdefault1,
* that it's worth trying to distinguish them here.
*/
static double
-eqjoinsel_inner(Oid opfuncoid,
+eqjoinsel_inner(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
- sslot1->stacoll,
+ collation,
sslot1->values[i],
sslot2->values[j])))
{
* Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
*/
static double
-eqjoinsel_semi(Oid opfuncoid,
+eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
- sslot1->stacoll,
+ collation,
sslot1->values[i],
sslot2->values[j])))
{
Oid op_lefttype;
Oid op_righttype;
Oid opno,
+ collation,
lsortop,
rsortop,
lstatop,
if (!is_opclause(clause))
return; /* shouldn't happen */
opno = ((OpExpr *) clause)->opno;
+ collation = ((OpExpr *) clause)->inputcollid;
left = get_leftop((Expr *) clause);
right = get_rightop((Expr *) clause);
if (!right)
/* Try to get ranges of both inputs */
if (!isgt)
{
- if (!get_variable_range(root, &leftvar, lstatop,
+ if (!get_variable_range(root, &leftvar, lstatop, collation,
&leftmin, &leftmax))
goto fail; /* no range available from stats */
- if (!get_variable_range(root, &rightvar, rstatop,
+ if (!get_variable_range(root, &rightvar, rstatop, collation,
&rightmin, &rightmax))
goto fail; /* no range available from stats */
}
else
{
/* need to swap the max and min */
- if (!get_variable_range(root, &leftvar, lstatop,
+ if (!get_variable_range(root, &leftvar, lstatop, collation,
&leftmax, &leftmin))
goto fail; /* no range available from stats */
- if (!get_variable_range(root, &rightvar, rstatop,
+ if (!get_variable_range(root, &rightvar, rstatop, collation,
&rightmax, &rightmin))
goto fail; /* no range available from stats */
}
* fraction that's <= the right-side maximum value. But only believe
* non-default estimates, else stick with our 1.0.
*/
- selec = scalarineqsel(root, leop, isgt, true, &leftvar,
+ selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
rightmax, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftend = selec;
/* And similarly for the right variable. */
- selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
+ selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
leftmax, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightend = selec;
* minimum value. But only believe non-default estimates, else stick with
* our own default.
*/
- selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
+ selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
rightmin, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftstart = selec;
/* And similarly for the right variable. */
- selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
+ selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
leftmin, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightstart = selec;
*
* sortop is the "<" comparison operator to use. This should generally
* be "<" not ">", as only the former is likely to be found in pg_statistic.
+ * The collation must be specified too.
*/
static bool
-get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
+get_variable_range(PlannerInfo *root, VariableStatData *vardata,
+ Oid sortop, Oid collation,
Datum *min, Datum *max)
{
Datum tmin = 0;
* before enabling this.
*/
#ifdef NOT_USED
- if (get_actual_variable_range(root, vardata, sortop, min, max))
+ if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
return true;
#endif
*
* If there is a histogram that is sorted with some other operator than
* the one we want, fail --- this suggests that there is data we can't
- * use.
+ * use. XXX consider collation too.
*/
if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, sortop,
continue;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
- sslot.stacoll,
+ collation,
sslot.values[i], tmin)))
{
tmin = sslot.values[i];
tmin_is_mcv = true;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
- sslot.stacoll,
+ collation,
tmax, sslot.values[i])))
{
tmax = sslot.values[i];
* If no data available, return false.
*
* sortop is the "<" comparison operator to use.
+ * collation is the required collation.
*/
static bool
get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
- Oid sortop,
+ Oid sortop, Oid collation,
Datum *min, Datum *max)
{
bool have_data = false;
continue;
/*
- * The first index column must match the desired variable and sort
- * operator --- but we can use a descending-order index.
+ * The first index column must match the desired variable, sortop, and
+ * collation --- but we can use a descending-order index.
*/
+ if (collation != index->indexcollations[0])
+ continue; /* test first 'cause it's cheapest */
if (!match_index_to_operand(vardata->var, 0, index))
continue;
switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))