by the query (again, without considering the effects of LIMIT).
- Average width is pretty bogus because the thing really doesn't have
- any idea of the average length of variable-length columns. I'm thinking
- about improving that in the future, but it may not be worth the trouble,
- because the width isn't used for very much.
-
-
Here are some examples (using the regress test database after a
vacuum analyze, and almost-7.0 sources):
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.72 2001/05/09 00:35:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "utils/syscache.h"
-/*
- * The length of a variable-length field in bytes (stupid estimate...)
- */
-#define _DEFAULT_ATTRIBUTE_WIDTH_ 12
-
-
#define LOG2(x) (log(x) / 0.693147180559945)
#define LOG6(x) (log(x) / 1.79175946922805)
static bool cost_qual_eval_walker(Node *node, Cost *total);
static void set_rel_width(Query *root, RelOptInfo *rel);
-static int compute_attribute_width(TargetEntry *tlistentry);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
/*
* set_rel_width
* Set the estimated output width of the relation.
+ *
+ * NB: this works best on base relations because it prefers to look at
+ * real Vars. It will fail to make use of pg_statistic info when applied
+ * to a subquery relation, even if the subquery outputs are simple vars
+ * that we could have gotten info for. Is it worth trying to be smarter
+ * about subqueries?
+ *
+ * The width is accumulated over the entries of rel->targetlist. For an
+ * entry whose expression is a plain Var that getrelid() resolves to a
+ * valid relation OID, get_attavgwidth() is consulted first and its result
+ * is used when it is positive. Every other entry falls back to
+ * get_typavgwidth() applied to the entry's resdom type and typmod.
+ * The resulting sum is stored into rel->width; nothing is returned.
*/
static void
set_rel_width(Query *root, RelOptInfo *rel)
{
- int tuple_width = 0;
- List *tle;
+ int32 tuple_width = 0;
+ List *tllist;
- foreach(tle, rel->targetlist)
- tuple_width += compute_attribute_width((TargetEntry *) lfirst(tle));
- Assert(tuple_width >= 0);
- rel->width = tuple_width;
-}
+ foreach(tllist, rel->targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(tllist);
+ int32 item_width;
-/*
- * compute_attribute_width
- * Given a target list entry, find the size in bytes of the attribute.
- *
- * If a field is variable-length, we make a default assumption. Would be
- * better if VACUUM recorded some stats about the average field width...
- * also, we have access to the atttypmod, but fail to use it...
- */
-static int
-compute_attribute_width(TargetEntry *tlistentry)
-{
- int width = get_typlen(tlistentry->resdom->restype);
+ /*
+ * If it's a Var, try to get statistical info from pg_statistic.
+ * A result that is not positive is ignored, and control drops
+ * through to the type-based estimate further down.
+ */
+ if (tle->expr && IsA(tle->expr, Var))
+ {
+ Var *var = (Var *) tle->expr;
+ Oid relid;
- if (width < 0)
- return _DEFAULT_ATTRIBUTE_WIDTH_;
- else
- return width;
+ relid = getrelid(var->varno, root->rtable);
+ if (relid != InvalidOid)
+ {
+ item_width = get_attavgwidth(relid, var->varattno);
+ if (item_width > 0)
+ {
+ tuple_width += item_width;
+ continue;
+ }
+ }
+ }
+ /*
+ * Not a Var, or can't find statistics for it. Estimate using
+ * just the type info.
+ */
+ item_width = get_typavgwidth(tle->resdom->restype,
+ tle->resdom->restypmod);
+ Assert(item_width > 0);
+ tuple_width += item_width;
+ }
+ Assert(tuple_width >= 0);
+ rel->width = tuple_width;
}
/*
static double
relation_byte_size(double tuples, int width)
{
- return tuples * ((double) (width + sizeof(HeapTupleData)));
+ return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData))); /* per-tuple size = MAXALIGN(width + sizeof(HeapTupleData)); MAXALIGN presumably rounds up for alignment -- confirm against its definition */
}
/*
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.54 2001/05/09 00:35:09 tgl Exp $
*
* NOTES
* Eventually, the index information should go through here, too.
return returnValue;
}
+/*
+ * get_typavgwidth
+ *
+ * Given a type OID and a typmod value (pass -1 if typmod is unknown),
+ * estimate the average width of values of the type. This is used by
+ * the planner, which doesn't require absolutely correct results;
+ * it's OK (and expected) to guess if we don't know for sure.
+ *
+ * typid: OID of the data type.
+ * typmod: type modifier for the value, or -1 if unknown.
+ *
+ * The result is always positive. The cases, tried in order:
+ *   - get_typlen() reports a positive length: that length is returned;
+ *   - type_maximum_size() reports a positive maximum: the maximum itself
+ *     for BPCHAR or when it is at most 32; otherwise 32 plus half of the
+ *     excess over 32 (integer division), with the maximum treated as
+ *     1000 once it reaches 1000, which yields a flat 516;
+ *   - neither source gives an answer: 32.
+ */
+int32
+get_typavgwidth(Oid typid, int32 typmod)
+{
+ int typlen = get_typlen(typid);
+ int32 maxwidth;
+
+ /*
+ * Easy if it's a fixed-width type
+ */
+ if (typlen > 0)
+ return typlen;
+ /*
+ * type_maximum_size knows the encoding of typmod for some datatypes;
+ * don't duplicate that knowledge here.
+ */
+ maxwidth = type_maximum_size(typid, typmod);
+ if (maxwidth > 0)
+ {
+ /*
+ * For BPCHAR, the max width is also the only width. Otherwise
+ * we need to guess about the typical data width given the max.
+ * A sliding scale for percentage of max width seems reasonable.
+ */
+ if (typid == BPCHAROID)
+ return maxwidth;
+ if (maxwidth <= 32)
+ return maxwidth; /* assume full width */
+ if (maxwidth < 1000)
+ return 32 + (maxwidth - 32) / 2; /* assume 50% */
+ /*
+ * Beyond 1000, assume we're looking at something like
+ * "varchar(10000)" where the limit isn't actually reached often,
+ * and use a fixed estimate. The expression below is the 50% rule
+ * evaluated at a maximum of 1000, i.e. 516.
+ */
+ return 32 + (1000 - 32) / 2;
+ }
+ /*
+ * Ooops, we have no idea ... wild guess time.
+ */
+ return 32;
+}
+
/*
* get_typtype
*
/* ---------- STATISTICS CACHE ---------- */
+/*
+ * get_attavgwidth
+ *
+ * Given the table and attribute number of a column, get the average
+ * width of entries in the column. Return zero if no data available.
+ *
+ * relid: OID of the table.
+ * attnum: attribute number of the column within that table.
+ *
+ * The STATRELATT syscache is probed with (relid, attnum). When an entry
+ * is found, its stawidth field is copied out and the cache entry is
+ * released before the value is examined. A stawidth that is not positive
+ * is reported as zero, the same as a missing entry, so callers only need
+ * to test for a result greater than zero.
+ */
+int32
+get_attavgwidth(Oid relid, AttrNumber attnum)
+{
+ HeapTuple tp;
+
+ tp = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(attnum),
+ 0, 0);
+ if (HeapTupleIsValid(tp))
+ {
+ int32 stawidth = ((Form_pg_statistic) GETSTRUCT(tp))->stawidth;
+
+ ReleaseSysCache(tp);
+ if (stawidth > 0)
+ return stawidth;
+ }
+ return 0;
+}
+
/*
* get_attstatsslot
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
+ * $Id: lsyscache.h,v 1.32 2001/05/09 00:35:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
extern char get_typstorage(Oid typid);
extern Datum get_typdefault(Oid typid);
+extern int32 get_typavgwidth(Oid typid, int32 typmod);
+extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
extern bool get_attstatsslot(HeapTuple statstuple,
Oid atttype, int32 atttypmod,
int reqkind, Oid reqop,