Rewrite of planner statistics-gathering code. ANALYZE is now available as

author Tom Lane

Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)

committer Tom Lane

Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)
author Tom Lane
Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)
committer Tom Lane
Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml

index 1738a5bf1d4c6d60241e373dece5128168a608b1..01885a5095b4d1c1f0de162d381270098772d146 100644 (file)
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1,6 +1,6 @@
  
  
  
@@ -16,7 +16,7 @@
     PostgreSQL's system catalogs are regular
     tables.  You can drop and recreate the tables, add columns, insert
     and update values, and severely mess up your system that way.
-   Normally one never has to change the system catalogs by hand, there
+   Normally one should not change the system catalogs by hand, there
     are always SQL commands to do that.  (For example, CREATE
     DATABASE inserts a row into the
     pg_database catalog -- and actually
@@ -185,7 +185,7 @@
    
     pg_aggregate stores information about
     aggregate functions.  An aggregate function is a function that
-   operates on a set of values (typically one column from each the row
+   operates on a set of values (typically one column from each row
     that matches a query condition) and returns a single value computed
     from all these values.  Typical aggregate functions are
     sum, count, and
@@ -233,7 +233,7 @@
        aggbasetype
        oid
        pg_type.oid
-      The type on which this function operates when invoked from SQL
+      The input datatype for this aggregate function
       
       
        aggtranstype
@@ -269,7 +269,7 @@
  
    
     An aggregate function is identified through name
-   and argument type.  Hence aggname and aggname
+   and argument type.  Hence aggname and aggbasetype
     are the composite primary key.
    
  
@@ -311,11 +311,8 @@
       
        adnum
        int2
-      
-      
-       The number of the column; see
-       pg_attribute.pg_attnum
-      
+      pg_attribute.attnum
+      The number of the column
       
  
       
@@ -390,20 +387,18 @@
       
  
       
-      attdispersion
-      float4
+      attstattarget
+      int4
        
        
-       attdispersion is the dispersion
-       statistic of the column (0.0 to 1.0), or zero if the statistic
-       has not been calculated, or -1.0 if VACUUM
-       found that the column contains no duplicate entries (in which
-       case the dispersion should be taken as
-       1.0/numberOfRows for the current table size).
-       The -1.0 hack is useful because the number of rows may be
-       updated more often than
-       attdispersion is. We assume that the
-       column will retain its no-duplicate-entry property.
+       attstattarget controls the level of detail
+       of statistics accumulated for this column by
+       ANALYZE.
+       A zero value indicates that no statistics should be collected.
+       The exact meaning of positive values is datatype-dependent.
+       For scalar datatypes, attstattarget
+       is both the target number of most common values
+       to collect, and the target number of histogram bins to create.
        
       
  
@@ -430,10 +425,12 @@
       
  
       
-      attnelems
+      attndims
        int4
        
-      Number of dimensions, if the column is an array
+      
+       Number of dimensions, if the column is an array; otherwise 0.
+      
       
  
       
@@ -610,18 +607,22 @@
        
        
         Size of the on-disk representation of this table in pages (size
-       BLCKSZ).  This is only an approximate value
-       which is calculated during vacuum.
+       BLCKSZ).
+       This is only an estimate used by the planner.
+       It is updated by VACUUM,
+       ANALYZE, and CREATE INDEX.
        
       
  
       
        reltuples
-      int4
+      float4
        
        
-       Number of tuples in the table.  This is only an estimate used
-       by the planner, updated by VACUUM.
+       Number of tuples in the table.
+       This is only an estimate used by the planner.
+       It is updated by VACUUM,
+       ANALYZE, and CREATE INDEX.
        
       
  
@@ -1671,6 +1672,130 @@
   
  
  
+ 
+  pg_statistic
+
+  
+   pg_statistic stores statistical data about
+   the contents of the database.  Entries are created by
+   ANALYZE and subsequently used by the query planner.
+   There is one entry for each table column that has been analyzed.
+   Note that all the statistical data is inherently approximate,
+   even assuming that it is up-to-date.
+  
+
+  
+   Since different kinds of statistics may be appropriate for different
+   kinds of data, pg_statistic is designed not
+   to assume very much about what sort of statistics it stores.  Only
+   extremely general statistics (such as NULL-ness) are given dedicated
+   columns in pg_statistic.  Everything else
+   is stored in "slots", which are groups of associated columns whose
+   content is identified by a code number in one of the slot's columns.
+   For more information see
+   src/include/catalog/pg_statistic.h.
+  
+
+   pg_statistic Columns
+
+   
+    
+     
+      Name
+      Type
+      References
+      Description
+     
+    
+
+    
+     
+      starelid
+      oid
+      pg_class.oid
+      The table that the described column belongs to
+     
+
+     
+      staattnum
+      int2
+      pg_attribute.attnum
+      The number of the described column
+     
+
+     
+      stanullfrac
+      float4
+      
+      The fraction of the column's entries that are NULL
+     
+
+     
+      stawidth
+      int4
+      
+      The average stored width, in bytes, of non-NULL entries
+     
+
+     
+      stadistinct
+      float4
+      
+      The number of distinct non-NULL data values in the column.
+      A value greater than zero is the actual number of distinct values.
+      A value less than zero is the negative of a fraction of the number
+      of rows in the table (for example, a column in which values appear about
+      twice on the average could be represented by stadistinct = -0.5).
+      A zero value means the number of distinct values is unknown.
+      
+     
+
+     
+      stakindN
+      int2
+      
+      A code number indicating the kind of statistics stored in the Nth
+      "slot" of the pg_statistic row.
+      
+     
+
+     
+      staopN
+      oid
+      pg_operator.oid
+      An operator used to derive the statistics stored in the
+      Nth "slot".  For example, a histogram slot would show the "<"
+      operator that defines the sort order of the data.
+      
+     
+
+     
+      stanumbersN
+      float4[]
+      
+      Numerical statistics of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not involve numerical values.
+      
+     
+
+     
+      stavaluesN
+      text[]
+      
+      Column data values of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not store any data values.
+      For datatype independence, all column data values are converted
+      to external textual form and stored as TEXT datums.
+      
+     
+    
+   
+  
+   pg_statistic Columns
+
+   
+    
+     
+      Name
+      Type
+      References
+      Description
+     
+    
+
+    
+     
+      starelid
+      oid
+      pg_class.oid
+      The table that the described column belongs to
+     
+
+     
+      staattnum
+      int2
+      pg_attribute.attnum
+      The number of the described column
+     
+
+     
+      stanullfrac
+      float4
+      
+      The fraction of the column's entries that are NULL
+     
+
+     
+      stawidth
+      int4
+      
+      The average stored width, in bytes, of non-NULL entries
+     
+
+     
+      stadistinct
+      float4
+      
+      The number of distinct non-NULL data values in the column.
+      A value greater than zero is the actual number of distinct values.
+      A value less than zero is the negative of a fraction of the number
+      of rows in the table (for example, a column in which values appear about
+      twice on the average could be represented by stadistinct = -0.5).
+      A zero value means the number of distinct values is unknown.
+      
+     
+
+     
+      stakindN
+      int2
+      
+      A code number indicating the kind of statistics stored in the Nth
+      "slot" of the pg_statistic row.
+      
+     
+
+     
+      staopN
+      oid
+      pg_operator.oid
+      An operator used to derive the statistics stored in the
+      Nth "slot".  For example, a histogram slot would show the "<"
+      operator that defines the sort order of the data.
+      
+     
+
+     
+      stanumbersN
+      float4[]
+      
+      Numerical statistics of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not involve numerical values.
+      
+     
+
+     
+      stavaluesN
+      text[]
+      
+      Column data values of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not store any data values.
+      For datatype independence, all column data values are converted
+      to external textual form and stored as TEXT datums.
+      
+     
+    
+   
+  
+
+ 
+
+
   
    pg_type
  
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml

index 32ecd9e6695ab7efd0fe9fa8f37b25d96b77ab4f..42cab244ab8a1af0c53b14f536f9337e58b81e28 100644 (file)
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -1,4 +1,4 @@
-
+
  
  
   Indices
@@ -71,7 +71,7 @@ CREATE INDEX test1_id_index ON test1 (id);
     Once the index is created, no further intervention is required: the
     system will use the index when it thinks it would be more efficient
     than a sequential table scan.  But you may have to run the
-   VACUUM ANALYZE command regularly to update
+   ANALYZE command regularly to update
     statistics to allow the query planner to make educated decisions.
     Also read  for information about
     how to find out whether an index is used and when and why the
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml

index 0088896131e1e5217568528f2bc319a0f03e41ad..dea65e98f2edfccd58e15ccd91593206013a5ad7 100644 (file)
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -1,5 +1,5 @@
  
@@ -40,6 +40,7 @@ Complete list of usable sgml source files in this directory.
  
  
  
+
  
  
  
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml

index 4c258c816506f3448b6d987104c2251ce616b5a4..21fc8c2ebdbe724e3b68c0c272ea4a18d58ee323 100644 (file)
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1,5 +1,5 @@
  
  
@@ -29,7 +29,9 @@ ALTER TABLE [ ONLY ] table [ * ]
  ALTER TABLE [ ONLY ] table [ * ]
      ALTER [ COLUMN ] column { SET DEFAULT 
     class="PARAMETER">value | DROP DEFAULT }
-ALTER TABLE table [ * ]
+ALTER TABLE [ ONLY ] table [ * ]
+    ALTER [ COLUMN ] column SET STATISTICS integer
+ALTER TABLE [ ONLY ] table [ * ]
     RENAME [ COLUMN ] column TO 
     class="PARAMETER">newcolumn
 ALTER TABLE table
@@ -159,9 +161,14 @@ ALTER TABLE table
    ALTER TABLE changes the definition of an existing table.
    The ADD COLUMN form adds a new column to the table
    using the same syntax as 
-   endterm="SQL-CREATETABLE-title">. The ALTER COLUMN form
-   allows you to set or remove the default for the column. Note that defaults
-   only apply to newly inserted rows.
+   endterm="SQL-CREATETABLE-title">.
+   The ALTER COLUMN SET/DROP DEFAULT forms
+   allow you to set or remove the default for the column. Note that defaults
+   only apply to subsequent INSERT commands; they do not
+   cause rows already in the table to change.
+   The ALTER COLUMN SET STATISTICS form allows you to
+   set the statistics-gathering target for subsequent
+    operations.
    The RENAME clause causes the name of a table or column
    to change without changing any of the data contained in
    the affected table. Thus, the table or column will
@@ -170,7 +177,7 @@ ALTER TABLE table
    The ADD table constraint definition clause 
    adds a new constraint to the table using the same syntax as 
    linkend="SQL-CREATETABLE" endterm="SQL-CREATETABLE-title">. 
-   The OWNER clause chnages the owner of the table to the user 
+   The OWNER clause changes the owner of the table to the user 
    new user.
   
 
@@ -190,10 +197,11 @@ ALTER TABLE table
    
 
    
-    In the current implementation, default and constraint clauses for the
+    In the current implementation of ADD COLUMN,
+    default and constraint clauses for the
     new column will be ignored. You can use the SET DEFAULT
     form of ALTER TABLE to set the default later.
-    (You will also have to update the already existing rows to the
+    (You may also want to update the already existing rows to the
     new default value, using 
     endterm="sql-update-title">.)
    
@@ -210,7 +218,7 @@ ALTER TABLE table
 
    
     You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
     catalog is not permitted.
     The PostgreSQL User's Guide has further
     information on inheritance.


diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml

new file mode 100644 (file)

index 0000000..57d3213


--- /dev/null
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -0,0 +1,219 @@
+
+
+
+ 
+  
+   ANALYZE
+  
+  SQL - Language Statements
+ 
+ 
+  
+   ANALYZE
+  
+  
+   Collect statistics about a Postgres database
+  
+ 
+ 
+  
+   2001-05-04
+  
+  
+ANALYZE [ VERBOSE ] [ table [ (column [, ...] ) ] ]
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Inputs
+   
+
+   
+    
+     
+      VERBOSE
+      
+       
+   Enables display of progress messages.
+       
+      
+     
+     
+      table
+      
+       
+   The name of a specific table to analyze. Defaults to all tables.
+       
+      
+     
+     
+      column
+      
+       
+   The name of a specific column to analyze. Defaults to all columns.
+       
+      
+     
+    
+   
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Outputs
+   
+   
+
+    
+     
+      
+ANALYZE
+       
+      
+       
+   The command is complete.
+       
+      
+     
+
+    
+   
+  
+ 
+
+ 
+  
+   2001-05-04
+  
+  
+   Description
+  
+  
+   ANALYZE collects statistics about the contents of
+   Postgres tables, and stores the results in
+   the system table pg_statistic.  Subsequently,
+   the query planner uses the statistics to help determine the most efficient
+   execution plans for queries.
+  
+
+  
+   With no parameter, ANALYZE examines every table in the
+   current database.  With a parameter, ANALYZE examines
+   only that table.  It is further possible to give a list of column names,
+   in which case only the statistics for those columns are updated.
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Notes
+   
+
+  
+   It is a good idea to run ANALYZE periodically, or
+   just after making major changes in the contents of a table.  Accurate
+   statistics will help the planner to choose the most appropriate query
+   plan, and thereby improve the speed of query processing.  A common
+   strategy is to run VACUUM and ANALYZE
+   once a day during a low-usage time of day.
+  
+
+  
+   Unlike ,
+   ANALYZE requires
+   only a read lock on the target table, so it can run in parallel with
+   other activity on the table.
+  
+
+  
+   For large tables, ANALYZE takes a random sample of the
+   table contents, rather than examining every row.  This allows even very
+   large tables to be analyzed in a small amount of time.  Note however
+   that the statistics are only approximate, and will change slightly each
+   time ANALYZE is run, even if the actual table contents
+   did not change.  This may result in small changes in the planner's
+   estimated costs shown by EXPLAIN.
+  
+
+  
+   The collected statistics usually include a list of some of the most common
+   values in each column and a histogram showing the approximate data
+   distribution in each column.  One or both of these may be omitted if
+   ANALYZE deems them uninteresting (for example, in
+   a unique-key column, there are no common values) or if the column
+   datatype does not support the appropriate operators.
+  
+
+  
+   The extent of analysis can be controlled by adjusting the per-column
+   statistics target with ALTER TABLE ALTER COLUMN SET
+   STATISTICS (see
+   ).  The
+   target value sets the maximum number of entries in the most-common-value
+   list and the maximum number of bins in the histogram.  The default
+   target value is 10, but this can be adjusted up or down to trade off
+   accuracy of planner estimates against the time taken for
+   ANALYZE and the
+   amount of space occupied in pg_statistic.
+   In particular, setting the statistics target to zero disables collection of
+   statistics for that column.  It may be useful to do that for columns that
+   are never used as part of the WHERE, GROUP BY, or ORDER BY clauses of
+   queries, since the planner will have no use for statistics on such columns.
+  
+
+  
+   The largest statistics target among the columns being analyzed determines
+   the number of table rows sampled to prepare the statistics.  Increasing
+   the target causes a proportional increase in the time and space needed
+   to do ANALYZE.
+  
+
+  
+ 
+
+ 
+  
+   Compatibility
+  
+
+  
+   
+    2001-05-04
+   
+   
+    SQL92
+   
+   
+    There is no ANALYZE statement in SQL92.
+   
+  
+ 
+
+
+


diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml

index 51cb8a9ffdacfe41115a94d41b11e97fa1e6b6f9..cbb182466ea44d231b4271f54f2c14da9534307b 100644 (file)


--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -1,5 +1,5 @@
 
 
@@ -15,15 +15,15 @@ Postgres documentation
    VACUUM
   
   
-   Clean and analyze a Postgres database
+   Clean and optionally analyze a Postgres database
   
  
  
   
-   1999-07-20
+   2001-05-04
   
   
-VACUUM [ VERBOSE ] [ ANALYZE ] [ table ]
+VACUUM [ VERBOSE ] [ table ]
 VACUUM [ VERBOSE ] ANALYZE [ table [ (column [, ...] ) ] ]
   
 
@@ -49,7 +49,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
       ANALYZE
       
        
-   Updates column statistics used by the optimizer to
+   Updates statistics used by the optimizer to
    determine the most efficient way to execute a query.
        
       
@@ -90,7 +90,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
        
       
        
-   The command has been accepted and the database is being cleaned.
+   The command is complete.
        
       
      
@@ -144,28 +144,26 @@ NOTICE:  Index index: Pages 28;
    Description
   
   
-   VACUUM serves two purposes in 
-   Postgres as both a means to reclaim storage and
-   also a means to collect information for the optimizer.
+   VACUUM reclaims storage occupied by deleted tuples.
+   In normal Postgres operation, tuples that
+   are DELETEd or obsoleted by UPDATE are not physically removed from
+   their table; they remain present until a VACUUM is
+   done.  Therefore it's necessary to do VACUUM
+   periodically, especially on frequently-updated tables.
   
 
   
-   VACUUM opens every table in the database,
-   cleans out records from rolled back transactions, and updates statistics in the
-   system catalogs.  The statistics maintained include the number of
-   tuples and number of pages stored in all tables.
-  
-
-
-  
-   VACUUM ANALYZE collects statistics representing the
-   dispersion of the data in each column.
-   This information is valuable when several query execution paths are possible.
+   With no parameter, VACUUM processes every table in the
+   current database.  With a parameter, VACUUM processes
+   only that table.
   
 
   
-   Running VACUUM
-   periodically will increase the speed of the database in processing user queries.
+   VACUUM ANALYZE performs a VACUUM
+   and then an ANALYZE for each selected table.  This
+   is a handy combination form for routine maintenance scripts.  See
+   
+   for more details about its processing.
   
 
   
@@ -175,16 +173,15 @@ NOTICE:  Index index: Pages 28;
    
     Notes
    
-   
-    The open database is the target for VACUUM.
-   
+
    
     We recommend that active production databases be
     VACUUM-ed nightly, in order to remove
     expired rows. After copying a large table into
     Postgres or after deleting a large number
     of records, it may be a good idea to issue a VACUUM
-    ANALYZE query. This will update the system catalogs with
+    ANALYZE command for the affected table. This will update the
+    system catalogs with
     the results of all recent changes, and allow the
     Postgres query optimizer to make better
     choices in planning user queries.


diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml

index b92ee0868d029cf48443f4240fab5224bc958862..9a977a6515c97db601f13f5f43413bc3e81a46c8 100644 (file)


--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
 
@@ -26,6 +26,7 @@ PostgreSQL Reference Manual
    &alterGroup;
    &alterTable;
    &alterUser;
+   &analyze;
    &begin;
    &checkpoint;
    &close;


diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml

index d38e78a4e1af22651531a65d320f427ea71b175b..57d8bb79c28d69da43ce1897f0dacb4f3dd1a56b 100644 (file)


--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -1,5 +1,5 @@
 
 
  
@@ -244,7 +244,7 @@ SELECT (a + b) AS c FROM test_complex;
     only a small fraction.  '<' will accept a fraction that depends on
     where the given constant falls in the range of values for that table
     column (which, it just so happens, is information collected by
-    VACUUM ANALYZE and made available to the selectivity estimator).
+    ANALYZE and made available to the selectivity estimator).
     '<=' will accept a slightly larger fraction than '<' for the same
     comparison constant, but they're close enough to not be worth
     distinguishing, especially since we're not likely to do better than a


diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c

index 769f754b6690919606bdaaf8a016260382abdef8..86d704e8d08779e32b38e3d4d4f938072adeccf7 100644 (file)


--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
        Form_pg_attribute attr2 = tupdesc2->attrs[i];
 
        /*
-        * We do not need to check every single field here, and in fact
-        * some fields such as attdispersion probably shouldn't be
-        * compared.  We can also disregard attnum (it was used to place
-        * the row in the attrs array) and everything derived from the
-        * column datatype.
+        * We do not need to check every single field here: we can disregard
+        * attrelid, attnum (it was used to place the row in the attrs array)
+        * and everything derived from the column datatype.
         */
        if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
            return false;
        if (attr1->atttypid != attr2->atttypid)
            return false;
+       if (attr1->attstattarget != attr2->attstattarget)
+           return false;
        if (attr1->atttypmod != attr2->atttypmod)
            return false;
        if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
    else
        MemSet(NameStr(att->attname), 0, NAMEDATALEN);
 
-   att->attdispersion = 0;     /* dummy value */
+   att->attstattarget = 0;
    att->attcacheoff = -1;
    att->atttypmod = typmod;
 
    att->attnum = attributeNumber;
-   att->attnelems = attdim;
+   att->attndims = attdim;
    att->attisset = attisset;
 
    att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
    att->attbyval = true;
    att->attalign = 'i';
    att->attstorage = 'p';
-   att->attnelems = 0;
+   att->attndims = 0;
 }
 
 /* ----------------------------------------------------------------


diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c

index 1c5577b88a066a1ecebfd6ce317147efc28d489c..06010896821e5caa9627c17f6328239ec3c277b6 100644 (file)


--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
 #endif
 
 /*
-** routine to build an index.  Basically calls insert over and over
-*/
+ * routine to build an index.  Basically calls insert over and over
+ */
 Datum
 gistbuild(PG_FUNCTION_ARGS)
 {
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
 
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

index aa76ba232a05c21da94012fbefbc287924aa154f..9617fcc33a6a0bb5bf4556944cc433be26ad0331 100644 (file)


--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    HashItem    hitem;
    Node       *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c

index fb509ab66de99d90fcdab322dd36af40551316d1..2a9df577b10c56de723c68ae329e47847849fb71 100644 (file)


--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -166,6 +166,43 @@ heap_tuple_untoast_attr(varattrib *attr)
 }
 
 
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+   varattrib  *attr = (varattrib *) DatumGetPointer(value);
+   Size        result;
+
+   if (VARATT_IS_COMPRESSED(attr))
+   {
+       /*
+        * va_rawsize shows the original data size, whether the datum
+        * is external or not.
+        */
+       result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+   }
+   else if (VARATT_IS_EXTERNAL(attr))
+   {
+       /*
+        * an uncompressed external attribute has rawsize including the
+        * header (not too consistent!)
+        */
+       result = attr->va_content.va_external.va_rawsize;
+   }
+   else
+   {
+       /* plain untoasted datum */
+       result = VARSIZE(attr);
+   }
+   return result;
+}
+
+
 /* ----------
  * toast_delete -
  *


diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

index 97d99da4fde7bbbfe009c7c7baf04dc557390cd9..f456e0c9306f4f3c191d75172463bf852e905041 100644 (file)


--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    if (usefast)
    {
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
 
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c

index 3752a59e99a3259dcef8feb7660927baf8308a4a..a8c6a13ea3c14626245bad59e372b66b0d5c25e2 100644 (file)


--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* count the tuples as we insert them */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh

index c2993fa8fc6b474bc13badd0c4369ca56fdbb9d4..cac53f3e0853262c213239e698170311a6ee8e1c 100644 (file)


--- a/src/backend/catalog/genbki.sh
+++ b/src/backend/catalog/genbki.sh
@@ -10,7 +10,7 @@
 #
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
 #
 # NOTES
 #    non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
     fi
 done
 
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
 for dir in $INCLUDE_DIRS; do
     if [ -f "$dir/config.h" ]; then
         INDEXMAXKEYS=`grep '#define[   ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+        DEFAULTATTSTATTARGET=`grep '#define[   ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
         break
     fi
 done
@@ -168,6 +170,7 @@ sed -e "s/;[    ]*$//g" \
     -e "s/(NameData/(name/g" \
     -e "s/(Oid/(oid/g" \
     -e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+    -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
     -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
     -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
     -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \


diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

index 54867d51a4b631241e649453750b03ee0c1aeef4..03f16e11c3f3710b2589d8e7330bfd0a2bb386b8 100644 (file)


--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
 
 /*
  * Note:
- *     Should the executor special case these attributes in the future?
- *     Advantage:  consume 1/2 the space in the ATTRIBUTE relation.
- *     Disadvantage:  having rules to compute values in these tuples may
- *             be more difficult if not impossible.
+ *     Should the system special case these attributes in the future?
+ *     Advantage:  consume much less space in the ATTRIBUTE relation.
+ *     Disadvantage:  special cases will be all over the place.
  */
 
 static FormData_pg_attribute a1 = {
-   0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
-   SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+   0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+   SelfItemPointerAttributeNumber, 0, -1, -1,
+   false, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a2 = {
-   0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
-   ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"oid"}, OIDOID, 0, sizeof(Oid),
+   ObjectIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a3 = {
-   0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
-   MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+   MinTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a4 = {
-   0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
-   MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+   MinCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a5 = {
-   0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
-   MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+   MaxTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a6 = {
-   0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
-   MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+   MaxCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 /*
-   We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
 static FormData_pg_attribute a7 = {
-   0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
-   TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+   TableOidAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * This function returns a Form_pg_attribute pointer for a system attribute.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+   if (attno >= 0 || attno < - (int) lengthof(SysAtt))
+       elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+            attno);
+   return SysAtt[-attno - 1];
+}
 
 /* ----------------------------------------------------------------
  *             XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
  *     8) the relations are closed and the new relation's oid
  *        is returned.
  *
- * old comments:
- *     A new relation is inserted into the RELATION relation
- *     with the specified attribute(s) (newly inserted into
- *     the ATTRIBUTE relation).  How does concurrency control
- *     work?  Is it automatic now?  Expects the caller to have
- *     attname, atttypid, atttyparg, attproc, and attlen domains filled.
- *     Create fills the attnum domains sequentually from zero,
- *     fills the attdispersion domains with zeros, and fills the
- *     attrelid fields with the relid.
- *
- *     scan relation catalog for name conflict
- *     scan type catalog for typids (if not arg)
- *     create and insert attribute(s) into attribute catalog
- *     create new relation
- *     insert new relation into attribute catalog
- *
- *     Should coordinate with heap_create_with_catalog(). Either
- *     it should not be called or there should be a way to prevent
- *     the relation from being removed at the end of the
- *     transaction if it is successful ('u'/'r' may be enough).
- *     Also, if the transaction does not commit, then the
- *     relation should be removed.
- *
- *     XXX amcreate ignores "off" when inserting (for now).
- *     XXX amcreate (like the other utilities) needs to understand indexes.
- *
  * ----------------------------------------------------------------
  */
 
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
     */
    for (i = 0; i < natts; i++)
    {
-       for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+       for (j = 0; j < (int) lengthof(SysAtt); j++)
        {
-           if (strcmp(NameStr(HeapAtt[j]->attname),
+           if (strcmp(NameStr(SysAtt[j]->attname),
                       NameStr(tupdesc->attrs[i]->attname)) == 0)
            {
                elog(ERROR, "Attribute '%s' has a name conflict"
                     "\n\tName matches an existing system attribute",
-                    NameStr(HeapAtt[j]->attname));
+                    NameStr(SysAtt[j]->attname));
            }
        }
        if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
        /* Fill in the correct relation OID */
        (*dpp)->attrelid = new_rel_oid;
        /* Make sure these are OK, too */
-       (*dpp)->attdispersion = 0;
+       (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
        (*dpp)->attcacheoff = -1;
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
    /*
     * next we add the system attributes..
     */
-   dpp = HeapAtt;
+   dpp = SysAtt;
    for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
    {
        /* Fill in the correct relation OID */
        /* HACK: we are writing on static data here */
        (*dpp)->attrelid = new_rel_oid;
        /* Unneeded since they should be OK in the constant data anyway */
-       /* (*dpp)->attdispersion = 0; */
+       /* (*dpp)->attstattarget = 0; */
        /* (*dpp)->attcacheoff = -1; */
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
     * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
     * finds the relation has 0 rows and pages. See index.c.)
     */
-   new_rel_reltup->relpages = 10;      /* bogus estimates */
-   new_rel_reltup->reltuples = 1000;
+   switch (relkind)
+   {
+       case RELKIND_RELATION:
+       case RELKIND_INDEX:
+       case RELKIND_TOASTVALUE:
+           new_rel_reltup->relpages = 10;  /* bogus estimates */
+           new_rel_reltup->reltuples = 1000;
+           break;
+       case RELKIND_SEQUENCE:
+           new_rel_reltup->relpages = 1;
+           new_rel_reltup->reltuples = 1;
+           break;
+       default:                /* views, etc */
+           new_rel_reltup->relpages = 0;
+           new_rel_reltup->reltuples = 0;
+           break;
+   }
 
    new_rel_reltup->relowner = GetUserId();
    new_rel_reltup->reltype = new_type_oid;


diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index 2adb30e1ed8ecf91d12c0028495b8911ece7068d..5eefab114891fdc1b2bbcc7b407d6c96ac3c75ca 100644 (file)


--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -55,7 +55,7 @@
  */
 #define AVG_ATTR_SIZE 8
 #define NTUPLES_PER_PAGE(natts) \
-   ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+   ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
    ((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
 
 /* non-export function prototypes */
@@ -98,39 +98,6 @@ IsReindexProcessing(void)
    return reindexing;
 }
 
-/* ----------------------------------------------------------------
- *   sysatts is a structure containing attribute tuple forms
- *   for system attributes (numbered -1, -2, ...).  This really
- *   should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- *     Oid             attrelid;
- *     NameData        attname;
- *     Oid             atttypid;
- *     uint32          attnvals;
- *     int16           attlen;
- *     AttrNumber      attnum;
- *     uint32          attnelems;
- *     int32           attcacheoff;
- *     int32           atttypmod;
- *     bool            attbyval;
- *     bool            attisset;
- *     char            attalign;
- *     bool            attnotnull;
- *     bool            atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
-   {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
 /* ----------------------------------------------------------------
  *     GetHeapRelationOid
  * ----------------------------------------------------------------
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
    for (i = 0; i < numatts; i++)
    {
        AttrNumber  atnum;      /* attributeNumber[attributeOffset] */
-       AttrNumber  atind;
        Form_pg_attribute from;
        Form_pg_attribute to;
 
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
        {
 
            /*
-            * here we are indexing on a system attribute (-1...-n) so we
-            * convert atnum into a usable index 0...n-1 so we can use it
-            * to dereference the array sysatts[] which stores tuple
-            * descriptor information for system attributes.
+            * here we are indexing on a system attribute (-1...-n)
             */
-           if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
-               elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
-           atind = (-atnum) - 1;
-
-           from = &sysatts[atind];
+           from = SystemAttributeDefinition(atnum);
        }
        else
        {
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
            if (atnum > natts)
                elog(ERROR, "Cannot create index: attribute %d does not exist",
                     atnum);
-           atind = AttrNumberGetAttrOffset(atnum);
 
-           from = heapTupDesc->attrs[atind];
+           from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
        }
 
        /*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
         */
        to->attnum = i + 1;
 
-       to->attdispersion = 0.0;
+       to->attstattarget = 0;
+       to->attcacheoff = -1;
        to->attnotnull = false;
        to->atthasdef = false;
-       to->attcacheoff = -1;
 
        /*
         * We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
 
 /* ----------------
  *     UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index).  Note that this is not used
+ * in the context of VACUUM.
  * ----------------
  */
 void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
 {
    Relation    whichRel;
    Relation    pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
     * with zero size statistics until a VACUUM is done.  The optimizer
     * will generate very bad plans if the stats claim the table is empty
     * when it is actually sizable.  See also CREATE TABLE in heap.c.
+    *
+    * Note: this path is also taken during bootstrap, because bootstrap.c
+    * passes reltuples = 0 after loading a table.  We have to estimate some
+    * number for reltuples based on the actual number of pages.
     */
    relpages = RelationGetNumberOfBlocks(whichRel);
 
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
 
        for (i = 0; i < Natts_pg_class; i++)
        {
-           nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+           nulls[i] = ' ';
            replace[i] = ' ';
            values[i] = (Datum) NULL;
        }
 
        replace[Anum_pg_class_relpages - 1] = 'r';
-       values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+       values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
        replace[Anum_pg_class_reltuples - 1] = 'r';
-       values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+       values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
        newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
        simple_heap_update(pg_class, &tuple->t_self, newtup);
        if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
    TupleDesc   heapDescriptor;
    Datum       datum[INDEX_MAX_KEYS];
    char        nullv[INDEX_MAX_KEYS];
-   long        reltuples,
+   double      reltuples,
                indtuples;
    Node       *predicate = indexInfo->ii_Predicate;
 
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
                          0,    /* number of keys */
                          (ScanKey) NULL);      /* scan key */
 
-   reltuples = indtuples = 0;
+   reltuples = indtuples = 0.0;
 
    /*
     * for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       reltuples++;
+       reltuples += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
            slot->val = heapTuple;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               indtuples++;
+               indtuples += 1.0;
                continue;
            }
        }
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       indtuples++;
+       indtuples += 1.0;
 
        /*
         * FormIndexDatum fills in its datum and null parameters with


diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 88e56869da58eee31d6c7b0a764b93c6c73476a7..24cc7a8b254dc9a10dea74b263e52cf30f477964 100644 (file)


--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,19 +8,16 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include <sys/types.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
+#include <math.h>
 
 #include "access/heapam.h"
+#include "access/tuptoaster.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_operator.h"
@@ -29,43 +26,139 @@
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/fmgroids.h"
-#include "utils/inval.h"
 #include "utils/syscache.h"
+#include "utils/tuplesort.h"
 
-#define swapLong(a,b)  {long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b)   {int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
-                                  stats->f_cmpgt.fn_addr != NULL && \
-                                  RegProcedureIsValid(stats->outfunc) )
 
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+   ALG_MINIMAL = 1,            /* Compute only most-common-values */
+   ALG_SCALAR                  /* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value.  For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD  256
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed.  The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ */
+typedef struct
+{
+   /* These fields are set up by examine_attribute */
+   int         attnum;         /* attribute number */
+   AlgCode     algcode;        /* Which algorithm to use for this column */
+   int         minrows;        /* Minimum # of rows needed for stats */
+   Form_pg_attribute attr;     /* copy of pg_attribute row for column */
+   Form_pg_type attrtype;      /* copy of pg_type row for column */
+   Oid         eqopr;          /* '=' operator for datatype, if any */
+   Oid         eqfunc;         /* and associated function */
+   Oid         ltopr;          /* '<' operator for datatype, if any */
+
+   /* These fields are filled in by the actual statistics-gathering routine */
+   bool        stats_valid;
+   float4      stanullfrac;    /* fraction of entries that are NULL */
+   int4        stawidth;       /* average width */
+   float4      stadistinct;    /* # distinct values */
+   int2        stakind[STATISTIC_NUM_SLOTS];
+   Oid         staop[STATISTIC_NUM_SLOTS];
+   int         numnumbers[STATISTIC_NUM_SLOTS];
+   float4     *stanumbers[STATISTIC_NUM_SLOTS];
+   int         numvalues[STATISTIC_NUM_SLOTS];
+   Datum      *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+   Datum       value;          /* a data value */
+   int         tupno;          /* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+   int         count;          /* # of duplicates */
+   int         first;          /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+
+#define swapInt(a,b)   {int _tmp; _tmp=a; a=b; b=_tmp;}
+#define swapDatum(a,b) {Datum _tmp; _tmp=a; a=b; b=_tmp;}
 
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+                              int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+                                       BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+                                 TupleDesc tupDesc, long totalrows,
+                                 HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+                                TupleDesc tupDesc, long totalrows,
+                                HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
 
 
 /*
- * analyze_rel() -- analyze relation
+ * analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
 {
-   HeapTuple   tuple;
    Relation    onerel;
-   int32       i;
-   int         attr_cnt,
-              *attnums = NULL;
    Form_pg_attribute *attr;
-   VacAttrStats *vacattrstats;
-   HeapScanDesc scan;
+   int         attr_cnt,
+               tcnt,
+               i;
+   VacAttrStats **vacattrstats;
+   int         targrows,
+               numrows;
+   long        totalrows;
+   HeapTuple  *rows;
+   HeapTuple   tuple;
+
+   if (vacstmt->verbose)
+       MESSAGE_LEVEL = NOTICE;
+   else
+       MESSAGE_LEVEL = DEBUG;
 
+   /*
+    * Begin a transaction for analyzing this relation.
+    *
+    * Note: All memory allocated during ANALYZE will live in
+    * TransactionCommandContext or a subcontext thereof, so it will
+    * all be released by transaction commit at the end of this routine.
+    */
    StartTransactionCommand();
 
    /*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 
    /*
     * Race condition -- if the pg_class tuple has gone away since the
-    * last time we saw it, we don't need to vacuum it.
+    * last time we saw it, we don't need to process it.
     */
    tuple = SearchSysCache(RELOID,
                           ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
 
    /*
-    * We can VACUUM ANALYZE any table except pg_statistic. see
-    * update_relstats
+    * We can ANALYZE any table except pg_statistic. See update_attstats
     */
    if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
               StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
    ReleaseSysCache(tuple);
 
+   /*
+    * Open the class, getting only a read lock on it, and check permissions
+    */
    onerel = heap_open(relid, AccessShareLock);
 
    if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
                       RELNAME))
    {
-
-       /*
-        * we already did an elog during vacuum elog(NOTICE, "Skipping
-        * \"%s\" --- only table owner can VACUUM it",
-        * RelationGetRelationName(onerel));
-        */
+       /* No need for a notice if we already complained during VACUUM */
+       if (!vacstmt->vacuum)
+           elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+                RelationGetRelationName(onerel));
        heap_close(onerel, NoLock);
        CommitTransactionCommand();
        return;
    }
 
-   elog(MESSAGE_LEVEL, "Analyzing...");
+   elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
 
-   attr_cnt = onerel->rd_att->natts;
+   /*
+    * Determine which columns to analyze
+    *
+    * Note that system attributes are never analyzed.
+    */
    attr = onerel->rd_att->attrs;
+   attr_cnt = onerel->rd_att->natts;
 
-   if (anal_cols2 != NIL)
+   if (vacstmt->va_cols != NIL)
    {
-       int         tcnt = 0;
        List       *le;
 
-       if (length(anal_cols2) > attr_cnt)
-           elog(ERROR, "vacuum: too many attributes specified for relation %s",
-                RelationGetRelationName(onerel));
-       attnums = (int *) palloc(attr_cnt * sizeof(int));
-       foreach(le, anal_cols2)
+       vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       foreach(le, vacstmt->va_cols)
        {
-           char       *col = (char *) lfirst(le);
+           char       *col = strVal(lfirst(le));
 
            for (i = 0; i < attr_cnt; i++)
            {
                if (namestrcmp(&(attr[i]->attname), col) == 0)
                    break;
            }
-           if (i < attr_cnt)   /* found */
-               attnums[tcnt++] = i;
-           else
-           {
-               elog(ERROR, "vacuum: there is no attribute %s in %s",
+           if (i >= attr_cnt)
+               elog(ERROR, "ANALYZE: there is no attribute %s in %s",
                     col, RelationGetRelationName(onerel));
-           }
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
+       }
+       attr_cnt = tcnt;
+   }
+   else
+   {
+       vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       for (i = 0; i < attr_cnt; i++)
+       {
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
        }
        attr_cnt = tcnt;
    }
 
-   vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+   /*
+    * Quit if no analyzable columns
+    */
+   if (attr_cnt <= 0)
+   {
+       heap_close(onerel, NoLock);
+       CommitTransactionCommand();
+       return;
+   }
 
+   /*
+    * Determine how many rows we need to sample, using the worst case
+    * from all analyzable columns.  We use a lower bound of 100 rows
+    * to avoid possible overflow in Vitter's algorithm.
+    */
+   targrows = 100;
    for (i = 0; i < attr_cnt; i++)
    {
-       Operator    func_operator;
-       VacAttrStats *stats;
-
-       stats = &vacattrstats[i];
-       stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
-       memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
-              ATTRIBUTE_TUPLE_SIZE);
-       stats->best = stats->guess1 = stats->guess2 = 0;
-       stats->max = stats->min = 0;
-       stats->best_len = stats->guess1_len = stats->guess2_len = 0;
-       stats->max_len = stats->min_len = 0;
-       stats->initialized = false;
-       stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
-       stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
-       func_operator = compatible_oper("=",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
-       {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
-           ReleaseSysCache(func_operator);
-       }
-       else
-           stats->f_cmpeq.fn_addr = NULL;
+       if (targrows < vacattrstats[i]->minrows)
+           targrows = vacattrstats[i]->minrows;
+   }
+
+   /*
+    * Acquire the sample rows
+    */
+   rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+   numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
 
-       func_operator = compatible_oper("<",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we are running a standalone ANALYZE, update pages/tuples stats
+    * in pg_class.  We have the accurate page count from heap_beginscan,
+    * but only an approximate number of tuples; therefore, if we are
+    * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+    * inserted by VACUUM.
+    */
+   if (!vacstmt->vacuum)
+       vac_update_relstats(RelationGetRelid(onerel),
+                           onerel->rd_nblocks,
+                           (double) totalrows,
+                           RelationGetForm(onerel)->relhasindex);
+
+   /*
+    * Compute the statistics.  Temporary results during the calculations
+    * for each column are stored in a child context.  The calc routines
+    * are responsible to make sure that whatever they store into the
+    * VacAttrStats structure is allocated in TransactionCommandContext.
+    */
+   if (numrows > 0)
+   {
+       MemoryContext col_context,
+                   old_context;
+
+       col_context = AllocSetContextCreate(CurrentMemoryContext,
+                                           "Analyze Column",
+                                           ALLOCSET_DEFAULT_MINSIZE,
+                                           ALLOCSET_DEFAULT_INITSIZE,
+                                           ALLOCSET_DEFAULT_MAXSIZE);
+       old_context = MemoryContextSwitchTo(col_context);
+       for (i = 0; i < attr_cnt; i++)
        {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
-           stats->op_cmplt = oprid(func_operator);
-           ReleaseSysCache(func_operator);
+           switch (vacattrstats[i]->algcode)
+           {
+               case ALG_MINIMAL:
+                   compute_minimal_stats(vacattrstats[i],
+                                         onerel->rd_att, totalrows,
+                                         rows, numrows);
+                   break;
+               case ALG_SCALAR:
+                   compute_scalar_stats(vacattrstats[i],
+                                        onerel->rd_att, totalrows,
+                                        rows, numrows);
+                   break;
+           }
+           MemoryContextResetAndDeleteChildren(col_context);
        }
-       else
+       MemoryContextSwitchTo(old_context);
+       MemoryContextDelete(col_context);
+
+       /*
+        * Emit the completed stats rows into pg_statistic, replacing any
+        * previous statistics for the target columns.  (If there are stats
+        * in pg_statistic for columns we didn't process, we leave them alone.)
+        */
+       update_attstats(relid, attr_cnt, vacattrstats);
+   }
+
+   /*
+    * Close source relation now, but keep lock so that no one deletes it
+    * before we commit.  (If someone did, they'd fail to clean up the
+    * entries we made in pg_statistic.)
+    */
+   heap_close(onerel, NoLock);
+
+   /* Commit and release working memory */
+   CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * attnum is 1-based.  Returns a palloc'd, initialized VacAttrStats if the
+ * column is analyzable; NULL if its stats target is <= 0 or "=" is unusable.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+   Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+   Operator    func_operator;
+   Oid         oprrest;
+   HeapTuple   typtuple;
+   Oid         eqopr = InvalidOid;
+   Oid         eqfunc = InvalidOid;
+   Oid         ltopr = InvalidOid;
+   VacAttrStats *stats;
+
+   /* Don't analyze column if user has specified not to */
+   if (attr->attstattarget <= 0)
+       return NULL;
+
+   /* Need "=" with eqsel as its restriction estimator, else we can't do much */
+   func_operator = compatible_oper("=",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_EQSEL)
        {
-           stats->f_cmplt.fn_addr = NULL;
-           stats->op_cmplt = InvalidOid;
+           eqopr = oprid(func_operator);
+           eqfunc = oprfuncid(func_operator);
        }
+       ReleaseSysCache(func_operator);
+   }
+   if (!OidIsValid(eqfunc))    /* no "=" found, or it doesn't use eqsel */
+       return NULL;
 
-       func_operator = compatible_oper(">",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we have "=" then we're at least able to do the minimal algorithm,
+    * so start filling in a VacAttrStats struct.
+    */
+   stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+   MemSet(stats, 0, sizeof(VacAttrStats));
+   stats->attnum = attnum;
+   stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+   memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+   typtuple = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(attr->atttypid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(typtuple))
+       elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+   stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+   memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+   ReleaseSysCache(typtuple);  /* private copy of the pg_type row made above */
+   stats->eqopr = eqopr;
+   stats->eqfunc = eqfunc;
+
+   /* Is there a "<" operator whose restriction estimator is scalarltsel? */
+   func_operator = compatible_oper("<",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_SCALARLTSEL)
        {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
-           ReleaseSysCache(func_operator);
+           ltopr = oprid(func_operator);
        }
-       else
-           stats->f_cmpgt.fn_addr = NULL;
+       ReleaseSysCache(func_operator);
+   }
+   stats->ltopr = ltopr;       /* stays InvalidOid if no usable "<" was found */
+
+   /*
+    * Determine the algorithm to use (this will get more complicated later)
+    */
+   if (OidIsValid(ltopr))
+   {
+       /* Seems to be a scalar datatype */
+       stats->algcode = ALG_SCALAR;
+       /*--------------------
+        * The following choice of minrows is based on the paper
+        * "Random sampling for histogram construction: how much is enough?"
+        * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+        * Proceedings of ACM SIGMOD International Conference on Management
+        * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
+        * says that for table size n, histogram size k, maximum relative
+        * error in bin size f, and error probability gamma, the minimum
+        * random sample size is
+        *      r = 4 * k * ln(2*n/gamma) / f^2
+        * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+        *      r = 305.82 * k
+        * Note that because of the log function, the dependence on n is
+        * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+        * bin size error with probability 0.99.  So there's no real need to
+        * scale for n, which is a good thing because we don't necessarily
+        * know it at this point.
+        *--------------------
+        */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+   else
+   {
+       /* Only "=" is available, so fall back to the minimal algorithm */
+       stats->algcode = ALG_MINIMAL;
+       /* Might as well use the same minrows as above */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+
+   return stats;
+}
 
-       tuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(stats->attr->atttypid),
-                              0, 0, 0);
-       if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are copied into rows[] (which must have room for
+ * targrows entries); if the table has fewer rows, all are collected.  When
+ * the table is larger than targrows, an approximately random sample is
+ * taken by reservoir sampling (see the long comment in the body).
+ *
+ * Returns the number of rows stored in rows[].  An estimate of the total
+ * number of rows in the table is stored into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+                   long *totalrows)
+{
+   int         numrows = 0;
+   HeapScanDesc scan;
+   HeapTuple   tuple;
+   ItemPointer lasttuple;
+   BlockNumber lastblock,
+               estblock;
+   OffsetNumber lastoffset;
+   int         numest;
+   double      tuplesperpage;
+   long        t;
+   double      rstate;
+
+   Assert(targrows > 1);
+   /*
+    * Do a simple linear scan until we reach the target number of rows.
+    */
+   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       rows[numrows++] = heap_copytuple(tuple);
+       if (numrows >= targrows)
+           break;
+   }
+   heap_endscan(scan);
+   /*
+    * If we ran out of tuples then we're done, no matter how few we
+    * collected.  No sort is needed, since they're already in order.
+    */
+   if (!HeapTupleIsValid(tuple))
+   {
+       *totalrows = numrows;
+       return numrows;
+   }
+   /*
+    * Otherwise, start replacing tuples in the sample until we reach the
+    * end of the relation.  This algorithm is from Jeff Vitter's paper
+    * (see full citation below).  It works by repeatedly computing the number
+    * of the next tuple we want to fetch, which will replace a randomly
+    * chosen element of the reservoir (current set of tuples).  At all times
+    * the reservoir is a true random sample of the tuples we've passed over
+    * so far, so when we fall off the end of the relation we're done.
+    *
+    * A slight difficulty is that since we don't want to fetch tuples or even
+    * pages that we skip over, it's not possible to fetch *exactly* the N'th
+    * tuple at each step --- we don't know how many valid tuples are on
+    * the skipped pages.  We handle this by assuming that the average number
+    * of valid tuples/page on the pages already scanned over holds good for
+    * the rest of the relation as well; this lets us estimate which page
+    * the next tuple should be on and its position in the page.  Then we
+    * fetch the first valid tuple at or after that position, being careful
+    * not to use the same tuple twice.  This approach should still give a
+    * good random sample, although it's not perfect.
+    */
+   lasttuple = &(rows[numrows-1]->t_self);
+   lastblock = ItemPointerGetBlockNumber(lasttuple);
+   lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+   /*
+    * If possible, estimate tuples/page using only completely-scanned pages.
+    */
+   for (numest = numrows; numest > 0; numest--)
+   {
+       if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+           break;
+   }
+   if (numest == 0)
+   {
+       numest = numrows;       /* don't have a full page? */
+       estblock = lastblock + 1;
+   }
+   else
+   {
+       estblock = lastblock;
+   }
+   tuplesperpage = (double) numest / (double) estblock;
+
+   t = numrows;                /* t is the # of records processed so far */
+   rstate = init_selection_state(targrows);
+   for (;;)
+   {
+       double          targpos;
+       BlockNumber     targblock;
+       OffsetNumber    targoffset,
+                       maxoffset;
+
+       t = select_next_random_record(t, targrows, &rstate);
+       /* Estimate the (block, offset) position of the t'th record */
+       targpos = (double) t / tuplesperpage;
+       targblock = (BlockNumber) targpos;
+       targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
+           FirstOffsetNumber;  /* scale the page fraction, THEN truncate */
+       /* Make sure we are past the last selected record */
+       if (targblock <= lastblock)
        {
-           stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
-           stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
-           ReleaseSysCache(tuple);
+           targblock = lastblock;
+           if (targoffset <= lastoffset)
+               targoffset = lastoffset + 1;
        }
-       else
+       /* Loop to find first valid record at or after given position */
+   pageloop:;
+       /*
+        * Have we fallen off the end of the relation?  (We rely on
+        * heap_beginscan to have updated rd_nblocks.)
+        */
+       if (targblock >= onerel->rd_nblocks)
+           break;
+       maxoffset = get_page_max_offset(onerel, targblock);
+       for (;;)
        {
-           stats->outfunc = InvalidOid;
-           stats->typelem = InvalidOid;
+           HeapTupleData targtuple;
+           Buffer      targbuffer;
+
+           if (targoffset > maxoffset)
+           {
+               /* Fell off end of this page, try next */
+               targblock++;
+               targoffset = FirstOffsetNumber;
+               goto pageloop;
+           }
+           ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+           heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+           if (targtuple.t_data != NULL)
+           {
+               /*
+                * Found a suitable tuple, so save it, replacing one old
+                * tuple at random
+                */
+               int     k = (int) (targrows * random_fract());
+
+               Assert(k >= 0 && k < targrows);
+               heap_freetuple(rows[k]);
+               rows[k] = heap_copytuple(&targtuple);
+               ReleaseBuffer(targbuffer);
+               lastblock = targblock;
+               lastoffset = targoffset;
+               break;
+           }
+           /* this tuple is dead, so advance to next one on same page */
+           targoffset++;
        }
    }
-   /* delete existing pg_statistic rows for relation */
-   del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
-   /* scan relation to gather statistics */
-   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
-       attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+   /*
+    * Now we need to sort the collected tuples by position (itempointer).
+    */
+   qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
 
-   heap_endscan(scan);
+   /*
+    * Estimate total number of valid rows in relation.
+    */
+   *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
 
-   /* close rel, but keep lock so it doesn't go away before commit */
-   heap_close(onerel, NoLock);
+   return numrows;
+}
 
-   /* update statistics in pg_class */
-   update_attstats(relid, attr_cnt, vacattrstats);
+/* Return a random double R uniformly distributed in the open range 0 < R < 1 */
+static double
+random_fract(void)
+{
+   long    z;
 
-   CommitTransactionCommand();
+   /* reject 0 and MAX_RANDOM_VALUE from random() so R is never exactly 0 or 1 */
+   do
+   {
+       z = random();
+   } while (! (z > 0 && z < MAX_RANDOM_VALUE));
+   return (double) z / (double) MAX_RANDOM_VALUE;
 }
 
 /*
- * attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed and next record to process.  The only extra
+ * state needed between calls is W, a random state variable.
  *
- * We compute the column min, max, null and non-null counts.
- * Plus we attempt to find the count of the value that occurs most
- * frequently in each column.  These figures are used to compute
- * the selectivity of the column.
+ * init_selection_state computes the initial W value.
  *
- * We use a three-bucket cache to get the most frequent item.
- * The 'guess' buckets count hits.  A cache miss causes guess1
- * to get the most hit 'guess' item in the most recent cycle, and
- * the new item goes into guess2.  Whenever the total count of hits
- * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+   double  u = random_fract(); /* draw for the first W used by Algorithm Z */
+   return exp(-log(u) / n);
+}
+
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+   /* The magic constant 22 is T from Vitter's paper: use X while t <= T*n */
+   if (t <= (22 * n))
+   {
+       /* Algorithm X: step t forward one record at a time; *stateptr unused */
+       double  V,
+               quot;
+
+       V = random_fract();     /* Generate V */
+       t++;
+       quot = (double) (t - n) / (double) t;
+       /* Find min S satisfying (4.1) */
+       while (quot > V)
+       {
+           t++;
+           quot *= (double) (t - n) / (double) t;
+       }
+   }
+   else
+   {
+       /* Now apply Algorithm Z */
+       double  W = *stateptr;  /* random state carried over from caller */
+       long    term = t - n + 1;
+       int     S;              /* count of records to skip */
+
+       for (;;)
+       {
+           long    numer,
+                   numer_lim,
+                   denom;
+           double  U,
+                   X,
+                   lhs,
+                   rhs,
+                   y,
+                   tmp;
+
+           /* Generate U and X */
+           U = random_fract();
+           X = t * (W - 1.0);
+           S = X;              /* S = floor(X); NOTE(review): assumes X fits in int */
+           /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+           tmp = (double) (t + 1) / (double) term;
+           lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+           rhs = (((t + X)/(term + S)) * term)/t;
+           if (lhs <= rhs)
+           {
+               W = rhs/lhs;    /* accepted: this W is what gets saved below */
+               break;
+           }
+           /* Test if U <= f(S)/cg(X) */
+           y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+           if (n < S)
+           {
+               denom = t;
+               numer_lim = term + S;
+           }
+           else
+           {
+               denom = t - n + S;
+               numer_lim = t + 1;
+           }
+           for (numer = t + S; numer >= numer_lim; numer--)
+           {
+               y *= (double) numer / (double) denom;
+               denom--;
+           }
+           W = exp(- log(random_fract())/n); /* Generate W in advance */
+           if (exp(log(y)/n) <= (t + X)/t)
+               break;          /* second test passed: accept this S */
+       }
+       t += S + 1;             /* skip S records, select the one after them */
+       *stateptr = W;          /* hand W back for use by the next call */
+   }
+   return t;
+}
+
+/*
+ * compare_rows -- qsort comparator ordering rows[] by item pointer
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+   ItemPointer lhs = &(*(HeapTuple *) a)->t_self;
+   ItemPointer rhs = &(*(HeapTuple *) b)->t_self;
+   BlockNumber lblk = ItemPointerGetBlockNumber(lhs);
+   BlockNumber rblk = ItemPointerGetBlockNumber(rhs);
+   OffsetNumber loff = ItemPointerGetOffsetNumber(lhs);
+   OffsetNumber roff = ItemPointerGetOffsetNumber(rhs);
+
+   /* block number is the major sort key */
+   if (lblk != rblk)
+       return (lblk < rblk) ? -1 : 1;
+   /* same block: fall back to the offset number */
+   if (loff != roff)
+       return (loff < roff) ? -1 : 1;
+
+   /* block and offset both match */
+   return 0;
+}
+
+/*
+ * get_page_max_offset -- report the largest valid tuple offset number on
+ * block 'blocknumber' of 'relation'.
+ * (This code probably ought to live in some other module.)
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+   OffsetNumber maxoff;
+   Buffer      buf = ReadBuffer(relation, blocknumber);
+
+   if (!BufferIsValid(buf))
+       elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+            RelationGetRelationName(relation), (long) blocknumber);
+
+   /* keep the share lock only while the page is being inspected */
+   LockBuffer(buf, BUFFER_LOCK_SHARE);
+   maxoff = PageGetMaxOffsetNumber(BufferGetPage(buf));
+   LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+   ReleaseBuffer(buf);
+   return maxoff;
+}
+
+
+/*
+ * compute_minimal_stats() -- compute minimal column statistics
  *
- * This method works perfectly for columns with unique values, and columns
- * with only two unique values, plus nulls.
+ * We use this when we can find only an "=" operator for the datatype.
  *
- * It becomes less perfect as the number of unique values increases and
- * their distribution in the table becomes more random.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, and the (estimated) number of distinct values.
  *
+ * The most common values are determined by brute force: we keep a list
+ * of previously seen values, ordered by number of times seen, as we scan
+ * the samples.  A newly seen value is inserted just after the last
+ * multiply-seen value, causing the bottommost (oldest) singly-seen value
+ * to drop off the list.  The accuracy of this method, and also its cost,
+ * depend mainly on the length of the list we are willing to keep.
  */
 static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+                     TupleDesc tupDesc, long totalrows,
+                     HeapTuple *rows, int numrows)
 {
    int         i;
-   TupleDesc   tupDesc = onerel->rd_att;
-
-   for (i = 0; i < attr_cnt; i++)
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   FmgrInfo    f_cmpeq;
+   typedef struct
+   {
+       Datum   value;
+       int     count;
+   } TrackItem;
+   TrackItem  *track;
+   int         track_cnt,
+               track_max;
+   int         num_mcv = stats->attr->attstattarget;
+
+   /* We track up to 2*n values for an n-element MCV list; but at least 10 */
+   track_max = 2 * num_mcv;
+   if (track_max < 10)
+       track_max = 10;
+   track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+   track_cnt = 0;
+
+   fmgr_info(stats->eqfunc, &f_cmpeq);
+
+   for (i = 0; i < numrows; i++)
    {
-       VacAttrStats *stats = &vacattrstats[i];
-       Datum       origvalue;
+       HeapTuple   tuple = rows[i];
        Datum       value;
        bool        isnull;
-       bool        value_hit;
-
-       if (!VacAttrStatsEqValid(stats))
-           continue;
-
-#ifdef _DROP_COLUMN_HACK__
-       if (COLUMN_IS_DROPPED(stats->attr))
-           continue;
-#endif  /* _DROP_COLUMN_HACK__ */
+       bool        match;
+       int         firstcount1,
+                   j;
 
-       origvalue = heap_getattr(tuple, stats->attr->attnum,
-                                tupDesc, &isnull);
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
+       /* Check for null/nonnull */
        if (isnull)
        {
-           stats->null_cnt++;
+           null_cnt++;
            continue;
        }
-       stats->nonnull_cnt++;
+       nonnull_cnt++;
 
        /*
-        * If the value is toasted, detoast it to avoid repeated
-        * detoastings and resultant memory leakage inside the comparison
-        * routines.
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
         */
-       if (!stats->attr->attbyval && stats->attr->attlen == -1)
-           value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
-       else
-           value = origvalue;
-
-       if (!stats->initialized)
+       if (is_varlena)
        {
-           bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
-           /* best_cnt gets incremented below */
-           bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
-           stats->guess1_cnt = stats->guess1_hits = 1;
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess2_hits = 1;
-           if (VacAttrStatsLtGtValid(stats))
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               /* min_cnt, max_cnt get incremented below */
+               toowide_cnt++;
+               continue;
            }
-           stats->initialized = true;
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
        }
 
-       if (VacAttrStatsLtGtValid(stats))
+       /*
+        * See if the value matches anything we're already tracking.
+        */
+       match = false;
+       firstcount1 = track_cnt;
+       for (j = 0; j < track_cnt; j++)
        {
-           if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
-                                          value, stats->min)))
+           if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
            {
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               stats->min_cnt = 1;
+               match = true;
+               break;
            }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->min)))
-               stats->min_cnt++;
+           if (j < firstcount1 && track[j].count == 1)
+               firstcount1 = j;
+       }
 
-           if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
-                                          value, stats->max)))
+       if (match)
+       {
+           /* Found a match */
+           track[j].count++;
+           /* This value may now need to "bubble up" in the track list */
+           while (j > 0 && track[j].count > track[j-1].count)
            {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               stats->max_cnt = 1;
+               swapDatum(track[j].value, track[j-1].value);
+               swapInt(track[j].count, track[j-1].count);
+               j--;
            }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->max)))
-               stats->max_cnt++;
        }
-
-       value_hit = true;
-       if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                      value, stats->best)))
-           stats->best_cnt++;
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess1)))
+       else
        {
-           stats->guess1_cnt++;
-           stats->guess1_hits++;
+           /* No match.  Insert at head of count-1 list */
+           if (track_cnt < track_max)
+               track_cnt++;
+           for (j = track_cnt-1; j > firstcount1; j--)
+           {
+               track[j].value = track[j-1].value;
+               track[j].count = track[j-1].count;
+           }
+           if (firstcount1 < track_cnt)
+           {
+               track[firstcount1].value = value;
+               track[firstcount1].count = 1;
+           }
        }
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess2)))
-           stats->guess2_hits++;
+   }
+
+   /* We can only compute valid stats if we found some non-null values. */
+   if (nonnull_cnt > 0)
+   {
+       int     nmultiple,
+               summultiple;
+
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
        else
-           value_hit = false;
+           stats->stawidth = stats->attrtype->typlen;
 
-       if (stats->guess2_hits > stats->guess1_hits)
+       /* Count the number of values we found multiple times */
+       summultiple = 0;
+       for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
        {
-           swapDatum(stats->guess1, stats->guess2);
-           swapInt(stats->guess1_len, stats->guess2_len);
-           swapLong(stats->guess1_hits, stats->guess2_hits);
-           stats->guess1_cnt = stats->guess1_hits;
+           if (track[nmultiple].count == 1)
+               break;
+           summultiple += track[nmultiple].count;
        }
-       if (stats->guess1_cnt > stats->best_cnt)
+
+       if (nmultiple == 0)
        {
-           swapDatum(stats->best, stats->guess1);
-           swapInt(stats->best_len, stats->guess1_len);
-           swapLong(stats->best_cnt, stats->guess1_cnt);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
        }
-       if (!value_hit)
+       else if (track_cnt < track_max && toowide_cnt == 0 &&
+                nmultiple == track_cnt)
        {
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /*
+            * Our track list includes every value in the sample, and every
+            * value appeared more than once.  Assume the column has just
+            * these values.
+            */
+           stats->stadistinct = track_cnt;
        }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * We assume (not very reliably!) that all the multiply-occurring
+            * values are reflected in the final track[] list, and the other
+            * nonnull values all appeared but once.
+            *----------
+            */
+           int     f1 = nonnull_cnt - summultiple;
+           double  term1;
 
-       /* Clean up detoasted copy, if any */
-       if (value != origvalue)
-           pfree(DatumGetPointer(value));
-   }
-}
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
 
-/*
- * bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
-   if (attr->attbyval)
-       *bucket = value;
-   else
-   {
-       int         len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
 
-       /* Avoid unnecessary palloc() traffic... */
-       if (len > *bucket_len)
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       if (num_mcv > 0)
        {
-           if (*bucket_len != 0)
-               pfree(DatumGetPointer(*bucket));
-           *bucket = PointerGetDatum(palloc(len));
-           *bucket_len = len;
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(track[i].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
+           }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[0] = STATISTIC_KIND_MCV;
+           stats->staop[0] = stats->eqopr;
+           stats->stanumbers[0] = mcv_freqs;
+           stats->numnumbers[0] = num_mcv;
+           stats->stavalues[0] = mcv_values;
+           stats->numvalues[0] = num_mcv;
        }
-       memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
    }
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 
 /*
- * update_attstats() -- update attribute statistics for one relation
+ * compute_scalar_stats() -- compute column statistics
  *
- *     Statistics are stored in several places: the pg_class row for the
- *     relation has stats about the whole relation, the pg_attribute rows
- *     for each attribute store "dispersion", and there is a pg_statistic
- *     row for each (non-system) attribute.  (Dispersion probably ought to
- *     be moved to pg_statistic, but it's not worth doing unless there's
- *     another reason to have to change pg_attribute.)  The pg_class values
- *     are updated by VACUUM, not here.
- *
- *     We violate no-overwrite semantics here by storing new values for
- *     the dispersion column directly into the pg_attribute tuple that's
- *     already on the page.  The reason for this is that if we updated
- *     these tuples in the usual way, vacuuming pg_attribute itself
- *     wouldn't work very well --- by the time we got done with a vacuum
- *     cycle, most of the tuples in pg_attribute would've been obsoleted.
- *     Updating pg_attribute's own statistics would be especially tricky.
- *     Of course, this only works for fixed-size never-null columns, but
- *     dispersion is.
+ * We use this when we can find "=" and "<" operators for the datatype.
  *
- *     pg_statistic rows are just added normally.  This means that
- *     pg_statistic will probably contain some deleted rows at the
- *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ * We determine the fraction of null rows, the average width, the
+ * most common values, the (estimated) number of distinct values, the
+ * distribution histogram, and the correlation of physical to logical order.
  *
- *     To keep things simple, we punt for pg_statistic, and don't try
- *     to compute or store rows for pg_statistic itself in pg_statistic.
- *     This could possibly be made to work, but it's not worth the trouble.
+ * The desired stats can be determined fairly easily after sorting the
+ * data values into order.
  */
 static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+                    TupleDesc tupDesc, long totalrows,
+                    HeapTuple *rows, int numrows)
 {
-   Relation    ad,
-               sd;
-   HeapScanDesc scan;
-   HeapTuple   atup,
-               stup;
-   ScanKeyData askey;
-   Form_pg_attribute attp;
-
-   ad = heap_openr(AttributeRelationName, RowExclusiveLock);
-   sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
-   /* Find pg_attribute rows for this relation */
-   ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
-                          F_INT4EQ, relid);
-
-   scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
-   while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+   int         i;
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   double      corr_xysum;
+   RegProcedure cmpFn;
+   SortFunctionKind cmpFnKind;
+   FmgrInfo    f_cmpfn;
+   ScalarItem *values;
+   int         values_cnt = 0;
+   int        *tupnoLink;
+   ScalarMCVItem *track;
+   int         track_cnt = 0;
+   int         num_mcv = stats->attr->attstattarget;
+
+   values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+   tupnoLink = (int *) palloc(numrows * sizeof(int));
+   track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+   SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+   fmgr_info(cmpFn, &f_cmpfn);
+
+   /* Initial scan to find sortable values */
+   for (i = 0; i < numrows; i++)
    {
-       int         i;
-       VacAttrStats *stats;
+       HeapTuple   tuple = rows[i];
+       Datum       value;
+       bool        isnull;
 
-       attp = (Form_pg_attribute) GETSTRUCT(atup);
-       if (attp->attnum <= 0)  /* skip system attributes for now */
-           continue;
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
-       for (i = 0; i < natts; i++)
+       /* Check for null/nonnull */
+       if (isnull)
        {
-           if (attp->attnum == vacattrstats[i].attr->attnum)
-               break;
+           null_cnt++;
+           continue;
        }
-       if (i >= natts)
-           continue;           /* skip attr if no stats collected */
-       stats = &(vacattrstats[i]);
+       nonnull_cnt++;
 
-       if (VacAttrStatsEqValid(stats))
+       /*
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
+        */
+       if (is_varlena)
        {
-           float4      selratio;       /* average ratio of rows selected
-                                        * for a random constant */
-
-           /* Compute dispersion */
-           if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
-
-               /*
-                * empty relation, so put a dummy value in attdispersion
-                */
-               selratio = 0;
+               toowide_cnt++;
+               continue;
            }
-           else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
-           {
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
+       }
 
-               /*
-                * looks like we have a unique-key attribute --- flag this
-                * with special -1.0 flag value.
-                *
-                * The correct dispersion is 1.0/numberOfRows, but since the
-                * relation row count can get updated without recomputing
-                * dispersion, we want to store a "symbolic" value and
-                * figure 1.0/numberOfRows on the fly.
-                */
-               selratio = -1;
-           }
-           else
+       /* Add it to the list to be sorted */
+       values[values_cnt].value = value;
+       values[values_cnt].tupno = values_cnt;
+       tupnoLink[values_cnt] = values_cnt;
+       values_cnt++;
+   }
+
+   /* We can only compute valid stats if we found some sortable values. */
+   if (values_cnt > 0)
+   {
+       int     ndistinct,      /* # distinct values in sample */
+               nmultiple,      /* # that appear multiple times */
+               num_hist,
+               dups_cnt;
+       int     slot_idx = 0;
+
+       /* Sort the collected values */
+       datumCmpFn = &f_cmpfn;
+       datumCmpFnKind = cmpFnKind;
+       datumCmpTupnoLink = tupnoLink;
+       qsort((void *) values, values_cnt,
+             sizeof(ScalarItem), compare_scalars);
+
+       /*
+        * Now scan the values in order, find the most common ones,
+        * and also accumulate ordering-correlation statistics.
+        *
+        * To determine which are most common, we first have to count the
+        * number of duplicates of each value.  The duplicates are adjacent
+        * in the sorted list, so a brute-force approach is to compare
+        * successive datum values until we find two that are not equal.
+        * However, that requires N-1 invocations of the datum comparison
+        * routine, which are completely redundant with work that was done
+        * during the sort.  (The sort algorithm must at some point have
+        * compared each pair of items that are adjacent in the sorted order;
+        * otherwise it could not know that it's ordered the pair correctly.)
+        * We exploit this by having compare_scalars remember the highest
+        * tupno index that each ScalarItem has been found equal to.  At the
+        * end of the sort, a ScalarItem's tupnoLink will still point to
+        * itself if and only if it is the last item of its group of
+        * duplicates (since the group will be ordered by tupno).
+        */
+       corr_xysum = 0;
+       ndistinct = 0;
+       nmultiple = 0;
+       dups_cnt = 0;
+       for (i = 0; i < values_cnt; i++)
+       {
+           int         tupno = values[i].tupno;
+
+           corr_xysum += (double) i * (double) tupno;
+           dups_cnt++;
+           if (tupnoLink[tupno] == tupno)
            {
-               if (VacAttrStatsLtGtValid(stats) &&
-                   stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+               /* Reached end of duplicates of this value */
+               ndistinct++;
+               if (dups_cnt > 1)
                {
+                   nmultiple++;
+                   if (track_cnt < num_mcv ||
+                       dups_cnt > track[track_cnt-1].count)
+                   {
+                       /*
+                        * Found a new item for the mcv list; find its
+                        * position, bubbling down old items if needed.
+                        * Loop invariant is that j points at an empty/
+                        * replaceable slot.
+                        */
+                       int     j;
+
+                       if (track_cnt < num_mcv)
+                           track_cnt++;
+                       for (j = track_cnt-1; j > 0; j--)
+                       {
+                           if (dups_cnt <= track[j-1].count)
+                               break;
+                           track[j].count = track[j-1].count;
+                           track[j].first = track[j-1].first;
+                       }
+                       track[j].count = dups_cnt;
+                       track[j].first = i + 1 - dups_cnt;
+                   }
+               }
+               dups_cnt = 0;
+           }
+       }
 
-                   /*
-                    * exact result when there are just 1 or 2 values...
-                    */
-                   double      min_cnt_d = stats->min_cnt,
-                               max_cnt_d = stats->max_cnt,
-                               null_cnt_d = stats->null_cnt;
-                   double      total = ((double) stats->nonnull_cnt) + null_cnt_d;
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
+       else
+           stats->stawidth = stats->attrtype->typlen;
 
-                   selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
-               }
-               else
-               {
-                   double      most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-                   double      total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+       if (nmultiple == 0)
+       {
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
+       }
+       else if (toowide_cnt == 0 && nmultiple == ndistinct)
+       {
+           /*
+            * Every value in the sample appeared more than once.  Assume the
+            * column has just these values.
+            */
+           stats->stadistinct = ndistinct;
+       }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * Overwidth values are assumed to have been distinct.
+            *----------
+            */
+           int     f1 = ndistinct - nmultiple + toowide_cnt;
+           double  term1;
 
-                   /*
-                    * we assume count of other values are 20% of best
-                    * count in table
-                    */
-                   selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
-               }
-               /* Make sure calculated values are in-range */
-               if (selratio < 0.0)
-                   selratio = 0.0;
-               else if (selratio > 1.0)
-                   selratio = 1.0;
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
+
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
+
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       Assert(track_cnt >= num_mcv);
+       if (num_mcv > 0)
+       {
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(values[track[i].first].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+           stats->staop[slot_idx] = stats->eqopr;
+           stats->stanumbers[slot_idx] = mcv_freqs;
+           stats->numnumbers[slot_idx] = num_mcv;
+           stats->stavalues[slot_idx] = mcv_values;
+           stats->numvalues[slot_idx] = num_mcv;
+           slot_idx++;
+       }
 
-           /* overwrite the existing statistics in the tuple */
-           attp->attdispersion = selratio;
+       /*
+        * Generate a histogram slot entry if there are at least two
+        * distinct values not accounted for in the MCV list.  (This
+        * ensures the histogram won't collapse to empty or a singleton.)
+        */
+       num_hist = ndistinct - num_mcv;
+       if (num_hist > stats->attr->attstattarget)
+           num_hist = stats->attr->attstattarget + 1;
+       if (num_hist >= 2)
+       {
+           MemoryContext old_context;
+           Datum  *hist_values;
+           int     nvals;
 
-           /* invalidate the tuple in the cache and write the buffer */
-           RelationInvalidateHeapTuple(ad, atup);
-           WriteNoReleaseBuffer(scan->rs_cbuf);
+           /* Sort the MCV items into position order to speed next loop */
+           qsort((void *) track, num_mcv,
+                 sizeof(ScalarMCVItem), compare_mcvs);
 
            /*
-            * Create pg_statistic tuples for the relation, if we have
-            * gathered the right data.  del_stats() previously deleted
-            * all the pg_statistic tuples for the rel, so we just have to
-            * insert new ones here.
+            * Collapse out the MCV items from the values[] array.
             *
-            * Note analyze_rel() has seen to it that we won't come here when
-            * vacuuming pg_statistic itself.
+            * Note we destroy the values[] array here... but we don't need
+            * it for anything more.  We do, however, still need values_cnt.
             */
-           if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+           if (num_mcv > 0)
            {
-               float4      nullratio;
-               float4      bestratio;
-               FmgrInfo    out_function;
-               char       *out_string;
-               double      best_cnt_d = stats->best_cnt,
-                           null_cnt_d = stats->null_cnt,
-                           nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
-               Datum       values[Natts_pg_statistic];
-               char        nulls[Natts_pg_statistic];
-               Relation    irelations[Num_pg_statistic_indices];
+               int     src,
+                       dest;
+               int     j;
 
-               nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
-               bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
-               fmgr_info(stats->outfunc, &out_function);
+               src = dest = 0;
+               j = 0;          /* index of next interesting MCV item */
+               while (src < values_cnt)
+               {
+                   int     ncopy;
+
+                   if (j < num_mcv)
+                   {
+                       int     first = track[j].first;
+
+                       if (src >= first)
+                       {
+                           /* advance past this MCV item */
+                           src = first + track[j].count;
+                           j++;
+                           continue;
+                       }
+                       ncopy = first - src;
+                   }
+                   else
+                   {
+                       ncopy = values_cnt - src;
+                   }
+                   memmove(&values[dest], &values[src],
+                           ncopy * sizeof(ScalarItem));
+                   src += ncopy;
+                   dest += ncopy;
+               }
+               nvals = dest;
+           }
+           else
+               nvals = values_cnt;
+           Assert(nvals >= num_hist);
 
-               for (i = 0; i < Natts_pg_statistic; ++i)
-                   nulls[i] = ' ';
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+           for (i = 0; i < num_hist; i++)
+           {
+               int     pos;
 
-               /*
-                * initialize values[]
-                */
-               i = 0;
-               values[i++] = ObjectIdGetDatum(relid);  /* starelid */
-               values[i++] = Int16GetDatum(attp->attnum);      /* staattnum */
-               values[i++] = ObjectIdGetDatum(stats->op_cmplt);        /* staop */
-               values[i++] = Float4GetDatum(nullratio);        /* stanullfrac */
-               values[i++] = Float4GetDatum(bestratio);        /* stacommonfrac */
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->best,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stacommonval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->min,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* staloval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->max,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stahival */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-
-               stup = heap_formtuple(sd->rd_att, values, nulls);
-
-               /* store tuple and update indexes too */
-               heap_insert(sd, stup);
-
-               CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
-               CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
-               CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
-               /* release allocated space */
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
-               heap_freetuple(stup);
+               pos = (i * (nvals - 1)) / (num_hist - 1);
+               hist_values[i] = datumCopy(values[pos].value,
+                                          stats->attr->attbyval,
+                                          stats->attr->attlen);
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stavalues[slot_idx] = hist_values;
+           stats->numvalues[slot_idx] = num_hist;
+           slot_idx++;
+       }
+
+       /* Generate a correlation entry if there are multiple values */
+       if (values_cnt > 1)
+       {
+           MemoryContext old_context;
+           float4 *corrs;
+           double  corr_xsum,
+                   corr_x2sum;
+
+           /* The result array must be allocated in TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           corrs = (float4 *) palloc(sizeof(float4));
+           MemoryContextSwitchTo(old_context);
+
+           /*----------
+            * Since we know the x and y value sets are both
+            *      0, 1, ..., values_cnt-1
+            * we have sum(x) = sum(y) =
+            *      (values_cnt-1)*values_cnt / 2
+            * and sum(x^2) = sum(y^2) =
+            *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+            *----------
+            */
+           corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+           corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+               (double) (2*values_cnt-1) / 6.0;
+           /* And the correlation coefficient reduces to */
+           corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+               (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stanumbers[slot_idx] = corrs;
+           stats->numnumbers[slot_idx] = 1;
+           slot_idx++;
        }
    }
-   heap_endscan(scan);
-   /* close rels, but hold locks till upcoming commit */
-   heap_close(ad, NoLock);
-   heap_close(sd, NoLock);
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 /*
- * del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
  *
- * If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums.  The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest
+ * tupno that that item's datum has been found to be equal to.  This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
+ *
+ * a and b point to ScalarItems.  The comparison function (datumCmpFn), its
+ * kind (datumCmpFnKind) and the link array (datumCmpTupnoLink) are not
+ * parameters: they are variables defined outside this function, so they
+ * presumably must be set up by the caller before qsort() is invoked.
+ *
+ * Returns a negative value if a sorts before b, a positive value if a
+ * sorts after b.  Items holding equal datums are ordered by tupno, so
+ * zero is returned only when both items carry the same tupno.
  */
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
 {
-   Relation    pgstatistic;
-   HeapScanDesc scan;
-   HeapTuple   tuple;
-   ScanKeyData key;
+   Datum       da = ((ScalarItem *) a)->value;
+   int         ta = ((ScalarItem *) a)->tupno;
+   Datum       db = ((ScalarItem *) b)->value;
+   int         tb = ((ScalarItem *) b)->tupno;
 
-   pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+   if (datumCmpFnKind == SORTFUNC_LT)
+   {
+       /*
+        * datumCmpFn yields a boolean here, so we may need to call it in
+        * both directions; if neither call returns true we fall through
+        * to the equal-datums code below.
+        */
+       if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+           return -1;          /* a < b */
+       if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+           return 1;           /* a > b */
+   }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
 
-   ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
-                          F_OIDEQ, ObjectIdGetDatum(relid));
-   scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+       /* one call gives a signed three-way result; flip it for REVCMP */
+       compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+       if (compare != 0)
+       {
+           if (datumCmpFnKind == SORTFUNC_REVCMP)
+               compare = -compare;
+           return compare;
+       }
+   }
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   /*
+    * The two datums are equal, so update datumCmpTupnoLink[].
+    * Each entry is only ever raised, never lowered, and both items'
+    * entries are updated so the link is recorded in either direction.
+    */
+   if (datumCmpTupnoLink[ta] < tb)
+       datumCmpTupnoLink[ta] = tb;
+   if (datumCmpTupnoLink[tb] < ta)
+       datumCmpTupnoLink[tb] = ta;
+
+   /*
+    * For equal datums, sort by tupno
+    */
+   return ta - tb;
+}
+
+/*
+ * qsort comparator: orders ScalarMCVItems by ascending "first" field
+ *
+ * Returns the difference of the two items' "first" values, i.e. negative
+ * when a comes earlier, positive when it comes later, zero when equal.
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+   const ScalarMCVItem *item_a = (const ScalarMCVItem *) a;
+   const ScalarMCVItem *item_b = (const ScalarMCVItem *) b;
+
+   return item_a->first - item_b->first;
+}
+
+
+/*
+ * update_attstats() -- update attribute statistics for one relation
+ *
+ *     Statistics are stored in several places: the pg_class row for the
+ *     relation has stats about the whole relation, and there is a
+ *     pg_statistic row for each (non-system) attribute that has ever
+ *     been analyzed.  The pg_class values are updated by VACUUM, not here.
+ *
+ *     pg_statistic rows are just added or updated normally.  This means
+ *     that pg_statistic will probably contain some deleted rows at the
+ *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ *     To keep things simple, we punt for pg_statistic, and don't try
+ *     to compute or store rows for pg_statistic itself in pg_statistic.
+ *     This could possibly be made to work, but it's not worth the trouble.
+ *     Note analyze_rel() has seen to it that we won't come here when
+ *     vacuuming pg_statistic itself.
+ *
+ *     relid is the OID of the relation the statistics belong to.
+ *     vacattrstats is an array of natts VacAttrStats pointers; entries
+ *     whose stats_valid flag is false are skipped.  For every other
+ *     entry one pg_statistic row is built from the entry's fields and
+ *     either replaces the existing (relid, attnum) row or is inserted
+ *     as a new row; index entries are made for the row in both cases.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+   Relation    sd;
+   int         attno;
+
+   /*
+    * We use an ExclusiveLock on pg_statistic to ensure that only one
+    * backend is writing it at a time --- without that, we might have to
+    * deal with concurrent updates here, and it's not worth the trouble.
+    */
+   sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+   for (attno = 0; attno < natts; attno++)
    {
-       if (attcnt > 0)
+       VacAttrStats *stats = vacattrstats[attno];
+       FmgrInfo    out_function;
+       HeapTuple   stup,
+                   oldtup;
+       int         i, k, n;
+       Datum       values[Natts_pg_statistic];
+       char        nulls[Natts_pg_statistic];
+       char        replaces[Natts_pg_statistic];
+       Relation    irelations[Num_pg_statistic_indices];
+
+       /* Ignore attr if we weren't able to collect stats */
+       if (!stats->stats_valid)
+           continue;
+
+       /*
+        * Look up the column datatype's output function; it is used below
+        * to render the stavalues entries as text.
+        */
+       fmgr_info(stats->attrtype->typoutput, &out_function);
+
+       /*
+        * Construct a new pg_statistic tuple
+        *
+        * Start with every column flagged ' ' in nulls[] and 'r' in
+        * replaces[]; columns that have no data are switched to 'n' in
+        * nulls[] further down.  (Presumably ' '/'n' mean not-null/null
+        * and 'r' means "replace this column" to heap_formtuple() and
+        * heap_modifytuple() --- confirm against those functions.)
+        */
+       for (i = 0; i < Natts_pg_statistic; ++i)
        {
-           Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
-           int         i;
+           nulls[i] = ' ';
+           replaces[i] = 'r';
+       }
 
-           for (i = 0; i < attcnt; i++)
+       /*
+        * Fill values[] one column at a time; i walks the columns and the
+        * trailing comments name the pg_statistic column being filled.
+        * The fixed columns come first, then STATISTIC_NUM_SLOTS entries
+        * each of stakind, staop, stanumbers and stavalues.
+        */
+       i = 0;
+       values[i++] = ObjectIdGetDatum(relid); /* starelid */
+       values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+       values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+       values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+       values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+       }
+       /*
+        * stanumbersN: build an array of float4 from the slot's numbers,
+        * or leave the column flagged 'n' when the slot has none.
+        */
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     nnum = stats->numnumbers[k];
+
+           if (nnum > 0)
            {
-               if (pgs->staattnum == attnums[i] + 1)
-                   break;
+               Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+               ArrayType  *arry;
+
+               for (n = 0; n < nnum; n++)
+                   numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+               /* XXX knows more than it should about type float4: */
+               arry = construct_array(numdatums, nnum,
+                                      false, sizeof(float4), 'i');
+               values[i++] = PointerGetDatum(arry); /* stanumbersN */
+           }
+           else
+           {
+               nulls[i] = 'n';
+               values[i++] = (Datum) 0;
            }
-           if (i >= attcnt)
-               continue;       /* don't delete it */
        }
-       simple_heap_delete(pgstatistic, &tuple->t_self);
-   }
+       /*
+        * stavaluesN: build an array of text from the slot's values,
+        * or leave the column flagged 'n' when the slot has none.
+        */
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     ntxt = stats->numvalues[k];
 
-   heap_endscan(scan);
+           if (ntxt > 0)
+           {
+               Datum      *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+               ArrayType  *arry;
 
-   /*
-    * Close rel, but *keep* lock; we will need to reacquire it later, so
-    * there's a possibility of deadlock against another VACUUM process if
-    * we let go now.  Keeping the lock shouldn't delay any common
-    * operation other than an attempted VACUUM of pg_statistic itself.
-    */
-   heap_close(pgstatistic, NoLock);
+               for (n = 0; n < ntxt; n++)
+               {
+                   /*
+                    * Convert data values to a text string to be inserted
+                    * into the text array.
+                    *
+                    * The output function's result is passed to textin
+                    * and then pfree'd, since only the text datum is
+                    * kept.
+                    */
+                   Datum   stringdatum;
+
+                   stringdatum =
+                       FunctionCall3(&out_function,
+                                     stats->stavalues[k][n],
+                                     ObjectIdGetDatum(stats->attrtype->typelem),
+                                     Int32GetDatum(stats->attr->atttypmod));
+                   txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+                   pfree(DatumGetPointer(stringdatum));
+               }
+               /* XXX knows more than it should about type text: */
+               arry = construct_array(txtdatums, ntxt,
+                                      false, -1, 'i');
+               values[i++] = PointerGetDatum(arry); /* stavaluesN */
+           }
+           else
+           {
+               nulls[i] = 'n';
+               values[i++] = (Datum) 0;
+           }
+       }
+
+       /* Is there already a pg_statistic tuple for this attribute? */
+       oldtup = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(stats->attnum),
+                               0, 0);
+
+       if (HeapTupleIsValid(oldtup))
+       {
+           /* Yes, replace it */
+           stup = heap_modifytuple(oldtup,
+                                   sd,
+                                   values,
+                                   nulls,
+                                   replaces);
+           ReleaseSysCache(oldtup);
+           /*
+            * NOTE(review): stup->t_self is used as the TID of the row to
+            * update, so this relies on heap_modifytuple() carrying the
+            * old tuple's TID over into stup --- confirm.
+            */
+           simple_heap_update(sd, &stup->t_self, stup);
+       }
+       else
+       {
+           /* No, insert new tuple */
+           stup = heap_formtuple(sd->rd_att, values, nulls);
+           heap_insert(sd, stup);
+       }
+
+       /* update indices too */
+       CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+                          irelations);
+       CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+       CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+       heap_freetuple(stup);
+   }
+
+   /* close rel, but hold lock till upcoming commit */
+   heap_close(sd, NoLock);
 }


diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c

index 96d493688e328aaa1fc4bf56bc12e18865f2ee33..13a78f1177390f0108702c94a7cc005e0a28b183 100644 (file)


--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
  *
  * NOTES
  *   The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
 #include "access/genam.h"
 
 
+static void drop_default(Oid relid, int16 attnum);
 static bool needs_toast_table(Relation rel);
 static bool is_relation(char *name);
 
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
        HeapTuple   typeTuple;
        Form_pg_type tform;
        char       *typename;
-       int         attnelems;
+       int         attndims;
 
        if (SearchSysCacheExists(ATTNAME,
                                 ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
 
        if (colDef->typename->arrayBounds)
        {
-           attnelems = length(colDef->typename->arrayBounds);
+           attndims = length(colDef->typename->arrayBounds);
            typename = makeArrayTypeName(colDef->typename->name);
        }
        else
-           attnelems = 0;
+           attndims = 0;
 
        typeTuple = SearchSysCache(TYPENAME,
                                   PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
        namestrcpy(&(attribute->attname), colDef->colname);
        attribute->atttypid = typeTuple->t_data->t_oid;
        attribute->attlen = tform->typlen;
-       attribute->attdispersion = 0;
+       attribute->attstattarget = DEFAULT_ATTSTATTARGET;
        attribute->attcacheoff = -1;
        attribute->atttypmod = colDef->typename->typmod;
        attribute->attnum = i;
        attribute->attbyval = tform->typbyval;
-       attribute->attnelems = attnelems;
+       attribute->attndims = attndims;
        attribute->attisset = (bool) (tform->typtype == 'c');
        attribute->attstorage = tform->typstorage;
        attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
 }
 
 
-
-static void drop_default(Oid relid, int16 attnum);
-
-
 /*
  * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
  */
 void
-AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+                            bool inh, const char *colName,
+                            Node *newDefault)
 {
    Relation    rel;
    HeapTuple   tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
            if (childrelid == myrelid)
                continue;
            rel = heap_open(childrelid, AccessExclusiveLock);
-           AlterTableAlterColumn(RelationGetRelationName(rel),
-                                 false, colName, newDefault);
+           AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+                                        false, colName, newDefault);
            heap_close(rel, AccessExclusiveLock);
        }
    }
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
    /* -= now do the thing on this relation =- */
 
    /* reopen the business */
-   rel = heap_openr((char *) relationName, AccessExclusiveLock);
+   rel = heap_openr(relationName, AccessExclusiveLock);
 
    /*
     * get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
 }
 
 
-
 static void
 drop_default(Oid relid, int16 attnum)
 {
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
 }
 
 
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ *
+ * Stores a new statistics target in the attstattarget field of the
+ * pg_attribute row for column colName of table relationName.
+ *
+ * If inh is true, the same change is first applied (non-recursively) to
+ * every other relation returned by find_all_inheritors().  statsTarget
+ * must be an Integer node; its value is forced into the range 0..1000.
+ *
+ * Raises an error if the caller does not own the relation, if the
+ * relation is not a plain table, if the column does not exist, or if it
+ * is a system attribute.
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+                               bool inh, const char *colName,
+                               Node *statsTarget)
+{
+   Relation    tablerel;
+   Relation    pg_attr;
+   Relation    attr_indexes[Num_pg_attr_indices];
+   Oid         tableoid;
+   HeapTuple   atttup;
+   Form_pg_attribute attform;
+   int         target;
+
+#ifndef NO_SECURITY
+   if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+       elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+   /* Lock the relation and verify that it is a plain table */
+   tablerel = heap_openr(relationName, AccessExclusiveLock);
+   if (tablerel->rd_rel->relkind != RELKIND_RELATION)
+       elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+            relationName);
+   tableoid = RelationGetRelid(tablerel);
+   heap_close(tablerel, NoLock);   /* drop our reference, retain the lock */
+
+   /*
+    * Handle inheritance children first, if requested.
+    */
+   if (inh)
+   {
+       List       *cell;
+       List       *inheritors;
+
+       /*
+        * find_all_inheritors (which lives in the planner) already walks
+        * the whole inheritance tree, so each child is processed with
+        * inh = false.  The returned list includes the parent itself,
+        * which is handled below rather than here.
+        */
+       inheritors = find_all_inheritors(tableoid);
+
+       foreach(cell, inheritors)
+       {
+           Oid         childoid = lfirsti(cell);
+
+           if (childoid != tableoid)
+           {
+               Relation    childrel;
+
+               childrel = heap_open(childoid, AccessExclusiveLock);
+               AlterTableAlterColumnStatistics(RelationGetRelationName(childrel),
+                                               false, colName, statsTarget);
+               heap_close(childrel, AccessExclusiveLock);
+           }
+       }
+   }
+
+   /*
+    * Now apply the change to this relation itself.
+    */
+   Assert(IsA(statsTarget, Integer));
+   target = intVal(statsTarget);
+
+   /* Clamp to the sane range (should we raise an error instead?) */
+   if (target > 1000)
+       target = 1000;
+   else if (target < 0)
+       target = 0;
+
+   pg_attr = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+   /* Fetch a modifiable copy of the column's pg_attribute row */
+   atttup = SearchSysCacheCopy(ATTNAME,
+                               ObjectIdGetDatum(tableoid),
+                               PointerGetDatum(colName),
+                               0, 0);
+   if (!HeapTupleIsValid(atttup))
+       elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+            relationName, colName);
+
+   attform = (Form_pg_attribute) GETSTRUCT(atttup);
+   if (attform->attnum < 0)
+       elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+            colName);
+
+   attform->attstattarget = target;
+
+   simple_heap_update(pg_attr, &atttup->t_self, atttup);
+
+   /* keep system catalog indices current */
+   CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, attr_indexes);
+   CatalogIndexInsert(attr_indexes, Num_pg_attr_indices, pg_attr, atttup);
+   CatalogCloseIndices(Num_pg_attr_indices, attr_indexes);
+
+   heap_freetuple(atttup);
+   heap_close(pg_attr, RowExclusiveLock);
+}
+
+
 #ifdef _DROP_COLUMN_HACK__
 /*
  * ALTER TABLE DROP COLUMN trial implementation


diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 694d0e8bbc1491c39827ff833f8cc3fb68906e9e..9a0dbdc8c8e15c0b261068728c7d38546e3aa07c 100644 (file)


--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
              Buffer oldbuf, ItemPointerData from,
              Buffer newbuf, HeapTuple newtup);
 
+
+/*
+ * Node of a singly linked list of relations to be processed; vacuum()
+ * walks the list through vrl_next until it reaches NULL.
+ */
+typedef struct VRelListData
+{
+   Oid         vrl_relid;      /* OID of the relation */
+   struct VRelListData *vrl_next;  /* next list node, or NULL at the end */
+} VRelListData;
+
+typedef VRelListData *VRelList;
+
+/*
+ * Per-page information kept by vacuum.
+ * NOTE(review): offsets[] is declared with one element; presumably the
+ * struct is allocated with extra room for more entries --- confirm.
+ */
+typedef struct VacPageData
+{
+   BlockNumber blkno;          /* BlockNumber of this Page */
+   Size        free;           /* FreeSpace on this Page */
+   uint16      offsets_used;   /* Number of OffNums used by vacuum */
+   uint16      offsets_free;   /* Number of OffNums free or to be free */
+   OffsetNumber offsets[1];    /* Array of its OffNums */
+} VacPageData;
+
+typedef VacPageData *VacPage;
+
+/*
+ * A list of VacPage entries, held in the pagedesc array.
+ */
+typedef struct VacPageListData
+{
+   int         empty_end_pages;/* Number of "empty" end-pages */
+   int         num_pages;      /* Number of pages in pagedesc */
+   int         num_allocated_pages;    /* Number of allocated pages in
+                                        * pagedesc */
+   VacPage    *pagedesc;       /* Descriptions of pages */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;
+
+/*
+ * A pair of tuple IDs.
+ * NOTE(review): presumably links a tuple (this_tid) to its newer
+ * version (new_tid) --- the usage is not visible here, confirm.
+ */
+typedef struct VTupleLinkData
+{
+   ItemPointerData new_tid;
+   ItemPointerData this_tid;
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;
+
+/*
+ * Describes one tuple to be moved and the page it should go to.
+ */
+typedef struct VTupleMoveData
+{
+   ItemPointerData tid;        /* tuple ID */
+   VacPage     vacpage;        /* where to move */
+   bool        cleanVpd;       /* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;
+
+/*
+ * Statistics collected for one relation while it is being vacuumed;
+ * allocated and initialized in vacuum_rel().
+ */
+typedef struct VRelStats
+{
+   Oid         relid;          /* OID of the relation being vacuumed */
+   long        num_pages;      /* page count, saved by scan_heap() */
+   long        num_tuples;     /* tuple count, saved by scan_heap() */
+   Size        min_tlen;       /* saved by scan_heap(); 0 if no tuples */
+   Size        max_tlen;       /* saved by scan_heap(); 0 if no tuples */
+   bool        hasindex;       /* set by vacuum_rel() */
+   int         num_vtlinks;    /* presumably # of entries in vtlinks */
+   VTupleLink  vtlinks;        /* presumably array of tuple links */
+} VRelStats;
+
+
 static MemoryContext vac_context = NULL;
 
 static int MESSAGE_LEVEL;      /* message level */
 
 static TransactionId XmaxRecent;
 
+
 /* non-export function prototypes */
 static void vacuum_init(void);
 static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
 static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+                     VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacuum_pages, VacPageList fraged_pages,
+                       int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacpagelist);
 static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+                        long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
 static char *show_rusage(struct rusage * ru0);
 
 
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
 void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
 {
+   const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
    NameData    VacRel;
    Name        VacRelName;
-   MemoryContext old;
-   List       *le;
-   List       *anal_cols2 = NIL;
-
-   if (anal_cols != NIL && !analyze)
-       elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
+   VRelList    vrl,
+               cur;
 
    /*
     * We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
     * behavior.
     */
    if (IsTransactionBlock())
-       elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+       elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
 
-   if (verbose)
+   if (vacstmt->verbose)
        MESSAGE_LEVEL = NOTICE;
    else
        MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
                                        ALLOCSET_DEFAULT_INITSIZE,
                                        ALLOCSET_DEFAULT_MAXSIZE);
 
-   /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
-   if (vacrel)
+   /* Convert vacrel, which is just a string, to a Name */
+   if (vacstmt->vacrel)
    {
-       namestrcpy(&VacRel, vacrel);
+       namestrcpy(&VacRel, vacstmt->vacrel);
        VacRelName = &VacRel;
    }
    else
        VacRelName = NULL;
 
-   /* must also copy the column list, if any, to safe storage */
-   old = MemoryContextSwitchTo(vac_context);
-   foreach(le, anal_cols)
-   {
-       char       *col = (char *) lfirst(le);
-
-       anal_cols2 = lappend(anal_cols2, pstrdup(col));
-   }
-   MemoryContextSwitchTo(old);
+   /* Build list of relations to process (note this lives in vac_context) */
+   vrl = getrels(VacRelName, stmttype);
 
    /*
     * Start up the vacuum cleaner.
-    *
-    * NOTE: since this commits the current transaction, the memory holding
-    * any passed-in parameters gets freed here.  We must have already
-    * copied pass-by-reference parameters to safe storage.  Don't make me
-    * fix this again!
     */
    vacuum_init();
 
-   /* vacuum the database */
-   vac_vacuum(VacRelName, analyze, anal_cols2);
+   /*
+    * Process each selected relation.  We are careful to process
+    * each relation in a separate transaction in order to avoid holding
+    * too many locks at one time.
+    */
+   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+   {
+       if (vacstmt->vacuum)
+           vacuum_rel(cur->vrl_relid);
+       /* analyze separately so locking is minimized */
+       if (vacstmt->analyze)
+           analyze_rel(cur->vrl_relid, vacstmt);
+   }
 
    /* clean up */
    vacuum_shutdown();
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
  *     PostgresMain().
  */
 static void
-vacuum_init()
+vacuum_init(void)
 {
    /* matches the StartTransaction in PostgresMain() */
    CommitTransactionCommand();
 }
 
 static void
-vacuum_shutdown()
+vacuum_shutdown(void)
 {
    /* on entry, we are not in a transaction */
 
@@ -223,34 +287,10 @@ vacuum_shutdown()
 }
 
 /*
- * vac_vacuum() -- vacuum the database.
- *
- *     This routine builds a list of relations to vacuum, and then calls
- *     code that vacuums them one at a time.  We are careful to vacuum each
- *     relation in a separate transaction in order to avoid holding too many
- *     locks at one time.
+ * Build a list of VRelListData nodes, one per relation to be processed
  */
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
-   VRelList    vrl,
-               cur;
-
-   /* get list of relations */
-   vrl = getrels(VacRelP);
-
-   /* vacuum each heap relation */
-   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
-   {
-       vacuum_rel(cur->vrl_relid);
-       /* analyze separately so locking is minimized */
-       if (analyze)
-           analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
-   }
-}
-
 static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
 {
    Relation    rel;
    TupleDesc   tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
    char       *rname;
    char        rkind;
    bool        n;
-   bool        found = false;
    ScanKeyData key;
 
-   StartTransactionCommand();
-
-   if (NameStr(*VacRelP))
+   if (VacRelP)
    {
 
        /*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
    }
    else
    {
+       /* find all relations listed in pg_class */
        ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
                               F_CHAREQ, CharGetDatum('r'));
    }
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
 
    while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
    {
-       found = true;
-
        d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
-       rname = (char *) DatumGetPointer(d);
+       rname = (char *) DatumGetName(d);
 
        d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
        rkind = DatumGetChar(d);
 
        if (rkind != RELKIND_RELATION)
        {
-           elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+           elog(NOTICE, "%s: can not process indexes, views or special system tables",
+                stmttype);
            continue;
        }
 
-       /* get a relation list entry for this guy */
+       /* Make a relation list entry for this guy */
        if (vrl == (VRelList) NULL)
            vrl = cur = (VRelList)
                MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
    heap_endscan(scan);
    heap_close(rel, AccessShareLock);
 
-   if (!found)
-       elog(NOTICE, "Vacuum: table not found");
-
-   CommitTransactionCommand();
+   if (vrl == NULL)
+       elog(NOTICE, "%s: table not found", stmttype);
 
    return vrl;
 }
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
     */
    vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
    vacrelstats->relid = relid;
-   vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+   vacrelstats->num_pages = 0;
+   vacrelstats->num_tuples = 0;
    vacrelstats->hasindex = false;
 
    GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
        vacrelstats->hasindex = true;
    else
        vacrelstats->hasindex = false;
-#ifdef NOT_USED
 
+#ifdef NOT_USED
    /*
     * reindex in VACUUM is dangerous under WAL. ifdef out until it
     * becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
    heap_close(onerel, NoLock);
 
    /* update statistics in pg_class */
-   update_relstats(vacrelstats->relid, vacrelstats->num_pages,
-                   vacrelstats->num_tuples, vacrelstats->hasindex,
-                   vacrelstats);
+   vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+                       vacrelstats->num_tuples, vacrelstats->hasindex);
 
    /*
     * Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    char       *relname;
    VacPage     vacpage,
                vp;
+   long        num_tuples;
    uint32      tups_vacuumed,
-               num_tuples,
                nkeep,
                nunused,
                ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    /* save stats in the rel list for use later */
    vacrelstats->num_tuples = num_tuples;
    vacrelstats->num_pages = nblocks;
-/*   vacrelstats->natts = attr_cnt;*/
    if (num_tuples == 0)
        min_tlen = max_tlen = 0;
    vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    }
 
    elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
         nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
         new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 {
    Buffer      buf;
    VacPage    *vacpage;
-   int         nblocks;
+   long        nblocks;
    int         i;
 
    nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
    /* truncate relation if there are some empty end-pages */
    if (vacuum_pages->empty_end_pages > 0)
    {
-       elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+       elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
             RelationGetRelationName(onerel),
             vacrelstats->num_pages, nblocks);
        nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
  *
  */
 static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
-   int         nitups;
+   long        nitups;
    int         nipages;
    struct rusage ru0;
 
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
 
    /* now update statistics in pg_class */
    nipages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
         RelationGetRelationName(indrel), nipages, nitups,
         show_rusage(&ru0));
 
    if (nitups != num_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
             RelationGetRelationName(indrel), nitups, num_tuples);
 
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
  *     pg_class.
  */
 static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+            long num_tuples, int keep_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
    ItemPointer heapptr;
    int         tups_vacuumed;
-   int         num_index_tuples;
+   long        num_index_tuples;
    int         num_pages;
    VacPage     vp;
    struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
 
    /* now update statistics in pg_class */
    num_pages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel),
+                       num_pages, num_index_tuples, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
         RelationGetRelationName(indrel), num_pages,
         num_index_tuples - keep_tuples, tups_vacuumed,
         show_rusage(&ru0));
 
    if (num_index_tuples != num_tuples + keep_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
          RelationGetRelationName(indrel), num_index_tuples, num_tuples);
 
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
 }
 
 /*
- * update_relstats() -- update statistics for one relation
+ * vac_update_relstats() -- update statistics for one relation
  *
  *     Update the whole-relation statistics that are kept in its pg_class
  *     row.  There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
  *     we updated these tuples in the usual way, vacuuming pg_class itself
  *     wouldn't work very well --- by the time we got done with a vacuum
  *     cycle, most of the tuples in pg_class would've been obsoleted.
- *     Updating pg_class's own statistics would be especially tricky.
  *     Of course, this only works for fixed-size never-null columns, but
  *     these are.
  */
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
-               VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                   bool hasindex)
 {
    Relation    rd;
    HeapTupleData rtup;


diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c

index 12c6f82a8b224c0f773f79c7b53132447467d399..e0543a2810977526886fee0d639ec76cc069463f 100644 (file)


--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,24 +20,24 @@
 #include "utils/tuplesort.h"
 
 /* ----------------------------------------------------------------
- *     FormSortKeys(node)
+ *     ExtractSortKeys
  *
- *     Forms the structure containing information used to sort the relation.
+ *     Extract the sorting key information from the plan node.
  *
- *     Returns an array of ScanKeyData.
+ *     Returns (via the output parameters) two palloc'd arrays, one of
+ *     sort operator OIDs and one of attribute numbers.
  * ----------------------------------------------------------------
  */
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+               Oid **sortOperators,
+               AttrNumber **attNums)
 {
-   ScanKey     sortkeys;
    List       *targetList;
-   List       *tl;
    int         keycount;
-   Resdom     *resdom;
-   AttrNumber  resno;
-   Index       reskey;
-   Oid         reskeyop;
+   Oid        *sortOps;
+   AttrNumber *attNos;
+   List       *tl;
 
    /*
     * get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
    keycount = sortnode->keycount;
 
    /*
-    * first allocate space for scan keys
+    * first allocate space for results
     */
    if (keycount <= 0)
-       elog(ERROR, "FormSortKeys: keycount <= 0");
-   sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
-   MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+       elog(ERROR, "ExtractSortKeys: keycount <= 0");
+   sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+   MemSet(sortOps, 0, keycount * sizeof(Oid));
+   *sortOperators = sortOps;
+   attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+   MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+   *attNums = attNos;
 
    /*
-    * form each scan key from the resdom info in the target list
+    * extract info from the resdom nodes in the target list
     */
    foreach(tl, targetList)
    {
        TargetEntry *target = (TargetEntry *) lfirst(tl);
-
-       resdom = target->resdom;
-       resno = resdom->resno;
-       reskey = resdom->reskey;
-       reskeyop = resdom->reskeyop;
+       Resdom     *resdom = target->resdom;
+       Index       reskey = resdom->reskey;
 
        if (reskey > 0)         /* ignore TLEs that are not sort keys */
        {
-           ScanKeyEntryInitialize(&sortkeys[reskey - 1],
-                                  0x0,
-                                  resno,
-                                  (RegProcedure) reskeyop,
-                                  (Datum) 0);
+           Assert(reskey <= keycount);
+           sortOps[reskey - 1] = resdom->reskeyop;
+           attNos[reskey - 1] = resdom->resno;
        }
    }
-
-   return sortkeys;
 }
 
 /* ----------------------------------------------------------------
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
    {
        Plan       *outerNode;
        TupleDesc   tupDesc;
-       int         keycount;
-       ScanKey     sortkeys;
+       Oid        *sortOperators;
+       AttrNumber *attNums;
 
        SO1_printf("ExecSort: %s\n",
                   "sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
 
        outerNode = outerPlan((Plan *) node);
        tupDesc = ExecGetTupType(outerNode);
-       keycount = node->keycount;
-       sortkeys = (ScanKey) sortstate->sort_Keys;
 
-       tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
-                                             true /* randomAccess */ );
+       ExtractSortKeys(node, &sortOperators, &attNums);
 
+       tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+                                             sortOperators, attNums,
+                                             true /* randomAccess */ );
        sortstate->tuplesortstate = (void *) tuplesortstate;
 
+       pfree(sortOperators);
+       pfree(attNums);
+
        /*
         * Scan the subplan and feed all the tuples to tuplesort.
         */
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
     */
    sortstate = makeNode(SortState);
    sortstate->sort_Done = false;
-   sortstate->sort_Keys = NULL;
    sortstate->tuplesortstate = NULL;
 
    node->sortstate = sortstate;
@@ -258,11 +257,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
    outerPlan = outerPlan((Plan *) node);
    ExecInitNode(outerPlan, estate, (Plan *) node);
 
-   /*
-    * initialize sortstate information
-    */
-   sortstate->sort_Keys = FormSortKeys(node);
-
    /*
     * initialize tuple type.  no need to initialize projection info
     * because this node doesn't do projections.
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
        tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
    sortstate->tuplesortstate = NULL;
 
-   if (sortstate->sort_Keys != NULL)
-       pfree(sortstate->sort_Keys);
-
    pfree(sortstate);
    node->sortstate = NULL;
 


diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index ad50630931e357a1ca7bae5f806f8cc242062722..ee5a803b8025ac9817834537bb5b4ccd10708527 100644 (file)


--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
    newnode->left_pathkey = NIL;
    newnode->right_pathkey = NIL;
    newnode->hashjoinoperator = from->hashjoinoperator;
-   newnode->left_dispersion = from->left_dispersion;
-   newnode->right_dispersion = from->right_dispersion;
+   newnode->left_bucketsize = from->left_bucketsize;
+   newnode->right_bucketsize = from->right_bucketsize;
 
    return newnode;
 }
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
 {
    VacuumStmt *newnode = makeNode(VacuumStmt);
 
-   newnode->verbose = from->verbose;
+   newnode->vacuum = from->vacuum;
    newnode->analyze = from->analyze;
+   newnode->verbose = from->verbose;
    if (from->vacrel)
        newnode->vacrel = pstrdup(from->vacrel);
-   Node_Copy(from, newnode, va_spec);
+   Node_Copy(from, newnode, va_cols);
 
    return newnode;
 }


diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 06ee63bbacd05398c5445bd4ce4f8dfb169090da..284a534aa966f03a5f69da55e5faa89a96925b1e 100644 (file)


--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
        return false;
 
    /*
-    * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+    * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
     * since they may not be set yet, and should be derivable from the
     * clause anyway
     */
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
 static bool
 _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
 {
-   if (a->verbose != b->verbose)
+   if (a->vacuum != b->vacuum)
        return false;
    if (a->analyze != b->analyze)
        return false;
+   if (a->verbose != b->verbose)
+       return false;
    if (!equalstr(a->vacrel, b->vacrel))
        return false;
-   if (!equal(a->va_spec, b->va_spec))
+   if (!equal(a->va_cols, b->va_cols))
        return false;
 
    return true;


diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 9a071e7a250df88efe03c183927ffeadfa07a86c..4c0c1b03ef544c60b9161208ceb950a83862419c 100644 (file)


--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
  *
  * NOTES
  *   Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
 
    /* eval_cost is not part of saved representation; compute on first use */
    local_node->eval_cost = -1;
-   /* ditto for cached pathkeys and dispersion */
+   /* ditto for cached pathkeys and bucketsize */
    local_node->left_pathkey = NIL;
    local_node->right_pathkey = NIL;
-   local_node->left_dispersion = -1;
-   local_node->right_dispersion = -1;
+   local_node->left_bucketsize = -1;
+   local_node->right_bucketsize = -1;
 
    return local_node;
 }


diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index c52af72a16b824c1f37078bb4e185d8a34b22d2b..bdfbbb18186d9c7ef4201fa8eb294bbdb55e298c 100644 (file)


--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,11 +50,15 @@
 
 #include <math.h>
 
+#include "catalog/pg_statistic.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 
 /*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
  * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize statistic
  *             for the inner hash key.
  */
 void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
              Path *outer_path,
              Path *inner_path,
              List *restrictlist,
-             Selectivity innerdispersion)
+             Selectivity innerbucketsize)
 {
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
 
    /*
     * The number of tuple comparisons needed is the number of outer
-    * tuples times the typical hash bucket size.  nodeHash.c tries for
-    * average bucket loading of NTUP_PER_BUCKET, but that goal will be
-    * reached only if data values are uniformly distributed among the
-    * buckets.  To be conservative, we scale up the target bucket size by
-    * the number of inner rows times inner dispersion, giving an estimate
-    * of the typical number of duplicates of each value. We then charge
-    * one cpu_operator_cost per tuple comparison.
+    * tuples times the typical number of tuples in a hash bucket,
+    * which is the inner relation size times its bucketsize fraction.
+    * We charge one cpu_operator_cost per tuple comparison.
     */
    run_cost += cpu_operator_cost * outer_path->parent->rows *
-       NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+       ceil(inner_path->parent->rows * innerbucketsize);
 
    /*
     * Estimate the number of tuples that get through the hashing filter
     * as one per tuple in the two source relations.  This could be a
     * drastic underestimate if there are many equal-keyed tuples in
-    * either relation, but we have no good way of estimating that...
+    * either relation, but we have no simple way of estimating that;
+    * and since this is only a second-order parameter, it's probably
+    * not worth expending a lot of effort on the estimate.
     */
    ntuples = outer_path->parent->rows + inner_path->parent->rows;
 
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
    /*
     * Bias against putting larger relation on inside.  We don't want an
     * absolute prohibition, though, since larger relation might have
-    * better dispersion --- and we can't trust the size estimates
+    * better bucketsize --- and we can't trust the size estimates
     * unreservedly, anyway.  Instead, inflate the startup cost by the
     * square root of the size ratio.  (Why square root?  No real good
     * reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
    path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin.  We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation.  However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution.  Otherwise the
+ * buckets will be nonuniformly occupied.  If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often.  Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size.  We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1.  This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want.  We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+   Oid         relid;
+   RelOptInfo *rel;
+   HeapTuple   tuple;
+   Form_pg_statistic stats;
+   double      estfract,
+               ndistinct,
+               needdistinct,
+               mcvfreq,
+               avgfreq;
+   float4     *numbers;
+   int         nnumbers;
+
+   /*
+    * Look up info about var's relation and attribute;
+    * if none available, return default estimate.
+    */
+   if (!IsA(var, Var))
+       return 0.1;
+
+   relid = getrelid(var->varno, root->rtable);
+   if (relid == InvalidOid)
+       return 0.1;
+
+   rel = get_base_rel(root, var->varno);
+
+   if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+       return 0.1;             /* ensure we can divide below */
+
+   tuple = SearchSysCache(STATRELATT,
+                          ObjectIdGetDatum(relid),
+                          Int16GetDatum(var->varattno),
+                          0, 0);
+   if (!HeapTupleIsValid(tuple))
+   {
+       /*
+        * Perhaps the Var is a system attribute; if so, it will have no
+        * entry in pg_statistic, but we may be able to guess something
+        * about its distribution anyway.
+        */
+       switch (var->varattno)
+       {
+           case ObjectIdAttributeNumber:
+           case SelfItemPointerAttributeNumber:
+               /* these are unique, so buckets should be well-distributed */
+               return (double) NTUP_PER_BUCKET / rel->rows;
+           case TableOidAttributeNumber:
+               /* hashing this is a terrible idea... */
+               return 1.0;
+       }
+       return 0.1;
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+   /*
+    * Obtain number of distinct data values in raw relation.
+    */
+   ndistinct = stats->stadistinct;
+   if (ndistinct < 0.0)
+       ndistinct = -ndistinct * rel->tuples;
+
+   /*
+    * Adjust ndistinct to account for restriction clauses.  Observe we are
+    * assuming that the data distribution is affected uniformly by the
+    * restriction clauses!
+    *
+    * XXX Possibly better way, but much more expensive: multiply by
+    * selectivity of rel's restriction clauses that mention the target Var.
+    */
+   ndistinct *= rel->rows / rel->tuples;
+
+   /*
+    * Discourage use of hash join if there seem not to be very many distinct
+    * data values.  The threshold here is somewhat arbitrary, as is the
+    * fraction used to "discourage" the choice.
+    */
+   if (ndistinct < 50.0)
+   {
+       ReleaseSysCache(tuple);
+       return 0.5;
+   }
+
+   /*
+    * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
+    * ie the number of rows after applying restriction clauses, because
+    * that's what the fraction will eventually be multiplied by in
+    * cost_hashjoin.
+    */
+   estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+   /*
+    * Adjust estimated bucketsize if too few distinct values to fill
+    * all the buckets.
+    */
+   needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+   if (ndistinct < needdistinct)
+       estfract *= needdistinct / ndistinct;
+
+   /*
+    * Look up the frequency of the most common value, if available.
+    */
+   mcvfreq = 0.0;
+
+   if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        NULL, NULL, &numbers, &nnumbers))
+   {
+       /*
+        * The first MCV stat is for the most common value.
+        */
+       if (nnumbers > 0)
+           mcvfreq = numbers[0];
+       free_attstatsslot(var->vartype, NULL, 0,
+                         numbers, nnumbers);
+   }
+
+   /*
+    * Adjust estimated bucketsize upward to account for skewed distribution.
+    */
+   avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+   if (avgfreq > 0.0 && mcvfreq > avgfreq)
+       estfract *= mcvfreq / avgfreq;
+
+   ReleaseSysCache(tuple);
+
+   return (Selectivity) estfract;
+}
+
 
 /*
  * cost_qual_eval


diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index d41336ddcee0f9c26ad9a2ab0b1410a1f0ae38c7..cd7cabd41deb7bf52b323b437d847eede311b8cc 100644 (file)


--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,15 +8,15 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 #include 
 
-#include "postgres.h"
-
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
                     List *restrictlist, JoinType jointype);
 static Path *best_innerjoin(List *join_paths, List *outer_relid,
               JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
                         RelOptInfo *outerrel,
                         RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
        Expr       *clause;
        Var        *left,
                   *right;
-       Selectivity innerdispersion;
+       Selectivity innerbucketsize;
        List       *hashclauses;
 
        if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
 
        /*
         * Check if clause is usable with these sub-rels, find inner side,
-        * estimate dispersion of inner var for costing purposes.
+        * estimate bucketsize of inner var for costing purposes.
         *
         * Since we tend to visit the same clauses over and over when
-        * planning a large query, we cache the dispersion estimates in
+        * planning a large query, we cache the bucketsize estimates in
         * the RestrictInfo node to avoid repeated lookups of statistics.
         */
        if (intMember(left->varno, outerrelids) &&
            intMember(right->varno, innerrelids))
        {
            /* righthand side is inner */
-           innerdispersion = restrictinfo->right_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->right_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, right);
-               restrictinfo->right_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, right);
+               restrictinfo->right_bucketsize = innerbucketsize;
            }
        }
        else if (intMember(left->varno, innerrelids) &&
                 intMember(right->varno, outerrelids))
        {
            /* lefthand side is inner */
-           innerdispersion = restrictinfo->left_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->left_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, left);
-               restrictinfo->left_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, left);
+               restrictinfo->left_bucketsize = innerbucketsize;
            }
        }
        else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
                                      innerrel->cheapest_total_path,
                                      restrictlist,
                                      hashclauses,
-                                     innerdispersion));
+                                     innerbucketsize));
        if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
            add_path(joinrel, (Path *)
                     create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
                                          innerrel->cheapest_total_path,
                                          restrictlist,
                                          hashclauses,
-                                         innerdispersion));
+                                         innerbucketsize));
    }
 }
 
@@ -866,31 +865,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
    return cheapest;
 }
 
-/*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large. We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
-   Oid         relid;
-
-   if (!IsA(var, Var))
-       return 0.1;
-
-   relid = getrelid(var->varno, root->rtable);
-
-   if (relid == InvalidOid)
-       return 0.1;
-
-   return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
 /*
  * select_mergejoin_clauses
  *   Select mergejoin clauses that are usable for a particular join.


diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8c3b00289d3e7d467aeb03dcc1b53eb02f5a3a2b..2d264c46881730ba4ace2ade745fe6942c9d49fb 100644 (file)


--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,14 +10,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "catalog/pg_index.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
         */
        if (resdom->reskey == 0)
        {
-           /* OK, mark it as a sort key and set the sort operator regproc */
+           /* OK, mark it as a sort key and set the sort operator */
            resdom->reskey = ++numsortkeys;
-           resdom->reskeyop = get_opcode(pathkey->sortop);
+           resdom->reskeyop = pathkey->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 7c3e15a8f88d81b206e4d3f618eae9658294ad6a..5d67e02dacb44bce678665c592ab184f588469a5 100644 (file)


--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 
-#include "postgres.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
    restrictinfo->left_pathkey = NIL;   /* not computable yet */
    restrictinfo->right_pathkey = NIL;
    restrictinfo->hashjoinoperator = InvalidOid;
-   restrictinfo->left_dispersion = -1; /* not computed until needed */
-   restrictinfo->right_dispersion = -1;
+   restrictinfo->left_bucketsize = -1; /* not computed until needed */
+   restrictinfo->right_bucketsize = -1;
 
    /*
     * Retrieve all relids and vars contained within the clause.


diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index b2ab4600209dd566fd281c5110f0e1f6ba5c1cb1..0aba4808c160f3bf5ba3a9cc3fd2c6cf26fa2fa3 100644 (file)


--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
            {
                /* OK, insert the ordering info needed by the executor. */
                resdom->reskey = ++keyno;
-               resdom->reskeyop = get_opcode(grpcl->sortop);
+               resdom->reskeyop = grpcl->sortop;
            }
        }
 
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
        {
            /* OK, insert the ordering info needed by the executor. */
            resdom->reskey = ++keyno;
-           resdom->reskeyop = get_opcode(sortcl->sortop);
+           resdom->reskeyop = sortcl->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 0b173466cf98061a3add13f850ba9e750dd9f4e0..ede4159d9707629729b5dffbc32f241f48629e72 100644 (file)


--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
        newinfo->eval_cost = -1;        /* reset this too */
        newinfo->left_pathkey = NIL;    /* and these */
        newinfo->right_pathkey = NIL;
-       newinfo->left_dispersion = -1;
-       newinfo->right_dispersion = -1;
+       newinfo->left_bucketsize = -1;
+       newinfo->right_bucketsize = -1;
 
        return (Node *) newinfo;
    }


diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index cfba3ee395f2e0216f74c1e2497a7a8f5897d74b..407c132b4f7a6388b093806fd3eb01286906e084 100644 (file)


--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,14 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'hashclauses' is a list of the hash join clause (always a 1-element list)
  *     (this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
  *
  */
 HashPath   *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion)
+                    Selectivity innerbucketsize)
 {
    HashPath   *pathnode = makeNode(HashPath);
 
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                  outer_path,
                  inner_path,
                  restrict_clauses,
-                 innerdispersion);
+                 innerbucketsize);
 
    return pathnode;
 }


diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index 4f711df203c846acf4402ed131def54dbbf94443..ee3523553e8693ac1b7762d01ebbabc3697a4d7a 100644 (file)


--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,11 +9,10 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include 


diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 4687a5599623d09b416357721488369cc8eaaa38..76cc095bc4edcdbf4cfecad9627a1e5a29d03256 100644 (file)


--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
        /* just the named tables */
        foreach(l, forUpdate)
        {
-           char       *relname = lfirst(l);
+           char       *relname = strVal(lfirst(l));
 
            i = 0;
            foreach(rt, qry->rtable)


diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index bed0ce239a42e2f75c48bdda8aff299cb2f02f9b..40c379aca51f280882945b9f5caf4aaeccc4475f 100644 (file)


--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
  *
  * HISTORY
  *   AUTHOR            DATE            MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
    char                *str;
    bool                boolean;
    JoinType            jtype;
-   InhOption           inhOpt;
    List                *list;
    Node                *node;
    Value               *value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
 
 %type    stmt,
        AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+       AnalyzeStmt,
        ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
        CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
        CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
 %type    select_no_parens, select_with_parens, select_clause,
                simple_select
 
-%type     alter_column_action
+%type     alter_column_default
 %type     drop_behavior
 
 %type    createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
        OptTableElementList, OptInherit, definition, opt_distinct,
        opt_with, func_args, func_args_list, func_as,
        oper_argtypes, RuleActionList, RuleActionMulti,
-       opt_column_list, columnList, opt_va_list, va_list,
+       opt_column_list, columnList, opt_name_list,
        sort_clause, sortby_list, index_params, index_list, name_list,
        from_clause, from_list, opt_array_bounds,
        expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
 %type    substr_from, substr_for
 
 %type     opt_binary, opt_using, opt_instead, opt_cursor
-%type     opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type  opt_inh_star, opt_only
+%type     opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
 
 %type    copy_dirn, direction, reindex_type, drop_type,
        opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
        NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
        OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
        REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
-       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+       STATISTICS, STDIN, STDOUT, SYSID,
        TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, 
        UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
 
@@ -470,6 +469,7 @@ stmt :  AlterSchemaStmt
        | CreatedbStmt
        | DropdbStmt
        | VacuumStmt
+       | AnalyzeStmt
        | VariableSetStmt
        | VariableShowStmt
        | VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-       ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+       ALTER TABLE relation_expr ADD opt_column columnDef
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'A';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $7;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $6;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
-       | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+       | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'T';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->def = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+       | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+               {
+                   AlterTableStmt *n = makeNode(AlterTableStmt);
+                   n->subtype = 'S';
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = (Node *) makeInteger($9);
+                   $$ = (Node *)n;
+               }
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'D';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-       | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+       | ALTER TABLE relation_expr ADD TableConstraint
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'C';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $6;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $5;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'X';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE <name> CREATE TOAST TABLE */
@@ -997,6 +1008,7 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'E';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE <name> OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'U';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    n->name = $6;
                    $$ = (Node *)n;
                }
        ;
 
-alter_column_action:
+alter_column_default:
        SET DEFAULT a_expr
            {
                /* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference:  NO ACTION                { $$ = FKCONSTR_ON_KEY_NOACTION; }
        | SET DEFAULT                   { $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
        ;
 
-opt_only: ONLY                                 { $$ = INH_NO; }
-        | /*EMPTY*/                                { $$ = INH_DEFAULT; } 
-       ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'   { $$ = $3; }
        | /*EMPTY*/                                 { $$ = NIL; }
        ;
@@ -2598,14 +2607,13 @@ opt_force:  FORCE                                   {  $$ = TRUE; }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
                    RenameStmt *n = makeNode(RenameStmt);
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->column = $7;
-                   n->newname = $9;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->column = $6;
+                   n->newname = $8;
                    $$ = (Node *)n;
                }
        ;
@@ -2994,49 +3002,71 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
                    n->vacrel = NULL;
-                   n->va_spec = NIL;
+                   n->va_cols = NIL;
                    $$ = (Node *)n;
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose relation_name
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
-                   n->vacrel = $4;
-                   n->va_spec = $5;
-                   if ( $5 != NIL && !$4 )
-                       elog(ERROR,"VACUUM syntax error at or near \"(\""
-                           "\n\tRelation name must be specified");
+                   n->vacrel = $3;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | VACUUM opt_verbose AnalyzeStmt
+               {
+                   VacuumStmt *n = (VacuumStmt *) $3;
+                   n->vacuum = true;
+                   n->verbose |= $2;
                    $$ = (Node *)n;
                }
        ;
 
-opt_verbose:  VERBOSE                          { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = NULL;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = $3;
+                   n->va_cols = $4;
+                   $$ = (Node *)n;
+               }
        ;
 
-opt_analyze:  ANALYZE                          { $$ = TRUE; }
+analyze_keyword:  ANALYZE                      { $$ = TRUE; }
        |     ANALYSE /* British */             { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-opt_va_list:  '(' va_list ')'                  { $$ = $2; }
-       | /*EMPTY*/                             { $$ = NIL; }
+opt_verbose:  VERBOSE                          { $$ = TRUE; }
+       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-va_list:  name
-               { $$ = makeList1($1); }
-       | va_list ',' name
-               { $$ = lappend($1, $3); }
+opt_name_list:  '(' name_list ')'              { $$ = $2; }
+       | /*EMPTY*/                             { $$ = NIL; }
        ;
 
 
@@ -3160,12 +3190,12 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only relation_name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
                    DeleteStmt *n = makeNode(DeleteStmt);
-                   n->inhOpt = $3;
-                   n->relname = $4;
-                   n->whereClause = $5;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->whereClause = $4;
                    $$ = (Node *)n;
                }
        ;
@@ -3202,17 +3232,17 @@ opt_lmode:  SHARE               { $$ = TRUE; }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
                    UpdateStmt *n = makeNode(UpdateStmt);
-                   n->inhOpt = $2;
-                   n->relname = $3;
-                   n->targetList = $5;
-                   n->fromClause = $6;
-                   n->whereClause = $7;
+                   n->relname = $2->relname;
+                   n->inhOpt = $2->inhOpt;
+                   n->targetList = $4;
+                   n->fromClause = $5;
+                   n->whereClause = $6;
                    $$ = (Node *)n;
                }
        ;
@@ -3545,10 +3575,6 @@ select_offset_value: Iconst
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                             { $$ = INH_YES; }
-       | /*EMPTY*/                             { $$ = INH_DEFAULT; }
-       ;
-
 relation_name_list:  name_list;
 
 name_list:  name
@@ -3576,7 +3602,7 @@ opt_for_update_clause:    for_update_clause       { $$ = $1; }
        | /* EMPTY */                           { $$ = NULL; }
        ;
 
-update_list:  OF va_list                       { $$ = $2; }
+update_list:  OF name_list                     { $$ = $2; }
        | /* EMPTY */                           { $$ = makeList1(NULL); }
        ;
 
@@ -5525,6 +5551,7 @@ TokenId:  ABSOLUTE                        { $$ = "absolute"; }
        | SHARE                         { $$ = "share"; }
        | START                         { $$ = "start"; }
        | STATEMENT                     { $$ = "statement"; }
+       | STATISTICS                    { $$ = "statistics"; }
        | STDIN                         { $$ = "stdin"; }
        | STDOUT                        { $$ = "stdout"; }
        | SYSID                         { $$ = "sysid"; }


diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c

index 402dbfd28ca561a2c9d9ba513e7986dda06ec7df..8ab19f86ae8582213730311845cdbdcae0977f18 100644 (file)


--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c

index f5324cb37355532ef4233a335bc40fb5e5eb635e..e1d49842fd2398a3338bf8fb8329c7ca0677a2fe 100644 (file)


--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ static struct
    }
 };
 
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
 
 
 /*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
 
                foreach(l, pstate->p_forUpdate)
                {
-                   char       *rname = lfirst(l);
+                   char       *rname = strVal(lfirst(l));
 
                    if (strcmp(relname, rname) == 0)
                        return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
 
 #endif
 
-#ifdef NOT_USED
-/*
- * This should only be used if the relation is already
- * heap_open()'ed.  Use the cache version
- * for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
-   return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
 /* given attribute id, return type of that attribute */
 /*
  * This should only be used if the relation is already


diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c

index ae6cd20a5db3838c76a6f006232a8f04e5d4a800..b616f7e68ef875a0774de3f270c4cf98aa3dcc94 100644 (file)


--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
                                        interpretInhOption(stmt->inhOpt),
                                            (ColumnDef *) stmt->def);
                        break;
-                   case 'T':   /* ALTER COLUMN */
-                       AlterTableAlterColumn(stmt->relname,
+                   case 'T':   /* ALTER COLUMN DEFAULT */
+                       AlterTableAlterColumnDefault(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
-                                             stmt->name,
-                                             stmt->def);
+                                                    stmt->name,
+                                                    stmt->def);
                        break;
-                   case 'D':   /* ALTER DROP */
+                   case 'S':   /* ALTER COLUMN STATISTICS */
+                       AlterTableAlterColumnStatistics(stmt->relname,
+                                       interpretInhOption(stmt->inhOpt),
+                                                       stmt->name,
+                                                       stmt->def);
+                       break;
+                   case 'D':   /* DROP COLUMN */
                        AlterTableDropColumn(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
                                             stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
            break;
 
        case T_VacuumStmt:
-           set_ps_display(commandTag = "VACUUM");
+           if (((VacuumStmt *) parsetree)->vacuum)
+               commandTag = "VACUUM";
+           else
+               commandTag = "ANALYZE";
+           set_ps_display(commandTag);
 
-           vacuum(((VacuumStmt *) parsetree)->vacrel,
-                  ((VacuumStmt *) parsetree)->verbose,
-                  ((VacuumStmt *) parsetree)->analyze,
-                  ((VacuumStmt *) parsetree)->va_spec);
+           vacuum((VacuumStmt *) parsetree);
            break;
 
        case T_ExplainStmt:


diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 1fe0afb0a35b44ad34e76fbb73439194a73690ad..41ba82db7b574d6ba6d095a25092376d04702250 100644 (file)


--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,9 +57,6 @@
 /* default selectivity estimate for pattern-match operators such as LIKE */
 #define DEFAULT_MATCH_SEL  0.01
 
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO  0.1
-
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                  Datum lobound, Datum hibound, Oid boundstypid,
                  double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
 static unsigned char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid,
-                int *typlen,
-                bool *typbyval,
-                int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
-                Oid typid, int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival);
+                            Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                                 Form_pg_statistic stats);
 static Selectivity prefix_selectivity(char *prefix,
                   Oid relid,
                   AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
-
-   if (NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_EQ_SEL;
-   else
+   Oid         typid;
+   int32       typmod;
+   HeapTuple   statsTuple;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      selec;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+   /* get info about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   /* get stats for the attribute, if available */
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (HeapTupleIsValid(statsTuple))
    {
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       double      nullfrac;
-       double      commonfrac;
-       Datum       commonval;
-       double      selec;
-
-       /* get info about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       /* get stats for the attribute, if available */
-       if (getattstatistics(relid, attno, typid, typmod,
-                            &nullfrac, &commonfrac, &commonval,
-                            NULL, NULL))
-       {
-           if (flag & SEL_CONSTANT)
-           {
+       Form_pg_statistic stats;
 
-               /*
-                * Is the constant "=" to the column's most common value?
-                * (Although the operator may not really be "=", we will
-                * assume that seeing whether it returns TRUE for the most
-                * common value is useful information. If you don't like
-                * it, maybe you shouldn't be using eqsel for your
-                * operator...)
-                */
-               RegProcedure eqproc = get_opcode(opid);
-               bool        mostcommon;
+       stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-               if (eqproc == (RegProcedure) NULL)
-                   elog(ERROR, "eqsel: no procedure for operator %u",
-                        opid);
+       if (flag & SEL_CONSTANT)
+       {
+           bool    match = false;
+           int     i;
 
-               /* be careful to apply operator right way 'round */
-               if (flag & SEL_RIGHT)
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              commonval,
-                                                              value));
-               else
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              value,
-                                                            commonval));
+           /*
+            * Is the constant "=" to any of the column's most common
+            * values?  (Although the given operator may not really be
+            * "=", we will assume that seeing whether it returns TRUE
+            * is an appropriate test.  If you don't like this, maybe you
+            * shouldn't be using eqsel for your operator...)
+            */
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                &values, &nvalues,
+                                &numbers, &nnumbers))
+           {
+               FmgrInfo    eqproc;
 
-               if (mostcommon)
-               {
+               fmgr_info(get_opcode(opid), &eqproc);
 
-                   /*
-                    * Constant is "=" to the most common value.  We know
-                    * selectivity exactly (or as exactly as VACUUM could
-                    * calculate it, anyway).
-                    */
-                   selec = commonfrac;
-               }
-               else
+               for (i = 0; i < nvalues; i++)
                {
-
-                   /*
-                    * Comparison is against a constant that is neither
-                    * the most common value nor null.  Its selectivity
-                    * cannot be more than this:
-                    */
-                   selec = 1.0 - commonfrac - nullfrac;
-                   if (selec > commonfrac)
-                       selec = commonfrac;
-
-                   /*
-                    * and in fact it's probably less, so we should apply
-                    * a fudge factor.  The only case where we don't is
-                    * for a boolean column, where indeed we have
-                    * estimated the less-common value's frequency
-                    * exactly!
-                    */
-                   if (typid != BOOLOID)
-                       selec *= NOT_MOST_COMMON_RATIO;
+                   /* be careful to apply operator right way 'round */
+                   if (flag & SEL_RIGHT)
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          values[i],
+                                                          value));
+                   else
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          value,
+                                                          values[i]));
+                   if (match)
+                       break;
                }
            }
            else
            {
+               /* no most-common-value info available */
+               values = NULL;
+               numbers = NULL;
+               i = nvalues = nnumbers = 0;
+           }
 
+           if (match)
+           {
+               /*
+                * Constant is "=" to this common value.  We know
+                * selectivity exactly (or as exactly as VACUUM
+                * could calculate it, anyway).
+                */
+               selec = numbers[i];
+           }
+           else
+           {
                /*
-                * Search is for a value that we do not know a priori, but
-                * we will assume it is not NULL.  Selectivity cannot be
-                * more than this:
+                * Comparison is against a constant that is neither
+                * NULL nor any of the common values.  Its selectivity
+                * cannot be more than this:
                 */
-               selec = 1.0 - nullfrac;
-               if (selec > commonfrac)
-                   selec = commonfrac;
+               double  sumcommon = 0.0;
+               double  otherdistinct;
 
+               for (i = 0; i < nnumbers; i++)
+                   sumcommon += numbers[i];
+               selec = 1.0 - sumcommon - stats->stanullfrac;
+               /*
+                * and in fact it's probably a good deal less.
+                * We approximate that all the not-common values
+                * share this remaining fraction equally, so we
+                * divide by the number of other distinct values.
+                */
+               otherdistinct = get_att_numdistinct(relid, attno,
+                                                   typid, stats)
+                   - nnumbers;
+               if (otherdistinct > 1)
+                   selec /= otherdistinct;
                /*
-                * and in fact it's probably less, so apply a fudge
-                * factor.
+                * Another cross-check: selectivity shouldn't be
+                * estimated as more than the least common
+                * "most common value".
                 */
-               selec *= NOT_MOST_COMMON_RATIO;
+               if (nnumbers > 0 && selec > numbers[nnumbers-1])
+                   selec = numbers[nnumbers-1];
            }
 
-           /* result should be in range, but make sure... */
-           if (selec < 0.0)
-               selec = 0.0;
-           else if (selec > 1.0)
-               selec = 1.0;
-
-           if (!typbyval)
-               pfree(DatumGetPointer(commonval));
+           free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
        }
        else
        {
+           double      ndistinct;
 
            /*
-            * No VACUUM ANALYZE stats available, so make a guess using
-            * the dispersion stat (if we have that, which is unlikely for
-            * a normal attribute; but for a system attribute we may be
-            * able to estimate it).
+            * Search is for a value that we do not know a priori, but
+            * we will assume it is not NULL.  Estimate the selectivity
+            * as non-null fraction divided by number of distinct values,
+            * so that we get a result averaged over all possible values
+            * whether common or uncommon.  (Essentially, we are assuming
+            * that the not-yet-known comparison value is equally likely
+            * to be any of the possible values, regardless of their
+            * frequency in the table.  Is that a good idea?)
+            */
+           selec = 1.0 - stats->stanullfrac;
+           ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+           if (ndistinct > 1)
+               selec /= ndistinct;
+           /*
+            * Cross-check: selectivity should never be
+            * estimated as more than the most common value's.
             */
-           selec = get_attdispersion(relid, attno, 0.01);
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                NULL, NULL,
+                                &numbers, &nnumbers))
+           {
+               if (nnumbers > 0 && selec > numbers[0])
+                   selec = numbers[0];
+               free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+           }
        }
 
-       result = (float8) selec;
+       ReleaseSysCache(statsTuple);
    }
-   PG_RETURN_FLOAT8(result);
+   else
+   {
+       /*
+        * No VACUUM ANALYZE stats available, so make a guess using
+        * estimated number of distinct values and assuming they are
+        * equally common.  (The guess is unlikely to be very good,
+        * but we do know a few special cases.)
+        */
+       selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+   }
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
+   bool        isgt;
+   HeapTuple   oprTuple;
+   HeapTuple   statsTuple;
+   Form_pg_statistic stats;
+   Oid         contype;
+   FmgrInfo    opproc;
+   Oid         typid;
+   int32       typmod;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      mcv_selec,
+               hist_selec,
+               sumcommon;
+   double      selec;
+   int         i;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+   /* Can't do anything useful if no constant to compare against, either */
+   if (!(flag & SEL_CONSTANT))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
-   if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_INEQ_SEL;
+   /*
+    * Force the constant to be on the right to simplify later logic.
+    * This means that we may be dealing with either "<" or ">" cases.
+    */
+   if (flag & SEL_RIGHT)
+   {
+       /* we have x < const */
+       isgt = false;
+   }
    else
    {
-       HeapTuple   oprtuple;
-       Oid         ltype,
-                   rtype,
-                   contype;
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       Datum       hival,
-                   loval;
-       double      val,
-                   high,
-                   low,
-                   numerator,
-                   denominator;
-
-       /*
-        * Get left and right datatypes of the operator so we know what
-        * type the constant is.
-        */
-       oprtuple = SearchSysCache(OPEROID,
-                                 ObjectIdGetDatum(opid),
-                                 0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
-           elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       contype = (flag & SEL_RIGHT) ? rtype : ltype;
-       ReleaseSysCache(oprtuple);
-
-       /* Now get info and stats about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       if (!getattstatistics(relid, attno, typid, typmod,
-                             NULL, NULL, NULL,
-                             &loval, &hival))
+       /* we have const < x, commute to make x > const */
+       opid = get_commutator(opid);
+       if (!opid)
        {
-           /* no stats available, so default result */
+           /* Use default selectivity (should we raise an error instead?) */
            PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
        }
+       isgt = true;
+   }
 
-       /* Convert the values to a uniform comparison scale. */
-       if (!convert_to_scalar(value, contype, &val,
-                              loval, hival, typid,
-                              &low, &high))
-       {
+   /*
+    * The constant might not be the same datatype as the column;
+    * look at the operator's input types to find out what it is.
+    * Also set up to be able to call the operator's execution proc.
+    */
+   oprTuple = SearchSysCache(OPEROID,
+                             ObjectIdGetDatum(opid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(oprTuple))
+       elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+   contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+   fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+   ReleaseSysCache(oprTuple);
+
+   /* Now get info and stats about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (!HeapTupleIsValid(statsTuple))
+   {
+       /* no stats available, so default result */
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-           /*
-            * Ideally we'd produce an error here, on the grounds that the
-            * given operator shouldn't have scalarltsel registered as its
-            * selectivity func unless we can deal with its operand types.
-            * But currently, all manner of stuff is invoking scalarltsel,
-            * so give a default estimate until that can be fixed.
-            */
-           if (!typbyval)
-           {
-               pfree(DatumGetPointer(hival));
-               pfree(DatumGetPointer(loval));
-           }
-           PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
-       }
+   /*
+    * If we have most-common-values info, add up the fractions of the
+    * MCV entries that satisfy MCV OP CONST.  These fractions contribute
+    * directly to the result selectivity.  Also add up the total fraction
+    * represented by MCV entries.
+    */
+   mcv_selec = 0.0;
+   sumcommon = 0.0;
 
-       /* release temp storage if needed */
-       if (!typbyval)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        &values, &nvalues,
+                        &numbers, &nnumbers))
+   {
+       for (i = 0; i < nvalues; i++)
        {
-           pfree(DatumGetPointer(hival));
-           pfree(DatumGetPointer(loval));
+           if (DatumGetBool(FunctionCall2(&opproc,
+                                          values[i],
+                                          value)))
+               mcv_selec += numbers[i];
+           sumcommon += numbers[i];
        }
+       free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+   }
+
+   /*
+    * If there is a histogram, determine which bin the constant falls in,
+    * and compute the resulting contribution to selectivity.
+    *
+    * Someday, VACUUM might store more than one histogram per rel/att,
+    * corresponding to more than one possible sort ordering defined for
+    * the column type.  However, to make that work we will need to figure
+    * out which staop to search for --- it's not necessarily the one we
+    * have at hand!  (For example, we might have a '<=' operator rather
+    * than the '<' operator that will appear in staop.)  For now, assume
+    * that whatever appears in pg_statistic is sorted the same way our
+    * operator sorts.
+    */
+   hist_selec = 0.0;
 
-       if (high <= low)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                        &values, &nvalues,
+                        NULL, NULL))
+   {
+       if (nvalues > 1)
        {
+           double  histfrac;
+           bool    ltcmp;
+
+           ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                              values[0],
+                                              value));
+           if (isgt)
+               ltcmp = !ltcmp;
+           if (!ltcmp)
+           {
+               /* Constant is below lower histogram boundary. */
+               histfrac = 0.0;
+           }
+           else
+           {
+               /*
+                * Scan to find proper location.  This could be made faster
+                * by using a binary-search method, but it's probably not
+                * worth the trouble for typical histogram sizes.
+                */
+               for (i = 1; i < nvalues; i++)
+               {
+                   ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                                      values[i],
+                                                      value));
+                   if (isgt)
+                       ltcmp = !ltcmp;
+                   if (!ltcmp)
+                       break;
+               }
+               if (i >= nvalues)
+               {
+                   /* Constant is above upper histogram boundary. */
+                   histfrac = 1.0;
+               }
+               else
+               {
+                   double      val,
+                               high,
+                               low;
+                   double      binfrac;
 
+                   /*
+                    * We have values[i-1] < constant < values[i].
+                    *
+                    * Convert the constant and the two nearest bin boundary
+                    * values to a uniform comparison scale, and do a linear
+                    * interpolation within this bin.
+                    */
+                   if (convert_to_scalar(value, contype, &val,
+                                         values[i-1], values[i], typid,
+                                         &low, &high))
+                   {
+                       if (high <= low)
+                       {
+                           /* cope if bin boundaries appear identical */
+                           binfrac = 0.5;
+                       }
+                       else if (val <= low)
+                           binfrac = 0.0;
+                       else if (val >= high)
+                           binfrac = 1.0;
+                       else
+                           binfrac = (val - low) / (high - low);
+                   }
+                   else
+                   {
+                       /*
+                        * Ideally we'd produce an error here, on the grounds
+                        * that the given operator shouldn't have scalarltsel
+                        * registered as its selectivity func unless we can
+                        * deal with its operand types.  But currently, all
+                        * manner of stuff is invoking scalarltsel, so give a
+                        * default estimate until that can be fixed.
+                        */
+                       binfrac = 0.5;
+                   }
+                   /*
+                    * Now, compute the overall selectivity across the values
+                    * represented by the histogram.  We have i-1 full bins
+                    * and binfrac partial bin below the constant.
+                    */
+                   histfrac = (double) (i-1) + binfrac;
+                   histfrac /= (double) (nvalues - 1);
+               }
+           }
            /*
-            * If we trusted the stats fully, we could return a small or
-            * large selec depending on which side of the single data
-            * point the constant is on.  But it seems better to assume
-            * that the stats are wrong and return a default...
+            * Now histfrac = fraction of histogram entries below the constant.
+            *
+            * Account for "<" vs ">"
             */
-           result = DEFAULT_INEQ_SEL;
-       }
-       else if (val < low || val > high)
-       {
-
+           hist_selec = isgt ? (1.0 - histfrac) : histfrac;
            /*
-            * If given value is outside the statistical range, return a
-            * small or large value; but not 0.0/1.0 since there is a
-            * chance the stats are out of date.
+            * The histogram boundaries are only approximate to begin
+            * with, and may well be out of date anyway.  Therefore,
+            * don't believe extremely small or large selectivity
+            * estimates.
             */
-           if (flag & SEL_RIGHT)
-               result = (val < low) ? 0.001 : 0.999;
-           else
-               result = (val < low) ? 0.999 : 0.001;
-       }
-       else
-       {
-           denominator = high - low;
-           if (flag & SEL_RIGHT)
-               numerator = val - low;
-           else
-               numerator = high - val;
-           result = numerator / denominator;
+           if (hist_selec < 0.001)
+               hist_selec = 0.001;
+           else if (hist_selec > 0.999)
+               hist_selec = 0.999;
        }
+
+       free_attstatsslot(typid, values, nvalues, NULL, 0);
    }
-   PG_RETURN_FLOAT8(result);
+
+   /*
+    * Now merge the results from the MCV and histogram calculations,
+    * realizing that the histogram covers only the non-null values that
+    * are not listed in MCV.
+    */
+   selec = 1.0 - stats->stanullfrac - sumcommon;
+
+   if (hist_selec > 0.0)
+       selec *= hist_selec;
+   else
+   {
+       /*
+        * If no histogram but there are values not accounted for by MCV,
+        * arbitrarily assume half of them will match.
+        */
+       selec *= 0.5;
+   }
+
+   selec += mcv_selec;
+
+   ReleaseSysCache(statsTuple);
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
    Oid         ltopid;
-   float8      result;
 
    /*
-    * Compute selectivity of "<", then invert --- but only if we were
-    * able to produce a non-default estimate.  Note that we get the
-    * negator which strictly speaking means we are looking at "<=" for
-    * ">" or "<" for ">=".  We assume this won't matter.
+    * Commute so that we have a "<" or "<=" operator, then apply
+    * scalarltsel.
     */
-   ltopid = get_negator(opid);
-   if (ltopid)
-   {
-       result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
-                                               ObjectIdGetDatum(ltopid),
-                                                ObjectIdGetDatum(relid),
-                                                   Int16GetDatum(attno),
-                                                   value,
-                                                   Int32GetDatum(flag)));
-   }
-   else
+   ltopid = get_commutator(opid);
+   if (!ltopid)
    {
        /* Use default selectivity (should we raise an error instead?) */
-       result = DEFAULT_INEQ_SEL;
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
    }
 
-   if (result != DEFAULT_INEQ_SEL)
-       result = 1.0 - result;
-
-   PG_RETURN_FLOAT8(result);
+   flag ^= SEL_RIGHT;
+   return DirectFunctionCall5(scalarltsel,
+                              ObjectIdGetDatum(ltopid),
+                              ObjectIdGetDatum(relid),
+                              Int16GetDatum(attno),
+                              value,
+                              Int32GetDatum(flag));
 }
 
 /*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
        result = DEFAULT_MATCH_SEL;
    else
    {
-       HeapTuple   oprtuple;
+       HeapTuple   oprTuple;
        Oid         ltype,
                    rtype;
        char       *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
         * Get left and right datatypes of the operator so we know what
         * type the attribute is.
         */
-       oprtuple = SearchSysCache(OPEROID,
+       oprTuple = SearchSysCache(OPEROID,
                                  ObjectIdGetDatum(opid),
                                  0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
+       if (!HeapTupleIsValid(oprTuple))
            elog(ERROR, "patternsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       ReleaseSysCache(oprtuple);
+       ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+       rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+       ReleaseSysCache(oprTuple);
 
        /* the right-hand const is type text for all supported operators */
        Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
    AttrNumber  attno1 = PG_GETARG_INT16(2);
    Oid         relid2 = PG_GETARG_OID(3);
    AttrNumber  attno2 = PG_GETARG_INT16(4);
-   float8      result;
-   float8      num1,
-               num2,
-               min;
    bool        unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
    bool        unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+   double      selec;
 
    if (unknown1 && unknown2)
-       result = DEFAULT_EQ_SEL;
+       selec = DEFAULT_EQ_SEL;
    else
    {
-       num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
-       num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+       Oid         typid1;
+       Oid         typid2;
+       int32       typmod1;
+       int32       typmod2;
+       HeapTuple   statsTuple1 = NULL;
+       HeapTuple   statsTuple2 = NULL;
+       Form_pg_statistic stats1 = NULL;
+       Form_pg_statistic stats2 = NULL;
+       double      nd1,
+                   nd2;
+
+       if (unknown1)
+       {
+           nd1 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid1, attno1, &typid1, &typmod1);
+
+           /* get stats for the attribute, if available */
+           statsTuple1 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid1),
+                                        Int16GetDatum(attno1),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple1))
+               stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+           nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+       }
+
+       if (unknown2)
+       {
+           nd2 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid2, attno2, &typid2, &typmod2);
+
+           /* get stats for the attribute, if available */
+           statsTuple2 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid2),
+                                        Int16GetDatum(attno2),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple2))
+               stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+           nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+       }
 
        /*
-        * The join selectivity cannot be more than num2, since each tuple
-        * in table 1 could match no more than num2 fraction of tuples in
-        * table 2 (and that's only if the table-1 tuple matches the most
-        * common value in table 2, so probably it's less).  By the same
-        * reasoning it is not more than num1. The min is therefore an
-        * upper bound.
+        * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+        * (can we produce any theory for this?)
         *
-        * If we know the dispersion of only one side, use it; the reasoning
-        * above still works.
+        * XXX possibility to do better: if both attributes have MCV lists
+        * then we could determine the exact join selectivity between the
+        * MCV sets, and only have to assume the join behavior of the non-MCV
+        * values.  This could be a big win when the MCVs cover a large part
+        * of the population.
         *
-        * XXX can we make a better estimate here?  Using the nullfrac
-        * statistic might be helpful, for example.  Assuming the operator
-        * is strict (does not succeed for null inputs) then the
-        * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
-        * which might be usefully small if there are many nulls.  How
-        * about applying the operator to the most common values?
+        * XXX what about nulls?
         */
-       min = (num1 < num2) ? num1 : num2;
-       result = min;
+       selec = 1.0 / sqrt(nd1 * nd2);
+       if (selec > 1.0)
+           selec = 1.0;
+
+       if (HeapTupleIsValid(statsTuple1))
+           ReleaseSysCache(statsTuple1);
+       if (HeapTupleIsValid(statsTuple2))
+           ReleaseSysCache(statsTuple2);
+
    }
-   PG_RETURN_FLOAT8(result);
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  *   Returns "true" if successful.
  *
  * All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values.  XXX what about NUMERIC values that are outside
+ * the range of "double"?
  *
  * String datatypes are converted by convert_string_to_scalar(),
  * which is explained below.  The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
 {
    switch (typid)
    {
-           case BOOLOID:
+       case BOOLOID:
            return (double) DatumGetBool(value);
        case INT2OID:
            return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  * three strings before computing the scaled values.  This allows us to
  * "zoom in" when we encounter a narrow data range.  An example is a phone
  * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
  */
 static void
 convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
 /*
  * getattproperties
  *   Retrieve pg_attribute properties for an attribute,
- *   including type OID, type len, type byval flag, typmod.
+ *   including type OID and typmod.
  */
 static void
 getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+                Oid *typid, int32 *typmod)
 {
    HeapTuple   atp;
    Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
    att_tup = (Form_pg_attribute) GETSTRUCT(atp);
 
    *typid = att_tup->atttypid;
-   *typlen = att_tup->attlen;
-   *typbyval = att_tup->attbyval;
    *typmod = att_tup->atttypmod;
 
    ReleaseSysCache(atp);
 }
 
 /*
- * getattstatistics
- *   Retrieve the pg_statistic data for an attribute.
- *   Returns 'false' if no stats are available.
+ * get_att_numdistinct
  *
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ *   Estimate the number of distinct values of an attribute.
  *
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
  *
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values.  (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute.  If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
  */
-static bool
-getattstatistics(Oid relid,
-                AttrNumber attnum,
-                Oid typid,
-                int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                   Form_pg_statistic stats)
 {
-   HeapTuple   tuple;
-   HeapTuple   typeTuple;
-   FmgrInfo    inputproc;
-   Oid         typelem;
-   bool        isnull;
+   HeapTuple   reltup;
+   double      ntuples;
 
    /*
-    * We assume that there will only be one entry in pg_statistic for the
-    * given rel/att, so we search WITHOUT considering the staop column.
-    * Someday, VACUUM might store more than one entry per rel/att,
-    * corresponding to more than one possible sort ordering defined for
-    * the column type.  However, to make that work we will need to figure
-    * out which staop to search for --- it's not necessarily the one we
-    * have at hand!  (For example, we might have a '>' operator rather
-    * than the '<' operator that will appear in staop.)
+    * Special-case boolean columns: presumably, two distinct values.
+    *
+    * Are there any other cases we should wire in special estimates for?
     */
-   tuple = SearchSysCache(STATRELID,
-                          ObjectIdGetDatum(relid),
-                          Int16GetDatum((int16) attnum),
-                          0, 0);
-   if (!HeapTupleIsValid(tuple))
-   {
-       /* no such stats entry */
-       return false;
-   }
+   if (typid == BOOLOID)
+       return 2.0;
 
-   if (nullfrac)
-       *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
-   if (commonfrac)
-       *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
-   /* Get the type input proc for the column datatype */
-   typeTuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(typid),
-                              0, 0, 0);
-   if (!HeapTupleIsValid(typeTuple))
-       elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
-            typid);
-   fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
-   typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
-   ReleaseSysCache(typeTuple);
+   /*
+    * If VACUUM ANALYZE determined a fixed estimate, use it.
+    */
+   if (stats && stats->stadistinct > 0.0)
+       return stats->stadistinct;
 
    /*
-    * Values are variable-length fields, so cannot access as struct
-    * fields. Must do it the hard way with SysCacheGetAttr.
+    * Otherwise we need to get the relation size.
     */
-   if (commonval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stacommonval,
-                                         &isnull);
+   reltup = SearchSysCache(RELOID,
+                           ObjectIdGetDatum(relid),
+                           0, 0, 0);
+   if (!HeapTupleIsValid(reltup))
+       elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stacommonval is null");
-           *commonval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *commonval = FunctionCall3(&inputproc,
-                                      CStringGetDatum(strval),
-                                      ObjectIdGetDatum(typelem),
-                                      Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
 
-   if (loval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_staloval,
-                                         &isnull);
+   ReleaseSysCache(reltup);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: staloval is null");
-           *loval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *loval = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   if (ntuples <= 0.0)
+       return 100.0;           /* no data available; return a default */
 
-   if (hival)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stahival,
-                                         &isnull);
+   /*
+    * If VACUUM ANALYZE determined a scaled estimate, use it.
+    */
+   if (stats && stats->stadistinct < 0.0)
+       return - stats->stadistinct * ntuples;
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stahival is null");
-           *hival = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *hival = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
+   /*
+    * VACUUM ANALYZE does not compute stats for system attributes,
+    * but some of them can reasonably be assumed unique anyway.
+    */
+   switch (attnum)
+   {
+       case ObjectIdAttributeNumber:
+       case SelfItemPointerAttributeNumber:
+           return ntuples;
+       case TableOidAttributeNumber:
+           return 1.0;
    }
 
-   ReleaseSysCache(tuple);
+   /*
+    * Estimate ndistinct = ntuples if the table is small, else 100.
+    */
+   if (ntuples < 100.0)
+       return ntuples;
 
-   return true;
+   return 100.0;
 }
 
 /*-------------------------------------------------------------------------


diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 82d55866215aac34724aa44deb029feea9d94a76..3995de5d7a1325085c901b0d2427cbbd775170ee 100644 (file)


--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   Eventually, the index information should go through here, too.
@@ -18,7 +18,10 @@
 #include "access/tupmacs.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
        return -1;
 }
 
-/*
- * get_attdispersion
- *
- *   Retrieve the dispersion statistic for an attribute,
- *   or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value.  This value may vary
- * depending on context.  (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
-   HeapTuple   atp;
-   Form_pg_attribute att_tup;
-   double      dispersion;
-   Oid         atttypid;
-   int32       ntuples;
-
-   atp = SearchSysCache(ATTNUM,
-                        ObjectIdGetDatum(relid),
-                        Int16GetDatum(attnum),
-                        0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
-            relid, attnum);
-       return min_estimate;
-   }
-
-   att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
-   dispersion = att_tup->attdispersion;
-   atttypid = att_tup->atttypid;
-
-   ReleaseSysCache(atp);
-
-   if (dispersion > 0.0)
-       return dispersion;      /* we have a specific estimate from VACUUM */
-
-   /*
-    * Special-case boolean columns: the dispersion of a boolean is highly
-    * unlikely to be anywhere near 1/numtuples, instead it's probably
-    * more like 0.5.
-    *
-    * Are there any other cases we should wire in special estimates for?
-    */
-   if (atttypid == BOOLOID)
-       return 0.5;
-
-   /*
-    * Dispersion is either 0 (no data available) or -1 (dispersion is
-    * 1/numtuples).  Either way, we need the relation size.
-    */
-
-   atp = SearchSysCache(RELOID,
-                        ObjectIdGetDatum(relid),
-                        0, 0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
-       return min_estimate;
-   }
-
-   ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-   ReleaseSysCache(atp);
-
-   if (ntuples == 0)
-       return min_estimate;    /* no data available */
-
-   if (dispersion < 0.0)       /* VACUUM thinks there are no duplicates */
-       return 1.0 / (double) ntuples;
-
-   /*
-    * VACUUM ANALYZE does not compute dispersion for system attributes,
-    * but some of them can reasonably be assumed unique anyway.
-    */
-   if (attnum == ObjectIdAttributeNumber ||
-       attnum == SelfItemPointerAttributeNumber)
-       return 1.0 / (double) ntuples;
-   if (attnum == TableOidAttributeNumber)
-       return 1.0;
-
-   /*
-    * VACUUM ANALYZE has not been run for this table. Produce an estimate
-    * of 1/numtuples.  This may produce unreasonably small estimates for
-    * large tables, so limit the estimate to no less than min_estimate.
-    */
-   dispersion = 1.0 / (double) ntuples;
-   if (dispersion < min_estimate)
-       dispersion = min_estimate;
-
-   return dispersion;
-}
-
 /*             ---------- INDEX CACHE ----------                        */
 
 /*     watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
 }
 
 #endif
+
+/*             ---------- STATISTICS CACHE ----------                   */
+
+/*
+ * get_attstatsslot
+ *
+ *     Extract the contents of a "slot" of a pg_statistic tuple.
+ *     Returns TRUE if requested slot type was found, else FALSE; when
+ *     FALSE is returned, none of the output arguments have been assigned.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache, so that callers
+ * wanting several values from one entry need not repeat the lookup.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype, atttypmod: type OID and typmod of the attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if values is not NULL, the slot's stavalues are
+ *     extracted; nvalues is then written through and must not be NULL.
+ * numbers, nnumbers: likewise for the slot's stanumbers.
+ *
+ * The first slot matching reqkind/reqop is used; elog(ERROR) is raised if
+ * a requested array is null, stanumbers is bogus, or the type lookup fails.
+ * If assigned, values and numbers point to palloc'd arrays (plus palloc'd
+ * datums for a pass-by-reference type); free with free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+                Oid atttype, int32 atttypmod,
+                int reqkind, Oid reqop,
+                Datum **values, int *nvalues,
+                float4 **numbers, int *nnumbers)
+{
+   Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+   int         i,              /* index of the slot being examined */
+               j;              /* index into the extracted values */
+   Datum       val;
+   bool        isnull;
+   ArrayType  *statarray;
+   int         narrayelem;
+   HeapTuple   typeTuple;
+   FmgrInfo    inputproc;
+   Oid         typelem;
+
+   for (i = 0; i < STATISTIC_NUM_SLOTS; i++)   /* slots are indexed off stakind1/staop1 */
+   {
+       if ((&stats->stakind1)[i] == reqkind &&
+           (reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+           break;              /* i now identifies the first match */
+   }
+   if (i >= STATISTIC_NUM_SLOTS)
+       return false;           /* not there */
+
+   if (values)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stavalues1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stavalues is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * Do initial examination of the array.  This produces a list
+        * of text Datums --- ie, pointers into the text array value.
+        */
+       deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+       narrayelem = *nvalues;
+       /*
+        * We now need to replace each text Datum by its internal equivalent.
+        *
+        * Get the type input proc and typelem for the column datatype.
+        */
+       typeTuple = SearchSysCache(TYPEOID,
+                                  ObjectIdGetDatum(atttype),
+                                  0, 0, 0);
+       if (!HeapTupleIsValid(typeTuple))
+           elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+                atttype);
+       fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+       typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+       ReleaseSysCache(typeTuple);     /* inputproc and typelem were copied out above */
+       /*
+        * Convert each element: textout gives a C string, which is fed to
+        * the type's input proc.  The palloc'd Datum array is reused in place.
+        */
+       for (j = 0; j < narrayelem; j++)
+       {
+           char       *strval;
+
+           strval = DatumGetCString(DirectFunctionCall1(textout,
+                                                        (*values)[j]));
+           (*values)[j] = FunctionCall3(&inputproc,
+                                        CStringGetDatum(strval),
+                                        ObjectIdGetDatum(typelem),
+                                        Int32GetDatum(atttypmod));
+           pfree(strval);      /* the intermediate C string is no longer needed */
+       }
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   if (numbers)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stanumbers1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stanumbers is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * We expect the array to be a 1-D float4 array; verify that.
+        * We don't need to use deconstruct_array() since the array
+        * data is just going to look like a C array of float4 values.
+        */
+       narrayelem = ARR_DIMS(statarray)[0];    /* NOTE(review): read before ARR_NDIM is checked below */
+       if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+           ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+           elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+       *numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+       memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+       *nnumbers = narrayelem;
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   return true;
+}
+
+void                           /* frees the arrays handed out by get_attstatsslot */
+free_attstatsslot(Oid atttype,     /* attribute type; decides whether the datums are pfree'd */
+                 Datum *values, int nvalues,       /* values may be NULL, then skipped */
+                 float4 *numbers, int nnumbers)    /* numbers may be NULL; nnumbers is not used */
+{
+   if (values)
+   {
+       if (! get_typbyval(atttype))    /* not pass-by-value: each Datum is a pointer to pfree */
+       {
+           int     i;
+
+           for (i = 0; i < nvalues; i++)
+               pfree(DatumGetPointer(values[i]));
+       }
+       pfree(values);                  /* then the Datum array itself */
+   }
+   if (numbers)
+       pfree(numbers);
+}


diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index 75ef3179202695a3fb7a5336b7bc4f3e24d3f3f5..4e35b3fb35ba67aa78d337e6bdb39149c6256f8c 100644 (file)


--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
            0,
            0
    }},
-   {StatisticRelationName,     /* STATRELID */
+   {StatisticRelationName,     /* STATRELATT */
        StatisticRelidAttnumIndex,
        2,
        {


diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index d27bfb29668711e985f1ba29bd1285ab77201bf2..5a77c47c20085f0d24ae5b8edb6ef2ca70acdc27 100644 (file)


--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -78,7 +78,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,7 +87,11 @@
 
 #include "access/heapam.h"
 #include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
 #include "miscadmin.h"
+#include "utils/fmgroids.h"
 #include "utils/logtape.h"
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
    TupleDesc   tupDesc;
    int         nKeys;
    ScanKey     scanKeys;
+   SortFunctionKind *sortFnKinds;
 
    /*
     * These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
    Oid         datumType;
    Oid         sortOperator;
    FmgrInfo    sortOpFn;       /* cached lookup data for sortOperator */
+   SortFunctionKind sortFnKind;
    /* we need typelen and byval in order to know how to copy the Datums. */
    int         datumTypeLen;
    bool        datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
 
 Tuplesortstate *
 tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
+                    int nkeys,
+                    Oid *sortOperators, AttrNumber *attNums,
                     bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   int         i;
 
-   AssertArg(nkeys >= 1);
-   AssertArg(keys[0].sk_attno != 0);
-   AssertArg(keys[0].sk_procedure != 0);
+   AssertArg(nkeys > 0);
 
    state->comparetup = comparetup_heap;
    state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 
    state->tupDesc = tupDesc;
    state->nKeys = nkeys;
-   state->scanKeys = keys;
+   state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+   MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+   state->sortFnKinds = (SortFunctionKind *)
+       palloc(nkeys * sizeof(SortFunctionKind));
+   MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+   for (i = 0; i < nkeys; i++)
+   {
+       RegProcedure sortFunction;
+
+       AssertArg(sortOperators[i] != 0);
+       AssertArg(attNums[i] != 0);
+
+       /* select a function that implements the sort operator */
+       SelectSortFunction(sortOperators[i], &sortFunction,
+                          &state->sortFnKinds[i]);
+
+       ScanKeyEntryInitialize(&state->scanKeys[i],
+                              0x0,
+                              attNums[i],
+                              sortFunction,
+                              (Datum) 0);
+   }
 
    return state;
 }
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
                      bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   RegProcedure sortFunction;
    int16       typlen;
    bool        typbyval;
 
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
 
    state->datumType = datumType;
    state->sortOperator = sortOperator;
-   /* lookup the function that implements the sort operator */
-   fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+   /* select a function that implements the sort operator */
+   SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+   /* and look up the function */
+   fmgr_info(sortFunction, &state->sortOpFn);
+
    /* lookup necessary attributes of the datum type */
    get_typlenbyval(datumType, &typlen, &typbyval);
    state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
    }
    if (state->memtupindex)
        pfree(state->memtupindex);
+
+   /* this stuff might better belong in a variant-specific shutdown routine */
+   if (state->scanKeys)
+       pfree(state->scanKeys);
+   if (state->sortFnKinds)
+       pfree(state->sortFnKinds);
+
    pfree(state);
 }
 
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
    for (nkey = 0; nkey < state->nKeys; nkey++)
    {
        ScanKey     scanKey = state->scanKeys + nkey;
+       SortFunctionKind fnKind = state->sortFnKinds[nkey];
        AttrNumber  attno = scanKey->sk_attno;
        Datum       lattr,
                    rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
        }
        else if (isnull2)
            return -1;
-       else if (scanKey->sk_flags & SK_COMMUTE)
-       {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return -1;      /* a < b after commute */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return 1;       /* a > b after commute */
-       }
        else
        {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return -1;      /* a < b */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return 1;       /* a > b */
+           int32       compare;
+
+           if (fnKind == SORTFUNC_LT)
+           {
+               if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                              lattr, rattr)))
+                   compare = -1;   /* a < b */
+               else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                                   rattr, lattr)))
+                   compare = 1;    /* a > b */
+               else
+                   compare = 0;
+           }
+           else
+           {
+               /* sort function is CMP or REVCMP */
+               compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+                                                     lattr, rattr));
+               if (fnKind == SORTFUNC_REVCMP)
+                   compare = -compare;
+           }
+
+           if (compare != 0)
+           {
+               if (scanKey->sk_flags & SK_COMMUTE)
+                   compare = -compare;
+               return compare;
+           }
        }
    }
 
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
        }
        else
        {
+           /* the comparison function is always of CMP type */
            compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
-                                               attrDatum1, attrDatum2));
+                                                 attrDatum1,
+                                                 attrDatum2));
        }
 
        if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
    }
    else if (rtup->isNull)
        return -1;
-   else
+   else if (state->sortFnKind == SORTFUNC_LT)
    {
        if (DatumGetBool(FunctionCall2(&state->sortOpFn,
                                       ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
            return 1;           /* a > b */
        return 0;
    }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
+
+       compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+                                             ltup->val, rtup->val));
+       if (state->sortFnKind == SORTFUNC_REVCMP)
+           compare = -compare;
+       return compare;
+   }
 }
 
 static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
        return (unsigned int) tuplelen;
    }
 }
+
+
+/*
+ * SelectSortFunction: pick the cheapest function implementing sortOperator.
+ * The straightforward choice is the operator's own proc --- ie, "<"
+ * comparison --- but that often takes two calls per comparison.  If the
+ * operator is registered as "<" or ">" in a btree opclass, we return that
+ * opclass's three-way comparator function instead.
+ *
+ * *sortFunction is set to the function to call and *kind to how to use it:
+ * SORTFUNC_CMP (comparator result as-is), SORTFUNC_REVCMP (operator is ">",
+ * descending sort: comparator result must be reversed), or SORTFUNC_LT (no
+ * comparator found; the operator's proc is used, elog(ERROR) if invalid).
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+                  RegProcedure *sortFunction,
+                  SortFunctionKind *kind)
+{
+   Relation    relation;
+   HeapScanDesc scan;
+   ScanKeyData skey[3];        /* 2 keys for the pg_amop scan, 3 for pg_amproc */
+   HeapTuple   tuple;
+   Oid         opclass = InvalidOid;   /* stays invalid if no pg_amop row qualifies */
+
+   /*
+    * Scan pg_amop to see if the target operator is registered as the
+    * "<" or ">" operator of any btree opclass.  It's possible that it
+    * might be registered both ways (eg, if someone were to build a
+    * "reverse sort" opclass for some reason); prefer the "<" case if so.
+    * If the operator is registered the same way in multiple opclasses,
+    * assume we can use the associated comparator function from any one.
+    */
+   relation = heap_openr(AccessMethodOperatorRelationName,
+                         AccessShareLock);
+
+   ScanKeyEntryInitialize(&skey[0], 0,
+                          Anum_pg_amop_amopid,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(BTREE_AM_OID));
+
+   ScanKeyEntryInitialize(&skey[1], 0,
+                          Anum_pg_amop_amopopr,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(sortOperator));
+
+   scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+       if (aform->amopstrategy == BTLessStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_CMP;
+           break;              /* done looking */
+       }
+       else if (aform->amopstrategy == BTGreaterStrategyNumber)
+       {
+           opclass = aform->amopclaid;     /* a later ">" row overwrites an earlier one */
+           *kind = SORTFUNC_REVCMP;
+           /* keep scanning in hopes of finding a BTLess entry */
+       }
+   }
+
+   heap_endscan(scan);
+   heap_close(relation, AccessShareLock);
+
+   if (OidIsValid(opclass))
+   {
+       /* Found a suitable opclass, get its comparator support function */
+       relation = heap_openr(AccessMethodProcedureRelationName,
+                             AccessShareLock);
+
+       ScanKeyEntryInitialize(&skey[0], 0,
+                              Anum_pg_amproc_amid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(BTREE_AM_OID));
+
+       ScanKeyEntryInitialize(&skey[1], 0,
+                              Anum_pg_amproc_amopclaid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(opclass));
+
+       ScanKeyEntryInitialize(&skey[2], 0,
+                              Anum_pg_amproc_amprocnum,
+                              F_INT2EQ,
+                              Int16GetDatum(BTORDER_PROC));
+
+       scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+       *sortFunction = InvalidOid;     /* stays invalid if no pg_amproc row matches */
+
+       if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))    /* only the first match is used */
+       {
+           Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+           *sortFunction = aform->amproc;
+       }
+
+       heap_endscan(scan);
+       heap_close(relation, AccessShareLock);
+
+       if (RegProcedureIsValid(*sortFunction))
+           return;             /* *kind was already set by the pg_amop scan */
+   }
+
+   /* Can't find a comparator, so use the operator as-is */
+
+   *kind = SORTFUNC_LT;        /* overrides any CMP/REVCMP set during the scan */
+   *sortFunction = get_opcode(sortOperator);
+   if (!RegProcedureIsValid(*sortFunction))
+       elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+            sortOperator);
+}


diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h

index 759ab3d39e2494de4fa021c8070ac7e5da62d283..6e38529204dabaab44c078c0af05a6687fd0d966 100644 (file)


--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,15 +6,13 @@
  *
  * Copyright (c) 2000, PostgreSQL Development Team
  *
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef TUPTOASTER_H
 #define TUPTOASTER_H
 
-#ifdef TUPLE_TOASTER_ACTIVE
-
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
  */
 extern Datum toast_compress_datum(Datum value);
 
-#endif  /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
 
 
 #endif  /* TUPTOASTER_H */


diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 963b11c1d386ed6df175ad0e2e92cfe2929af774..832f91fb09f172d5ffc3d31aba10fccd5431c783 100644 (file)


--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 200101061
+#define CATALOG_VERSION_NO 200105051
 
 #endif


diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h

index a7248f6c6dc4fb438d27b29fe250c446534ad228..7ab04b05fb25b1dd765830e90ec1b717c6e2814e 100644 (file)


--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -7,13 +7,14 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef HEAP_H
 #define HEAP_H
 
+#include "catalog/pg_attribute.h"
 #include "utils/rel.h"
 
 typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
                          List *rawColDefaults,
                          List *rawConstraints);
 
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
 #endif  /* HEAP_H */


diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h

index 1dac0bb1c31596e1b31e5fc2d82f20835ed7879b..07aaad61c798bc295723dfe80cded8dbc848d6c9 100644 (file)


--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
               Datum *datum,
               char *nullv);
 
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
 extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
 extern void setRelhasindex(Oid relid, bool hasindex);
 


diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 41a580a37779abae1d46c2b8422b8ece0fbebc2b..cc155cf1bbb314f4cb54a41c23a3a2ed5e1fd5d8 100644 (file)


--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
 xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
 xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
 */
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
 DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
 DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
 DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));


diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h

index 58724e94dc966cef18b5345521cafa985a4dbf1e..6e11aa6d530707371c7b5b0f5af4e4174c4919f5 100644 (file)


--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,15 +36,14 @@
  *     typedef struct FormData_pg_attribute
  *
  *     If you change the following, make sure you change the structs for
- *     system attributes in heap.c and index.c also.
+ *     system attributes in catalog/heap.c also.
  * ----------------
  */
 CATALOG(pg_attribute) BOOTSTRAP
 {
    Oid         attrelid;       /* OID of relation containing this
                                 * attribute */
-   NameData    attname;
-   Oid         atttypid;
+   NameData    attname;        /* name of attribute */
 
    /*
     * atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
     * attalign attributes of this instance, so they had better match or
     * Postgres will fail.
     */
-
-   float4      attdispersion;
+   Oid         atttypid;
 
    /*
-    * attdispersion is the dispersion statistic of the column (0.0 to
-    * 1.0), or zero if the statistic has not been calculated, or -1.0 if
-    * VACUUM found that the column contains no duplicate entries (in
-    * which case the dispersion should be taken as 1.0/numberOfRows for
-    * the current table size).  The -1.0 hack is useful because the
-    * number of rows may be updated more often than attdispersion is. We
-    * assume that the column will retain its no-duplicate-entry property.
-    * (Perhaps this should be driven off the existence of a UNIQUE index
-    * for the column, instead of being a statistical guess?)
+    * attstattarget is the target number of statistics datapoints to collect
+    * during VACUUM ANALYZE of this column.  A zero here indicates that we
+    * do not wish to collect any stats about this column.
     */
-
-   int2        attlen;
+   int4        attstattarget;
 
    /*
     * attlen is a copy of the typlen field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   int2        attnum;
+   int2        attlen;
 
    /*
     * attnum is the "attribute number" for the attribute:  A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     *
     * Note that (attnum - 1) is often used as the index to an array.
     */
+   int2        attnum;
 
-   int4        attnelems;      /* number of dimensions, if an array type */
-
-   int4        attcacheoff;
+   /*
+    * attndims is the declared number of dimensions, if an array type,
+    * otherwise zero.
+    */
+   int4        attndims;
 
    /*
     * fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
     * tuple descriptor, we may then update attcacheoff in the copies.
     * This speeds up the attribute walking process.
     */
-
-   int4        atttypmod;
+   int4        attcacheoff;
 
    /*
     * atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     * argument. The value will generally be -1 for types that do not need
     * typmod.
     */
-
-   bool        attbyval;
+   int4        atttypmod;
 
    /*
     * attbyval is a copy of the typbyval field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   char        attstorage;
+   bool        attbyval;
 
    /*----------
     * attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
     * but only as a last resort ('e' and 'x' fields are moved first).
     *----------
     */
+   char        attstorage;
 
+   /* This flag indicates that the attribute is really a set */
    bool        attisset;
-   char        attalign;
 
    /*
     * attalign is a copy of the typalign field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   bool        attnotnull;
+   char        attalign;
 
    /* This flag represents the "NOT NULL" constraint */
-   bool        atthasdef;
+   bool        attnotnull;
 
    /* Has DEFAULT value or not */
+   bool        atthasdef;
 } FormData_pg_attribute;
 
 /*
  * someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
  */
 #define ATTRIBUTE_TUPLE_SIZE \
-   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
 
 /* ----------------
  *     Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
 #define Anum_pg_attribute_attrelid     1
 #define Anum_pg_attribute_attname      2
 #define Anum_pg_attribute_atttypid     3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
 #define Anum_pg_attribute_attlen       5
 #define Anum_pg_attribute_attnum       6
-#define Anum_pg_attribute_attnelems        7
+#define Anum_pg_attribute_attndims     7
 #define Anum_pg_attribute_attcacheoff  8
 #define Anum_pg_attribute_atttypmod        9
 #define Anum_pg_attribute_attbyval     10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
    (attribute)->attnotnull = false; \
    (attribute)->atthasdef = false;
 #endif  /* _DROP_COLUMN_HACK__ */
+
 /* ----------------
  *     SCHEMA_ macros for declaring hardcoded tuple descriptors.
  *     these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
  * ----------------
  */
 #define Schema_pg_type \
-{ 1247, {"typname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, '\0' , 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, false    , 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1247 typowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1247 typlen          21 0  2   3 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1247 typprtlen       21 0  2   4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_proc \
-{ 1255, {"proname"},           19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"},           19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1255 proowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 prolang         26 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 proisinh            16 0  1   4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_shadow
  * ----------------
  */
-DATA(insert OID = 0 ( 1260 usename         19  0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid            23  0   4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename         19  DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid            23  DEFAULT_ATTSTATTARGET   4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1260 usecreatedb     16  0   1   3 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usetrace            16  0   1   4 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usesuper            16  0   1   5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_group
  * ----------------
  */
-DATA(insert OID = 0 ( 1261 groname         19 0 NAMEDATALEN  1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid            23 0  4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid            23 DEFAULT_ATTSTATTARGET  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1261 grolist       1007 0 -1   3 0 -1 -1 f x f i f f));
 DATA(insert OID = 0 ( 1261 ctid                27 0  6  -1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1261 oid             26 0  4  -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_attribute \
-{ 1249, {"attrelid"},    26, 0,    4,  1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"},     19, 0, NAMEDATALEN,   2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0, 4,  4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"},   23, 0,    4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid            26 0  4   1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname         19 0 NAMEDATALEN  2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"},    26, DEFAULT_ATTSTATTARGET,    4,  1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"},     19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,   2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0,  4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"},    23, 0,    4,  7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid            26 DEFAULT_ATTSTATTARGET  4   1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  2 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1249 atttypid            26 0  4   3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion   700 0  4   4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget   23 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attlen          21 0  2   5 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1249 attnum          21 0  2   6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims            23 0  4   7 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attcacheoff     23 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 atttypmod       23 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attbyval            16 0  1  10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_class \
-{ 1259, {"relname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"},    23, 0,   4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"},    700, 0,  4,  7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltype         26 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relowner            23 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relam           26 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relfilenode     26 0  4   5 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relpages            23 0  4   6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples      700 0  4   7 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastrelid   26 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastidxid   26 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relhasindex     16 0  1  10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_variable \
-{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1264 varfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_log \
-{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1269 logfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 376 xactlockfoo      26 0  4   1 0 -1 -1 t p f i f f));
 


diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h

index 81e75e14b6a7dc7372f9dcd6808f824944f5f028..86de88cc9b662fe5c65f43301e2a28a247bf69ee 100644 (file)


--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
    Oid         relam;
    Oid         relfilenode;
    int4        relpages;
-   int4        reltuples;
+   float4      reltuples;
    Oid         reltoastrelid;
    Oid         reltoastidxid;
    bool        relhasindex;


diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h

index 2f39bea3245e1772984b1b3b4fca0dbb36f41c1d..8d6a6b37c16ac513468f052508aadf91a034ff85 100644 (file)


--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
    /* These fields form the unique key for the entry: */
    Oid         starelid;       /* relation containing attribute */
    int2        staattnum;      /* attribute (column) stats are for */
-   Oid         staop;          /* '<' comparison op used for lo/hi vals */
+
+   /* the fraction of the column's entries that are NULL: */
+   float4      stanullfrac;
 
    /*
-    * Note: the current VACUUM code will never produce more than one
-    * entry per column, but in theory there could be multiple entries if
-    * a datatype has more than one useful ordering operator.  Also, the
-    * current code will not write an entry unless it found at least one
-    * non-NULL value in the column; so the remaining fields will never be
-    * NULL.
+    * stawidth is the average width in bytes of non-null entries.  For
+    * fixed-width datatypes this is of course the same as the typlen, but
+    * for varlena types it is more useful.  Note that this is the average
+    * width of the data as actually stored, post-TOASTing (eg, for a
+    * moved-out-of-line value, only the size of the pointer object is
+    * counted).  This is the appropriate definition for the primary use of
+    * the statistic, which is to estimate sizes of in-memory hash tables of
+    * tuples.
+    */
+   int4        stawidth;
+
+   /* ----------------
+    * stadistinct indicates the (approximate) number of distinct non-null
+    * data values in the column.  The interpretation is:
+    *      0       unknown or not computed
+    *      > 0     actual number of distinct values
+    *      < 0     negative of multiplier for number of rows
+    * The special negative case allows us to cope with columns that are
+    * unique (stadistinct = -1) or nearly so (for example, a column in
+    * which values appear about twice on the average could be represented
+    * by stadistinct = -0.5).  Because the number-of-rows statistic in
+    * pg_class may be updated more frequently than pg_statistic is, it's
+    * important to be able to describe such situations as a multiple of
+    * the number of rows, rather than a fixed number of distinct values.
+    * But in other cases a fixed number is correct (eg, a boolean column).
+    * ----------------
+    */
+   float4      stadistinct;
+
+   /* ----------------
+    * To allow keeping statistics on different kinds of datatypes,
+    * we do not hard-wire any particular meaning for the remaining
+    * statistical fields.  Instead, we provide several "slots" in which
+    * statistical data can be placed.  Each slot includes:
+    *      kind            integer code identifying kind of data
+    *      op              OID of associated operator, if needed
+    *      numbers         float4 array (for statistical values)
+    *      values          text array (for representations of data values)
+    * The kind and operator fields are never NULL; they are zeroes in an
+    * unused slot.  The numbers and values fields are NULL in an unused
+    * slot, and might also be NULL in a used slot if the slot kind has
+    * no need for one or the other.
+    * ----------------
     */
 
+   int2        stakind1;
+   int2        stakind2;
+   int2        stakind3;
+   int2        stakind4;
+
+   Oid         staop1;
+   Oid         staop2;
+   Oid         staop3;
+   Oid         staop4;
+
    /*
-    * These fields contain the stats about the column indicated by the
-    * key
+    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+    * (NULL). They cannot be accessed as C struct entries; you have to use
+    * the full field access machinery (heap_getattr) for them.  We declare
+    * them here for the catalog machinery.
     */
-   float4      stanullfrac;    /* the fraction of the entries that are
-                                * NULL */
-   float4      stacommonfrac;  /* the fraction that are the most common
-                                * val */
+
+   float4      stanumbers1[1];
+   float4      stanumbers2[1];
+   float4      stanumbers3[1];
+   float4      stanumbers4[1];
 
    /*
-    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
-    * accessed as C struct entries; you have to use the full field access
-    * machinery (heap_getattr) for them.
-    *
-    * All three of these are text representations of data values of the
-    * column's data type.  To re-create the actual Datum, do
-    * datatypein(textout(givenvalue)).
+    * Values in these text arrays are external representations of values
+    * of the column's data type.  To re-create the actual Datum, do
+    * datatypein(textout(arrayelement)).
     */
-   text        stacommonval;   /* most common non-null value in column */
-   text        staloval;       /* smallest non-null value in column */
-   text        stahival;       /* largest non-null value in column */
+   text        stavalues1[1];
+   text        stavalues2[1];
+   text        stavalues3[1];
+   text        stavalues4[1];
 } FormData_pg_statistic;
 
+#define STATISTIC_NUM_SLOTS  4
+
 /* ----------------
  *     Form_pg_statistic corresponds to a pointer to a tuple with
  *     the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
  *     compiler constants for pg_statistic
  * ----------------
  */
-#define Natts_pg_statistic             8
+#define Natts_pg_statistic             21
 #define Anum_pg_statistic_starelid     1
 #define Anum_pg_statistic_staattnum        2
-#define Anum_pg_statistic_staop            3
-#define Anum_pg_statistic_stanullfrac  4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval 6
-#define Anum_pg_statistic_staloval     7
-#define Anum_pg_statistic_stahival     8
+#define Anum_pg_statistic_stanullfrac  3
+#define Anum_pg_statistic_stawidth     4
+#define Anum_pg_statistic_stadistinct  5
+#define Anum_pg_statistic_stakind1     6
+#define Anum_pg_statistic_stakind2     7
+#define Anum_pg_statistic_stakind3     8
+#define Anum_pg_statistic_stakind4     9
+#define Anum_pg_statistic_staop1       10
+#define Anum_pg_statistic_staop2       11
+#define Anum_pg_statistic_staop3       12
+#define Anum_pg_statistic_staop4       13
+#define Anum_pg_statistic_stanumbers1  14
+#define Anum_pg_statistic_stanumbers2  15
+#define Anum_pg_statistic_stanumbers3  16
+#define Anum_pg_statistic_stanumbers4  17
+#define Anum_pg_statistic_stavalues1   18
+#define Anum_pg_statistic_stavalues2   19
+#define Anum_pg_statistic_stavalues3   20
+#define Anum_pg_statistic_stavalues4   21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation.  Additional "kinds" will probably appear in
+ * future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot.  Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not.  stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count).  The values
+ * shall be ordered in decreasing frequency.  Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector.  Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV  1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data.  staop is
+ * the OID of the "<" operator that describes the sort ordering.  (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.)  stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population.  The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance).  This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values.  In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM  2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop.  (As with the histogram, more
+ * than one entry could theoretically appear.)  stavalues is not used and
+ * should be NULL.  stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions.  The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION  3
 
 #endif  /* PG_STATISTIC_H */


diff --git a/src/include/commands/command.h b/src/include/commands/command.h

index 8b108451d2accff7969f55e6972ad389551829a1..7eb1a4fab846aeff33b3f5cca4f60b9c4c3b5fb5 100644 (file)


--- a/src/include/commands/command.h
+++ b/src/include/commands/command.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
 extern void AlterTableAddColumn(const char *relationName,
                    bool inh, ColumnDef *colDef);
 
-extern void AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+                                        bool inh, const char *colName,
+                                        Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+                                           bool inh, const char *colName,
+                                           Node *statsTarget);
 
 extern void AlterTableDropColumn(const char *relationName,
                     bool inh, const char *colName,


diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index d82d22fcdfcbe3482ed5dbf1b66bf52b607767c3..87bb0007aa067dcbfbe15d31cccfbe00f61df460 100644 (file)


--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -1,129 +1,27 @@
 /*-------------------------------------------------------------------------
  *
  * vacuum.h
- *   header file for postgres vacuum cleaner
+ *   header file for postgres vacuum cleaner and statistics analyzer
  *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef VACUUM_H
 #define VACUUM_H
 
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
 
 
-typedef struct VAttListData
-{
-   int         val_dummy;
-   struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
-   BlockNumber blkno;          /* BlockNumber of this Page */
-   Size        free;           /* FreeSpace on this Page */
-   uint16      offsets_used;   /* Number of OffNums used by vacuum */
-   uint16      offsets_free;   /* Number of OffNums free or to be free */
-   OffsetNumber offsets[1];    /* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
-   int         empty_end_pages;/* Number of "empty" end-pages */
-   int         num_pages;      /* Number of pages in pagedesc */
-   int         num_allocated_pages;    /* Number of allocated pages in
-                                        * pagedesc */
-   VacPage    *pagedesc;       /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
-   Form_pg_attribute attr;
-   Datum       best,
-               guess1,
-               guess2,
-               max,
-               min;
-   int         best_len,
-               guess1_len,
-               guess2_len,
-               max_len,
-               min_len;
-   long        best_cnt,
-               guess1_cnt,
-               guess1_hits,
-               guess2_hits,
-               null_cnt,
-               nonnull_cnt,
-               max_cnt,
-               min_cnt;
-   FmgrInfo    f_cmpeq,
-               f_cmplt,
-               f_cmpgt;
-   Oid         op_cmplt;
-   regproc     outfunc;
-   Oid         typelem;
-   bool        initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
-   Oid         vrl_relid;
-   struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
-   ItemPointerData new_tid;
-   ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
-   ItemPointerData tid;        /* tuple ID */
-   VacPage     vacpage;        /* where to move */
-   bool        cleanVpd;       /* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
-   Oid         relid;
-   int         num_tuples;
-   int         num_pages;
-   Size        min_tlen;
-   Size        max_tlen;
-   bool        hasindex;
-   int         num_vtlinks;
-   VTupleLink  vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE 1000000000      /* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                               bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
 
 #endif  /* VACUUM_H */


diff --git a/src/include/config.h.in b/src/include/config.h.in

index 0d989dbbb3155bfaa218fce2d6181c45921191de..01593a4ce963a05484b025e5206f27d8b2bd952b 100644 (file)


--- a/src/include/config.h.in
+++ b/src/include/config.h.in
@@ -8,7 +8,7 @@
  * or in config.h afterwards.  Of course, if you edit config.h, then your
  * changes will be overwritten the next time you run configure.
  *
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
  */
 
 #ifndef CONFIG_H
@@ -156,6 +156,11 @@
 #define INDEX_MAX_KEYS     16
 #define FUNC_MAX_ARGS      INDEX_MAX_KEYS
 
+/*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET  10
+
 /*
  * Define this to make libpgtcl's "pg_result -assign" command process C-style
  * backslash sequences in returned tuple data and convert Postgres array


diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 2cf9378cf116426106be2cba0bb29d970e561c09..0967bef24ba9437360c5142ffc6f770107c9aa5a 100644 (file)


--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -628,7 +628,6 @@ typedef struct GroupState
  *  SortState information
  *
  *     sort_Done       indicates whether sort has been performed yet
- *     sort_Keys       scan key structures describing the sort keys
  *     tuplesortstate  private state of tuplesort.c
  * ----------------
  */
@@ -636,7 +635,6 @@ typedef struct SortState
 {
    CommonScanState csstate;    /* its first field is NodeTag */
    bool        sort_Done;
-   ScanKey     sort_Keys;
    void       *tuplesortstate;
 } SortState;
 


diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 1614d787bcb8d0ceac119c020b51ae18ffebd013..63b1b1046a8e71675ed81102c38134886a45f0bc 100644 (file)


--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
    NodeTag     type;
    char        subtype;        /*------------
                                 *  A = add column
-                                *  T = alter column
+                                *  T = alter column default
+                                *  S = alter column statistics
                                 *  D = drop column
                                 *  C = add constraint
                                 *  X = drop constraint
-                                *  E = add toast table,
+                                *  E = create toast table
                                 *  U = change owner
                                 *------------
                                 */
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
 } ClusterStmt;
 
 /* ----------------------
- *     Vacuum Statement
+ *     Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
  * ----------------------
  */
 typedef struct VacuumStmt
 {
    NodeTag     type;
-   bool        verbose;        /* print status info */
-   bool        analyze;        /* analyze data */
-   char       *vacrel;         /* table to vacuum */
-   List       *va_spec;        /* columns to analyse */
+   bool        vacuum;         /* do VACUUM step */
+   bool        analyze;        /* do ANALYZE step */
+   bool        verbose;        /* print progress info */
+   char       *vacrel;         /* name of single table to process, or NULL */
+   List       *va_cols;        /* list of column names, or NIL for all */
 } VacuumStmt;
 
 /* ----------------------


diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 3ae8e09f57a30468fdece0f7fe9098a3ca05653f..9e69ed60992a7b7307fcc79150eccd7a6f62f963 100644 (file)


--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
  * reskey and reskeyop are the execution-time representation of sorting.
  * reskey must be zero in any non-sort-key item.  The reskey of sort key
  * targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID.  reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
  *
  * Both reskey and reskeyop are typically zero during parse/plan stages.
  * The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
    Index       ressortgroupref;
    /* nonzero if referenced by a sort/group clause */
    Index       reskey;         /* order of key in a sort (for those > 0) */
-   Oid         reskeyop;       /* sort operator's regproc Oid */
+   Oid         reskeyop;       /* sort operator's Oid */
    bool        resjunk;        /* set to true to eliminate the attribute
                                 * from final target list */
 } Resdom;


diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index f643ef879689640186250b344d4734f80aa6dc49..c76d9b4af7136f23fdc022f53127925129760519 100644 (file)


--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
    Oid         hashjoinoperator;       /* copy of clause operator */
 
    /* cache space for hashclause processing; -1 if not yet set */
-   Selectivity left_dispersion;/* dispersion of left side */
-   Selectivity right_dispersion;       /* dispersion of right side */
+   Selectivity left_bucketsize;        /* avg bucketsize of left side */
+   Selectivity right_bucketsize;       /* avg bucketsize of right side */
 } RestrictInfo;
 
 /*


diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 5caa576f0c0be002c3e1bc88a7ff75746f5c45b4..cbf6df063a3cc4ae782cab805acaaf80b9d2025f 100644 (file)


--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
               List *restrictlist,
               List *outersortkeys, List *innersortkeys);
 extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-             List *restrictlist, Selectivity innerdispersion);
+             List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
 extern Cost cost_qual_eval(List *quals);
 extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,


diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 5b71eded86fcac8f21a5732ef81d8906fd9263a3..0839feb4b2fe5c0d137a7705469acb3814779181 100644 (file)


--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion);
+                    Selectivity innerbucketsize);
 
 /*
  * prototypes for relnode.c


diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index f1c4aff1c804172da17b24a438551c0b631c98c0..6b35deed2867649e350da0c081a983eb0dec5821 100644 (file)


--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
 extern Oid get_atttype(Oid relid, AttrNumber attnum);
 extern bool get_attisset(Oid relid, char *attname);
 extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
-                 double min_estimate);
 extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
 extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
 extern char get_typstorage(Oid typid);
 extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+                            Oid atttype, int32 atttypmod,
+                            int reqkind, Oid reqop,
+                            Datum **values, int *nvalues,
+                            float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+                             Datum *values, int nvalues,
+                             float4 *numbers, int nnumbers);
 
 #define TypeIsToastable(typid) (get_typstorage(typid) != 'p')
 


diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index 8d4e2ae62c419658f44ec3f1adb9853a658ea2c6..342f7bf8a566b73e4f8393553ccb332ed067ed06 100644 (file)


--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,7 +53,7 @@
 #define RULEOID            22
 #define SHADOWNAME     23
 #define SHADOWSYSID        24
-#define STATRELID      25
+#define STATRELATT     25
 #define TYPENAME       26
 #define TYPEOID            27
 


diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 7f273776c36a26cc1e6b688b4a530f74a7c108f2..001761796e2492781d98aec7c8b311b4538e251a 100644 (file)


--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
  */
 
 extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
-                    bool randomAccess);
+                     int nkeys,
+                     Oid *sortOperators, AttrNumber *attNums,
+                     bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                      bool enforceUnique,
                      bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
 extern void tuplesort_markpos(Tuplesortstate *state);
 extern void tuplesort_restorepos(Tuplesortstate *state);
 
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+   SORTFUNC_LT,                /* raw "<" operator */
+   SORTFUNC_CMP,               /* -1 / 0 / 1 three-way comparator */
+   SORTFUNC_REVCMP             /* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+                              RegProcedure *sortFunction,
+                              SortFunctionKind *kind);
+
 #endif  /* TUPLESORT_H */


diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c

index 5614a34b0fef7390ba8ec0a4184fea10da5e7d69..c03880f497d0d62526a94157175fede654376f28 100644 (file)


--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y

index 345efb6576e2ddd8ff944993799a816b12bd34c8..91708bd91fae24f446576cacaea6ccbc1028163d 100644 (file)


--- a/src/interfaces/ecpg/preproc/preproc.y
+++ b/src/interfaces/ecpg/preproc/preproc.y
@@ -134,7 +134,7 @@ make_name(void)
 
 %union {
    double                  dval;
-        int                     ival;
+   int                     ival;
    char *                  str;
    struct when             action;
    struct index        index;
@@ -224,7 +224,7 @@ make_name(void)
        NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
        OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
        RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
-       SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+       SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
        TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
        VALID, VERBOSE, VERSION
 
@@ -285,7 +285,7 @@ make_name(void)
 %type      file_name AexprConst ParamNo c_expr ConstTypename
 %type     in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
 %type     opt_indirection expr_list extract_list extract_arg
-%type     position_list substr_list substr_from alter_column_action
+%type     position_list substr_list substr_from alter_column_default
 %type     trim_list in_expr substr_for attr attrs drop_behavior
 %type     Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
 %type     opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
 %type     row_expr row_descriptor row_list ConstDatetime opt_chain
 %type     SelectStmt into_clause OptTemp ConstraintAttributeSpec
 %type     opt_table opt_all sort_clause sortby_list ConstraintAttr 
-%type     sortby OptUseOp opt_inh_star relation_name_list name_list
+%type     sortby OptUseOp relation_name_list name_list
 %type     group_clause having_clause from_clause opt_distinct
 %type     join_outer where_clause relation_expr sub_type opt_arg
 %type     opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
 %type      NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
 %type      copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
 %type      opt_with_copy FetchStmt direction fetch_how_many from_in
-%type      ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type      opt_analyze opt_va_list va_list ExplainStmt index_params
+%type      ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type      analyze_keyword opt_name_list ExplainStmt index_params
 %type      index_list func_index index_elem opt_class access_method_clause
 %type      index_opt_unique IndexStmt func_return ConstInterval
 %type      func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
 %type     opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
 %type     case_expr when_clause_list case_default case_arg when_clause
 %type      select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type      select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type      select_offset_value ReindexStmt join_type opt_boolean
 %type     join_qual update_list AlterSchemaStmt joined_table
 %type     opt_level opt_lock lock_type users_in_new_group_clause
 %type      OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt:  AlterSchemaStmt          { output_statement($1, 0, NULL, connection); }
        | CreatedbStmt      { output_statement($1, 0, NULL, connection); }
        | DropdbStmt        { output_statement($1, 0, NULL, connection); }
        | VacuumStmt        { output_statement($1, 0, NULL, connection); }
+       | AnalyzeStmt       { output_statement($1, 0, NULL, connection); }
        | VariableSetStmt   { output_statement($1, 0, NULL, connection); }
        | VariableShowStmt  { output_statement($1, 0, NULL, connection); }
        | VariableResetStmt { output_statement($1, 0, NULL, connection); }
@@ -908,40 +909,41 @@ CheckPointStmt: CHECKPOINT     { $$= make_str("checkpoint"); }
 
 
 /*****************************************************************************
- *
- *     QUERY :
  *
  * ALTER TABLE variations
  *
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-        ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+        ALTER TABLE relation_expr ADD opt_column columnDef
+       {
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+       }
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+   | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
        }
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP
-DEFAULT} */
-   | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
-       alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+   | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+           $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
        }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-   | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
+   | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
        }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+   | ALTER TABLE relation_expr ADD TableConstraint
        {
-           $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+           $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
        }
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+   | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
        }
 /* ALTER TABLE <name> OWNER TO UserId */     
    | ALTER TABLE relation_name OWNER TO UserId   
@@ -950,7 +952,7 @@ DEFAULT} */
        }
        ;
 
-alter_column_action:
+alter_column_default:
         SET DEFAULT a_expr { $$ = cat2_str(make_str("set default"), $3); }
         | DROP DEFAULT          { $$ = make_str("drop default"); }
         ;
@@ -1234,10 +1236,6 @@ key_reference:  NO ACTION    { $$ = make_str("no action"); }
        | SET NULL_P    { $$ = make_str("set null"); }
        ;
 
-opt_only: ONLY     { $$ = make_str("only"); }
-   | /*EMPTY*/ { $$ = EMPTY; }
-   ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'                { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
                 | /*EMPTY*/                    { $$ = EMPTY; }
                 ;      
@@ -2013,10 +2011,9 @@ opt_force:      FORCE        { $$ = make_str("force"); }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
-                   $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+                   $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
                }
        ;
 
@@ -2250,38 +2247,44 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
+               {
+                   $$ = cat_str(2, make_str("vacuum"), $2);
+               }
+       | VACUUM opt_verbose relation_name
                {
                    $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose AnalyzeStmt
                {
-                   if ( strlen($5) > 0 && strlen($4) == 0 )
-                       mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
-                   $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+                   $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
        ;
 
-opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   $$ = cat_str(2, $1, $2);
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   $$ = cat_str(4, $1, $2, $3, $4);
+               }
        ;
 
-opt_analyze:  ANALYZE                  { $$ = make_str("analyze"); }
-       | ANALYSE               { $$ = make_str("analyse"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+analyze_keyword:  ANALYZE                  { $$ = make_str("analyze"); }
+       | ANALYSE                           { $$ = make_str("analyse"); }
        ;
 
-opt_va_list:  '(' va_list ')'              { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
        | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
-va_list:  name
-               { $$=$1; }
-       | va_list ',' name
-               { $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list:  '(' name_list ')'      { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+       | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
 
@@ -2383,9 +2386,9 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
-                   $$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+                   $$ = cat_str(3, make_str("delete from"), $3, $4);
                }
        ;
 
@@ -2416,12 +2419,12 @@ opt_lmode:      SHARE                           { $$ = make_str("share"); }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
-                   $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+                   $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
                }
        ;
 
@@ -2667,10 +2670,6 @@ select_offset_value:     PosIntConst {
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                 { $$ = make_str("*"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
-       ;
-
 relation_name_list:  name_list { $$ = $1; };
 
 name_list:  name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause                { $$ = $1; }
        | /* EMPTY */               { $$ = EMPTY; }
                 ;
 
-update_list:  OF va_list
+update_list:  OF name_list
               {
            $$ = cat2_str(make_str("of"), $2);
          }
@@ -5028,6 +5027,7 @@ TokenId:  ABSOLUTE            { $$ = make_str("absolute"); }
    | SHARE             { $$ = make_str("share"); }
    | START             { $$ = make_str("start"); }
    | STATEMENT         { $$ = make_str("statement"); }
+   | STATISTICS        { $$ = make_str("statistics"); }
    | STDIN                         { $$ = make_str("stdin"); }
    | STDOUT                        { $$ = make_str("stdout"); }
    | SYSID                         { $$ = make_str("sysid"); }


diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out

index a2b0ad9e3e722827117e707ae7033a210771e9b4..46bc60f6955d60e4a52170d4b7281048b146d97d 100644 (file)


--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -353,12 +353,28 @@ WHERE pg_statistic.starelid != 0 AND
 -----+----------
 (0 rows)
 
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop 
------+-------
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3 
+-----+--------
 (0 rows)
 
 SELECT oid, pg_trigger.tgrelid 


diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 9d4ff1b39856b3d468938ef709578649fe4d84ce..1b094a6e3bfe2f58a8e7b108c0088131a36feb35 100644 (file)


--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
           (p2.pronargs = 1 AND p1.aggbasetype = 0)));
   oid  | aggname | oid |   proname   
 -------+---------+-----+-------------
- 16997 | max     | 768 | int4larger
- 17011 | min     | 769 | int4smaller
+ 17010 | max     | 768 | int4larger
+ 17024 | min     | 769 | int4smaller
 (2 rows)
 
 -- Cross-check finalfn (if present) against its entry in pg_proc.


diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql

index b7ea1f63eaa8268d3583a670e9f3985619be0453..88727a6c76ec6922fc12f4456fba2dc650570f0a 100644 (file)


--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -177,10 +177,18 @@ SELECT    oid, pg_statistic.starelid
 FROM   pg_statistic 
 WHERE  pg_statistic.starelid != 0 AND 
    NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
 SELECT oid, pg_trigger.tgrelid 
 FROM   pg_trigger 
 WHERE  pg_trigger.tgrelid != 0 AND 




This is the main PostgreSQL git repository.
RSS
Atom
      class="PARAMETER">value | DROP DEFAULT }
-ALTER TABLE table [ * ]
+ALTER TABLE [ ONLY ] table [ * ]
+    ALTER [ COLUMN ] column SET STATISTICS integer
+ALTER TABLE [ ONLY ] table [ * ]
      RENAME [ COLUMN ] column TO 
     class="PARAMETER">newcolumn
 ALTER TABLE table
@@ -159,9 +161,14 @@ ALTER TABLE table
    ALTER TABLE changes the definition of an existing table.
    The ADD COLUMN form adds a new column to the table
    using the same syntax as 
-   endterm="SQL-CREATETABLE-title">. The ALTER COLUMN form
-   allows you to set or remove the default for the column. Note that defaults
-   only apply to newly inserted rows.
+   endterm="SQL-CREATETABLE-title">.
+   The ALTER COLUMN SET/DROP DEFAULT forms
+   allow you to set or remove the default for the column. Note that defaults
+   only apply to subsequent INSERT commands; they do not
+   cause rows already in the table to change.
+   The ALTER COLUMN SET STATISTICS form allows you to
+   set the statistics-gathering target for subsequent
+   ANALYZE operations.
    The RENAME clause causes the name of a table or column
    to change without changing any of the data contained in
    the affected table. Thus, the table or column will
@@ -170,7 +177,7 @@ ALTER TABLE table
    The ADD table constraint definition clause 
    adds a new constraint to the table using the same syntax as 
    linkend="SQL-CREATETABLE" endterm="SQL-CREATETABLE-title">. 
-   The OWNER clause chnages the owner of the table to the user 
+   The OWNER clause changes the owner of the table to the user 
    new user.
   
 
@@ -190,10 +197,11 @@ ALTER TABLE table
    
 
    
-    In the current implementation, default and constraint clauses for the
+    In the current implementation of ADD COLUMN,
+    default and constraint clauses for the
     new column will be ignored. You can use the SET DEFAULT
     form of ALTER TABLE to set the default later.
-    (You will also have to update the already existing rows to the
+    (You may also want to update the already existing rows to the
     new default value, using 
     endterm="sql-update-title">.)
    
@@ -210,7 +218,7 @@ ALTER TABLE table
 
    
     You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
     catalog is not permitted.
     The PostgreSQL User's Guide has further
     information on inheritance.


diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml

new file mode 100644 (file)

index 0000000..57d3213


--- /dev/null
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -0,0 +1,219 @@
+
+
+
+ 
+  
+   ANALYZE
+  
+  SQL - Language Statements
+ 
+ 
+  
+   ANALYZE
+  
+  
+   Collect statistics about a Postgres database
+  
+ 
+ 
+  
+   2001-05-04
+  
+  
+ANALYZE [ VERBOSE ] [ table [ (column [, ...] ) ] ]
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Inputs
+   
+
+   
+    
+     
+      VERBOSE
+      
+       
+   Enables display of progress messages.
+       
+      
+     
+     
+      table
+      
+       
+   The name of a specific table to analyze. Defaults to all tables.
+       
+      
+     
+     
+      column
+      
+       
+   The name of a specific column to analyze. Defaults to all columns.
+       
+      
+     
+    
+   
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Outputs
+   
+   
+
+    
+     
+      
+ANALYZE
+       
+      
+       
+   The command is complete.
+       
+      
+     
+
+    
+   
+  
+ 
+
+ 
+  
+   2001-05-04
+  
+  
+   Description
+  
+  
+   ANALYZE collects statistics about the contents of
+   Postgres tables, and stores the results in
+   the system table pg_statistic.  Subsequently,
+   the query planner uses the statistics to help determine the most efficient
+   execution plans for queries.
+  
+
+  
+   With no parameter, ANALYZE examines every table in the
+   current database.  With a parameter, ANALYZE examines
+   only that table.  It is further possible to give a list of column names,
+   in which case only the statistics for those columns are updated.
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Notes
+   
+
+  
+   It is a good idea to run ANALYZE periodically, or
+   just after making major changes in the contents of a table.  Accurate
+   statistics will help the planner to choose the most appropriate query
+   plan, and thereby improve the speed of query processing.  A common
+   strategy is to run VACUUM and ANALYZE
+   once a day during a low-usage time of day.
+  
+
+  
+   Unlike VACUUM,
+   ANALYZE requires
+   only a read lock on the target table, so it can run in parallel with
+   other activity on the table.
+  
+
+  
+   For large tables, ANALYZE takes a random sample of the
+   table contents, rather than examining every row.  This allows even very
+   large tables to be analyzed in a small amount of time.  Note however
+   that the statistics are only approximate, and will change slightly each
+   time ANALYZE is run, even if the actual table contents
+   did not change.  This may result in small changes in the planner's
+   estimated costs shown by EXPLAIN.
+  
+
+  
+   The collected statistics usually include a list of some of the most common
+   values in each column and a histogram showing the approximate data
+   distribution in each column.  One or both of these may be omitted if
+   ANALYZE deems them uninteresting (for example, in
+   a unique-key column, there are no common values) or if the column
+   datatype does not support the appropriate operators.
+  
+
+  
+   The extent of analysis can be controlled by adjusting the per-column
+   statistics target with ALTER TABLE ALTER COLUMN SET
+   STATISTICS (see
+   ALTER TABLE).  The
+   target value sets the maximum number of entries in the most-common-value
+   list and the maximum number of bins in the histogram.  The default
+   target value is 10, but this can be adjusted up or down to trade off
+   accuracy of planner estimates against the time taken for
+   ANALYZE and the
+   amount of space occupied in pg_statistic.
+   In particular, setting the statistics target to zero disables collection of
+   statistics for that column.  It may be useful to do that for columns that
+   are never used as part of the WHERE, GROUP BY, or ORDER BY clauses of
+   queries, since the planner will have no use for statistics on such columns.
+  
+
+  
+   The largest statistics target among the columns being analyzed determines
+   the number of table rows sampled to prepare the statistics.  Increasing
+   the target causes a proportional increase in the time and space needed
+   to do ANALYZE.
+  
+
+  
+ 
+
+ 
+  
+   Compatibility
+  
+
+  
+   
+    2001-05-04
+   
+   
+    SQL92
+   
+   
+    There is no ANALYZE statement in SQL92.
+   
+  
+ 
+
+
+


diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml

index 51cb8a9ffdacfe41115a94d41b11e97fa1e6b6f9..cbb182466ea44d231b4271f54f2c14da9534307b 100644 (file)


--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -1,5 +1,5 @@
 
 
@@ -15,15 +15,15 @@ Postgres documentation
    VACUUM
   
   
-   Clean and analyze a Postgres database
+   Clean and optionally analyze a Postgres database
   
  
  
   
-   1999-07-20
+   2001-05-04
   
   
-VACUUM [ VERBOSE ] [ ANALYZE ] [ table ]
+VACUUM [ VERBOSE ] [ table ]
 VACUUM [ VERBOSE ] ANALYZE [ table [ (column [, ...] ) ] ]
   
 
@@ -49,7 +49,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
       ANALYZE
       
        
-   Updates column statistics used by the optimizer to
+   Updates statistics used by the optimizer to
    determine the most efficient way to execute a query.
        
       
@@ -90,7 +90,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
        
       
        
-   The command has been accepted and the database is being cleaned.
+   The command is complete.
        
       
      
@@ -144,28 +144,26 @@ NOTICE:  Index index: Pages 28;
    Description
   
   
-   VACUUM serves two purposes in 
-   Postgres as both a means to reclaim storage and
-   also a means to collect information for the optimizer.
+   VACUUM reclaims storage occupied by deleted tuples.
+   In normal Postgres operation, tuples that
+   are DELETEd or obsoleted by UPDATE are not physically removed from
+   their table; they remain present until a VACUUM is
+   done.  Therefore it's necessary to do VACUUM
+   periodically, especially on frequently-updated tables.
   
 
   
-   VACUUM opens every table in the database,
-   cleans out records from rolled back transactions, and updates statistics in the
-   system catalogs.  The statistics maintained include the number of
-   tuples and number of pages stored in all tables.
-  
-
-
-  
-   VACUUM ANALYZE collects statistics representing the
-   dispersion of the data in each column.
-   This information is valuable when several query execution paths are possible.
+   With no parameter, VACUUM processes every table in the
+   current database.  With a parameter, VACUUM processes
+   only that table.
   
 
   
-   Running VACUUM
-   periodically will increase the speed of the database in processing user queries.
+   VACUUM ANALYZE performs a VACUUM
+   and then an ANALYZE for each selected table.  This
+   is a handy combination form for routine maintenance scripts.  See
+   ANALYZE
+   for more details about its processing.
   
 
   
@@ -175,16 +173,15 @@ NOTICE:  Index index: Pages 28;
    
     Notes
    
-   
-    The open database is the target for VACUUM.
-   
+
    
     We recommend that active production databases be
     VACUUM-ed nightly, in order to remove
     expired rows. After copying a large table into
     Postgres or after deleting a large number
     of records, it may be a good idea to issue a VACUUM
-    ANALYZE query. This will update the system catalogs with
+    ANALYZE command for the affected table. This will update the
+    system catalogs with
     the results of all recent changes, and allow the
     Postgres query optimizer to make better
     choices in planning user queries.


diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml

index b92ee0868d029cf48443f4240fab5224bc958862..9a977a6515c97db601f13f5f43413bc3e81a46c8 100644 (file)


--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
 
@@ -26,6 +26,7 @@ PostgreSQL Reference Manual
    &alterGroup;
    &alterTable;
    &alterUser;
+   &analyze;
    &begin;
    &checkpoint;
    &close;


diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml

index d38e78a4e1af22651531a65d320f427ea71b175b..57d8bb79c28d69da43ce1897f0dacb4f3dd1a56b 100644 (file)


--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -1,5 +1,5 @@
 
 
  
@@ -244,7 +244,7 @@ SELECT (a + b) AS c FROM test_complex;
     only a small fraction.  '<' will accept a fraction that depends on
     where the given constant falls in the range of values for that table
     column (which, it just so happens, is information collected by
-    VACUUM ANALYZE and made available to the selectivity estimator).
+    ANALYZE and made available to the selectivity estimator).
     '<=' will accept a slightly larger fraction than '<' for the same
     comparison constant, but they're close enough to not be worth
     distinguishing, especially since we're not likely to do better than a


diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c

index 769f754b6690919606bdaaf8a016260382abdef8..86d704e8d08779e32b38e3d4d4f938072adeccf7 100644 (file)


--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
        Form_pg_attribute attr2 = tupdesc2->attrs[i];
 
        /*
-        * We do not need to check every single field here, and in fact
-        * some fields such as attdispersion probably shouldn't be
-        * compared.  We can also disregard attnum (it was used to place
-        * the row in the attrs array) and everything derived from the
-        * column datatype.
+        * We do not need to check every single field here: we can disregard
+        * attrelid, attnum (it was used to place the row in the attrs array)
+        * and everything derived from the column datatype.
         */
        if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
            return false;
        if (attr1->atttypid != attr2->atttypid)
            return false;
+       if (attr1->attstattarget != attr2->attstattarget)
+           return false;
        if (attr1->atttypmod != attr2->atttypmod)
            return false;
        if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
    else
        MemSet(NameStr(att->attname), 0, NAMEDATALEN);
 
-   att->attdispersion = 0;     /* dummy value */
+   att->attstattarget = 0;
    att->attcacheoff = -1;
    att->atttypmod = typmod;
 
    att->attnum = attributeNumber;
-   att->attnelems = attdim;
+   att->attndims = attdim;
    att->attisset = attisset;
 
    att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
    att->attbyval = true;
    att->attalign = 'i';
    att->attstorage = 'p';
-   att->attnelems = 0;
+   att->attndims = 0;
 }
 
 /* ----------------------------------------------------------------


diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c

index 1c5577b88a066a1ecebfd6ce317147efc28d489c..06010896821e5caa9627c17f6328239ec3c277b6 100644 (file)


--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
 #endif
 
 /*
-** routine to build an index.  Basically calls insert over and over
-*/
+ * routine to build an index.  Basically calls insert over and over
+ */
 Datum
 gistbuild(PG_FUNCTION_ARGS)
 {
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
 
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

index aa76ba232a05c21da94012fbefbc287924aa154f..9617fcc33a6a0bb5bf4556944cc433be26ad0331 100644 (file)


--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    HashItem    hitem;
    Node       *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c

index fb509ab66de99d90fcdab322dd36af40551316d1..2a9df577b10c56de723c68ae329e47847849fb71 100644 (file)


--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -166,6 +166,43 @@ heap_tuple_untoast_attr(varattrib *attr)
 }
 
 
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+   varattrib  *attr = (varattrib *) DatumGetPointer(value);
+   Size        result;
+
+   if (VARATT_IS_COMPRESSED(attr))
+   {
+       /*
+        * va_rawsize shows the original data size, whether the datum
+        * is external or not.
+        */
+       result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+   }
+   else if (VARATT_IS_EXTERNAL(attr))
+   {
+       /*
+        * an uncompressed external attribute has rawsize including the
+        * header (not too consistent!)
+        */
+       result = attr->va_content.va_external.va_rawsize;
+   }
+   else
+   {
+       /* plain untoasted datum */
+       result = VARSIZE(attr);
+   }
+   return result;
+}
+
+
 /* ----------
  * toast_delete -
  *


diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

index 97d99da4fde7bbbfe009c7c7baf04dc557390cd9..f456e0c9306f4f3c191d75172463bf852e905041 100644 (file)


--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    if (usefast)
    {
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
 
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c

index 3752a59e99a3259dcef8feb7660927baf8308a4a..a8c6a13ea3c14626245bad59e372b66b0d5c25e2 100644 (file)


--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* count the tuples as we insert them */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh

index c2993fa8fc6b474bc13badd0c4369ca56fdbb9d4..cac53f3e0853262c213239e698170311a6ee8e1c 100644 (file)


--- a/src/backend/catalog/genbki.sh
+++ b/src/backend/catalog/genbki.sh
@@ -10,7 +10,7 @@
 #
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
 #
 # NOTES
 #    non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
     fi
 done
 
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
 for dir in $INCLUDE_DIRS; do
     if [ -f "$dir/config.h" ]; then
         INDEXMAXKEYS=`grep '#define[   ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+        DEFAULTATTSTATTARGET=`grep '#define[   ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
         break
     fi
 done
@@ -168,6 +170,7 @@ sed -e "s/;[    ]*$//g" \
     -e "s/(NameData/(name/g" \
     -e "s/(Oid/(oid/g" \
     -e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+    -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
     -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
     -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
     -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \


diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

index 54867d51a4b631241e649453750b03ee0c1aeef4..03f16e11c3f3710b2589d8e7330bfd0a2bb386b8 100644 (file)


--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
 
 /*
  * Note:
- *     Should the executor special case these attributes in the future?
- *     Advantage:  consume 1/2 the space in the ATTRIBUTE relation.
- *     Disadvantage:  having rules to compute values in these tuples may
- *             be more difficult if not impossible.
+ *     Should the system special case these attributes in the future?
+ *     Advantage:  consume much less space in the ATTRIBUTE relation.
+ *     Disadvantage:  special cases will be all over the place.
  */
 
 static FormData_pg_attribute a1 = {
-   0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
-   SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+   0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+   SelfItemPointerAttributeNumber, 0, -1, -1,
+   false, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a2 = {
-   0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
-   ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"oid"}, OIDOID, 0, sizeof(Oid),
+   ObjectIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a3 = {
-   0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
-   MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+   MinTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a4 = {
-   0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
-   MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+   MinCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a5 = {
-   0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
-   MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+   MaxTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a6 = {
-   0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
-   MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+   MaxCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 /*
-   We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
 static FormData_pg_attribute a7 = {
-   0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
-   TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+   TableOidAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * This function returns a Form_pg_attribute pointer for a system attribute.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+   if (attno >= 0 || attno < - (int) lengthof(SysAtt))
+       elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+            attno);
+   return SysAtt[-attno - 1];
+}
 
 /* ----------------------------------------------------------------
  *             XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
  *     8) the relations are closed and the new relation's oid
  *        is returned.
  *
- * old comments:
- *     A new relation is inserted into the RELATION relation
- *     with the specified attribute(s) (newly inserted into
- *     the ATTRIBUTE relation).  How does concurrency control
- *     work?  Is it automatic now?  Expects the caller to have
- *     attname, atttypid, atttyparg, attproc, and attlen domains filled.
- *     Create fills the attnum domains sequentually from zero,
- *     fills the attdispersion domains with zeros, and fills the
- *     attrelid fields with the relid.
- *
- *     scan relation catalog for name conflict
- *     scan type catalog for typids (if not arg)
- *     create and insert attribute(s) into attribute catalog
- *     create new relation
- *     insert new relation into attribute catalog
- *
- *     Should coordinate with heap_create_with_catalog(). Either
- *     it should not be called or there should be a way to prevent
- *     the relation from being removed at the end of the
- *     transaction if it is successful ('u'/'r' may be enough).
- *     Also, if the transaction does not commit, then the
- *     relation should be removed.
- *
- *     XXX amcreate ignores "off" when inserting (for now).
- *     XXX amcreate (like the other utilities) needs to understand indexes.
- *
  * ----------------------------------------------------------------
  */
 
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
     */
    for (i = 0; i < natts; i++)
    {
-       for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+       for (j = 0; j < (int) lengthof(SysAtt); j++)
        {
-           if (strcmp(NameStr(HeapAtt[j]->attname),
+           if (strcmp(NameStr(SysAtt[j]->attname),
                       NameStr(tupdesc->attrs[i]->attname)) == 0)
            {
                elog(ERROR, "Attribute '%s' has a name conflict"
                     "\n\tName matches an existing system attribute",
-                    NameStr(HeapAtt[j]->attname));
+                    NameStr(SysAtt[j]->attname));
            }
        }
        if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
        /* Fill in the correct relation OID */
        (*dpp)->attrelid = new_rel_oid;
        /* Make sure these are OK, too */
-       (*dpp)->attdispersion = 0;
+       (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
        (*dpp)->attcacheoff = -1;
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
    /*
     * next we add the system attributes..
     */
-   dpp = HeapAtt;
+   dpp = SysAtt;
    for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
    {
        /* Fill in the correct relation OID */
        /* HACK: we are writing on static data here */
        (*dpp)->attrelid = new_rel_oid;
        /* Unneeded since they should be OK in the constant data anyway */
-       /* (*dpp)->attdispersion = 0; */
+       /* (*dpp)->attstattarget = 0; */
        /* (*dpp)->attcacheoff = -1; */
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
     * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
     * finds the relation has 0 rows and pages. See index.c.)
     */
-   new_rel_reltup->relpages = 10;      /* bogus estimates */
-   new_rel_reltup->reltuples = 1000;
+   switch (relkind)
+   {
+       case RELKIND_RELATION:
+       case RELKIND_INDEX:
+       case RELKIND_TOASTVALUE:
+           new_rel_reltup->relpages = 10;  /* bogus estimates */
+           new_rel_reltup->reltuples = 1000;
+           break;
+       case RELKIND_SEQUENCE:
+           new_rel_reltup->relpages = 1;
+           new_rel_reltup->reltuples = 1;
+           break;
+       default:                /* views, etc */
+           new_rel_reltup->relpages = 0;
+           new_rel_reltup->reltuples = 0;
+           break;
+   }
 
    new_rel_reltup->relowner = GetUserId();
    new_rel_reltup->reltype = new_type_oid;


diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index 2adb30e1ed8ecf91d12c0028495b8911ece7068d..5eefab114891fdc1b2bbcc7b407d6c96ac3c75ca 100644 (file)


--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -55,7 +55,7 @@
  */
 #define AVG_ATTR_SIZE 8
 #define NTUPLES_PER_PAGE(natts) \
-   ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+   ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
    ((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
 
 /* non-export function prototypes */
@@ -98,39 +98,6 @@ IsReindexProcessing(void)
    return reindexing;
 }
 
-/* ----------------------------------------------------------------
- *   sysatts is a structure containing attribute tuple forms
- *   for system attributes (numbered -1, -2, ...).  This really
- *   should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- *     Oid             attrelid;
- *     NameData        attname;
- *     Oid             atttypid;
- *     uint32          attnvals;
- *     int16           attlen;
- *     AttrNumber      attnum;
- *     uint32          attnelems;
- *     int32           attcacheoff;
- *     int32           atttypmod;
- *     bool            attbyval;
- *     bool            attisset;
- *     char            attalign;
- *     bool            attnotnull;
- *     bool            atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
-   {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
 /* ----------------------------------------------------------------
  *     GetHeapRelationOid
  * ----------------------------------------------------------------
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
    for (i = 0; i < numatts; i++)
    {
        AttrNumber  atnum;      /* attributeNumber[attributeOffset] */
-       AttrNumber  atind;
        Form_pg_attribute from;
        Form_pg_attribute to;
 
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
        {
 
            /*
-            * here we are indexing on a system attribute (-1...-n) so we
-            * convert atnum into a usable index 0...n-1 so we can use it
-            * to dereference the array sysatts[] which stores tuple
-            * descriptor information for system attributes.
+            * here we are indexing on a system attribute (-1...-n)
             */
-           if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
-               elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
-           atind = (-atnum) - 1;
-
-           from = &sysatts[atind];
+           from = SystemAttributeDefinition(atnum);
        }
        else
        {
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
            if (atnum > natts)
                elog(ERROR, "Cannot create index: attribute %d does not exist",
                     atnum);
-           atind = AttrNumberGetAttrOffset(atnum);
 
-           from = heapTupDesc->attrs[atind];
+           from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
        }
 
        /*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
         */
        to->attnum = i + 1;
 
-       to->attdispersion = 0.0;
+       to->attstattarget = 0;
+       to->attcacheoff = -1;
        to->attnotnull = false;
        to->atthasdef = false;
-       to->attcacheoff = -1;
 
        /*
         * We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
 
 /* ----------------
  *     UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index).  Note that this is not used
+ * in the context of VACUUM.
  * ----------------
  */
 void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
 {
    Relation    whichRel;
    Relation    pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
     * with zero size statistics until a VACUUM is done.  The optimizer
     * will generate very bad plans if the stats claim the table is empty
     * when it is actually sizable.  See also CREATE TABLE in heap.c.
+    *
+    * Note: this path is also taken during bootstrap, because bootstrap.c
+    * passes reltuples = 0 after loading a table.  We have to estimate some
+    * number for reltuples based on the actual number of pages.
     */
    relpages = RelationGetNumberOfBlocks(whichRel);
 
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
 
        for (i = 0; i < Natts_pg_class; i++)
        {
-           nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+           nulls[i] = ' ';
            replace[i] = ' ';
            values[i] = (Datum) NULL;
        }
 
        replace[Anum_pg_class_relpages - 1] = 'r';
-       values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+       values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
        replace[Anum_pg_class_reltuples - 1] = 'r';
-       values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+       values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
        newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
        simple_heap_update(pg_class, &tuple->t_self, newtup);
        if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
    TupleDesc   heapDescriptor;
    Datum       datum[INDEX_MAX_KEYS];
    char        nullv[INDEX_MAX_KEYS];
-   long        reltuples,
+   double      reltuples,
                indtuples;
    Node       *predicate = indexInfo->ii_Predicate;
 
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
                          0,    /* number of keys */
                          (ScanKey) NULL);      /* scan key */
 
-   reltuples = indtuples = 0;
+   reltuples = indtuples = 0.0;
 
    /*
     * for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       reltuples++;
+       reltuples += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
            slot->val = heapTuple;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               indtuples++;
+               indtuples += 1.0;
                continue;
            }
        }
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       indtuples++;
+       indtuples += 1.0;
 
        /*
         * FormIndexDatum fills in its datum and null parameters with


diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 88e56869da58eee31d6c7b0a764b93c6c73476a7..24cc7a8b254dc9a10dea74b263e52cf30f477964 100644 (file)


--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,19 +8,16 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include 
-#include 
-#include 
-#include 
-#include 
+#include <math.h>
 
 #include "access/heapam.h"
+#include "access/tuptoaster.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_operator.h"
@@ -29,43 +26,139 @@
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/fmgroids.h"
-#include "utils/inval.h"
 #include "utils/syscache.h"
+#include "utils/tuplesort.h"
 
-#define swapLong(a,b)  {long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b)   {int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
-                                  stats->f_cmpgt.fn_addr != NULL && \
-                                  RegProcedureIsValid(stats->outfunc) )
 
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+   ALG_MINIMAL = 1,            /* Compute only most-common-values */
+   ALG_SCALAR                  /* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value.  For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD  256
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed.  The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ */
+typedef struct
+{
+   /* These fields are set up by examine_attribute */
+   int         attnum;         /* attribute number */
+   AlgCode     algcode;        /* Which algorithm to use for this column */
+   int         minrows;        /* Minimum # of rows needed for stats */
+   Form_pg_attribute attr;     /* copy of pg_attribute row for column */
+   Form_pg_type attrtype;      /* copy of pg_type row for column */
+   Oid         eqopr;          /* '=' operator for datatype, if any */
+   Oid         eqfunc;         /* and associated function */
+   Oid         ltopr;          /* '<' operator for datatype, if any */
+
+   /* These fields are filled in by the actual statistics-gathering routine */
+   bool        stats_valid;
+   float4      stanullfrac;    /* fraction of entries that are NULL */
+   int4        stawidth;       /* average width */
+   float4      stadistinct;    /* # distinct values */
+   int2        stakind[STATISTIC_NUM_SLOTS];
+   Oid         staop[STATISTIC_NUM_SLOTS];
+   int         numnumbers[STATISTIC_NUM_SLOTS];
+   float4     *stanumbers[STATISTIC_NUM_SLOTS];
+   int         numvalues[STATISTIC_NUM_SLOTS];
+   Datum      *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+   Datum       value;          /* a data value */
+   int         tupno;          /* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+   int         count;          /* # of duplicates */
+   int         first;          /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+
+#define swapInt(a,b)   {int _tmp; _tmp=a; a=b; b=_tmp;}
+#define swapDatum(a,b) {Datum _tmp; _tmp=a; a=b; b=_tmp;}
 
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+                              int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+                                       BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+                                 TupleDesc tupDesc, long totalrows,
+                                 HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+                                TupleDesc tupDesc, long totalrows,
+                                HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
 
 
 /*
- * analyze_rel() -- analyze relation
+ * analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
 {
-   HeapTuple   tuple;
    Relation    onerel;
-   int32       i;
-   int         attr_cnt,
-              *attnums = NULL;
    Form_pg_attribute *attr;
-   VacAttrStats *vacattrstats;
-   HeapScanDesc scan;
+   int         attr_cnt,
+               tcnt,
+               i;
+   VacAttrStats **vacattrstats;
+   int         targrows,
+               numrows;
+   long        totalrows;
+   HeapTuple  *rows;
+   HeapTuple   tuple;
+
+   if (vacstmt->verbose)
+       MESSAGE_LEVEL = NOTICE;
+   else
+       MESSAGE_LEVEL = DEBUG;
 
+   /*
+    * Begin a transaction for analyzing this relation.
+    *
+    * Note: All memory allocated during ANALYZE will live in
+    * TransactionCommandContext or a subcontext thereof, so it will
+    * all be released by transaction commit at the end of this routine.
+    */
    StartTransactionCommand();
 
    /*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 
    /*
     * Race condition -- if the pg_class tuple has gone away since the
-    * last time we saw it, we don't need to vacuum it.
+    * last time we saw it, we don't need to process it.
     */
    tuple = SearchSysCache(RELOID,
                           ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
 
    /*
-    * We can VACUUM ANALYZE any table except pg_statistic. see
-    * update_relstats
+    * We can ANALYZE any table except pg_statistic. See update_attstats
     */
    if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
               StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
    ReleaseSysCache(tuple);
 
+   /*
+    * Open the class, getting only a read lock on it, and check permissions
+    */
    onerel = heap_open(relid, AccessShareLock);
 
    if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
                       RELNAME))
    {
-
-       /*
-        * we already did an elog during vacuum elog(NOTICE, "Skipping
-        * \"%s\" --- only table owner can VACUUM it",
-        * RelationGetRelationName(onerel));
-        */
+       /* No need for a notice if we already complained during VACUUM */
+       if (!vacstmt->vacuum)
+           elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+                RelationGetRelationName(onerel));
        heap_close(onerel, NoLock);
        CommitTransactionCommand();
        return;
    }
 
-   elog(MESSAGE_LEVEL, "Analyzing...");
+   elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
 
-   attr_cnt = onerel->rd_att->natts;
+   /*
+    * Determine which columns to analyze
+    *
+    * Note that system attributes are never analyzed.
+    */
    attr = onerel->rd_att->attrs;
+   attr_cnt = onerel->rd_att->natts;
 
-   if (anal_cols2 != NIL)
+   if (vacstmt->va_cols != NIL)
    {
-       int         tcnt = 0;
        List       *le;
 
-       if (length(anal_cols2) > attr_cnt)
-           elog(ERROR, "vacuum: too many attributes specified for relation %s",
-                RelationGetRelationName(onerel));
-       attnums = (int *) palloc(attr_cnt * sizeof(int));
-       foreach(le, anal_cols2)
+       vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       foreach(le, vacstmt->va_cols)
        {
-           char       *col = (char *) lfirst(le);
+           char       *col = strVal(lfirst(le));
 
            for (i = 0; i < attr_cnt; i++)
            {
                if (namestrcmp(&(attr[i]->attname), col) == 0)
                    break;
            }
-           if (i < attr_cnt)   /* found */
-               attnums[tcnt++] = i;
-           else
-           {
-               elog(ERROR, "vacuum: there is no attribute %s in %s",
+           if (i >= attr_cnt)
+               elog(ERROR, "ANALYZE: there is no attribute %s in %s",
                     col, RelationGetRelationName(onerel));
-           }
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
+       }
+       attr_cnt = tcnt;
+   }
+   else
+   {
+       vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       for (i = 0; i < attr_cnt; i++)
+       {
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
        }
        attr_cnt = tcnt;
    }
 
-   vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+   /*
+    * Quit if no analyzable columns
+    */
+   if (attr_cnt <= 0)
+   {
+       heap_close(onerel, NoLock);
+       CommitTransactionCommand();
+       return;
+   }
 
+   /*
+    * Determine how many rows we need to sample, using the worst case
+    * from all analyzable columns.  We use a lower bound of 100 rows
+    * to avoid possible overflow in Vitter's algorithm.
+    */
+   targrows = 100;
    for (i = 0; i < attr_cnt; i++)
    {
-       Operator    func_operator;
-       VacAttrStats *stats;
-
-       stats = &vacattrstats[i];
-       stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
-       memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
-              ATTRIBUTE_TUPLE_SIZE);
-       stats->best = stats->guess1 = stats->guess2 = 0;
-       stats->max = stats->min = 0;
-       stats->best_len = stats->guess1_len = stats->guess2_len = 0;
-       stats->max_len = stats->min_len = 0;
-       stats->initialized = false;
-       stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
-       stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
-       func_operator = compatible_oper("=",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
-       {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
-           ReleaseSysCache(func_operator);
-       }
-       else
-           stats->f_cmpeq.fn_addr = NULL;
+       if (targrows < vacattrstats[i]->minrows)
+           targrows = vacattrstats[i]->minrows;
+   }
+
+   /*
+    * Acquire the sample rows
+    */
+   rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+   numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
 
-       func_operator = compatible_oper("<",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we are running a standalone ANALYZE, update pages/tuples stats
+    * in pg_class.  We have the accurate page count from heap_beginscan,
+    * but only an approximate number of tuples; therefore, if we are
+    * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+    * inserted by VACUUM.
+    */
+   if (!vacstmt->vacuum)
+       vac_update_relstats(RelationGetRelid(onerel),
+                           onerel->rd_nblocks,
+                           (double) totalrows,
+                           RelationGetForm(onerel)->relhasindex);
+
+   /*
+    * Compute the statistics.  Temporary results during the calculations
+    * for each column are stored in a child context.  The calc routines
+    * are responsible to make sure that whatever they store into the
+    * VacAttrStats structure is allocated in TransactionCommandContext.
+    */
+   if (numrows > 0)
+   {
+       MemoryContext col_context,
+                   old_context;
+
+       col_context = AllocSetContextCreate(CurrentMemoryContext,
+                                           "Analyze Column",
+                                           ALLOCSET_DEFAULT_MINSIZE,
+                                           ALLOCSET_DEFAULT_INITSIZE,
+                                           ALLOCSET_DEFAULT_MAXSIZE);
+       old_context = MemoryContextSwitchTo(col_context);
+       for (i = 0; i < attr_cnt; i++)
        {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
-           stats->op_cmplt = oprid(func_operator);
-           ReleaseSysCache(func_operator);
+           switch (vacattrstats[i]->algcode)
+           {
+               case ALG_MINIMAL:
+                   compute_minimal_stats(vacattrstats[i],
+                                         onerel->rd_att, totalrows,
+                                         rows, numrows);
+                   break;
+               case ALG_SCALAR:
+                   compute_scalar_stats(vacattrstats[i],
+                                        onerel->rd_att, totalrows,
+                                        rows, numrows);
+                   break;
+           }
+           MemoryContextResetAndDeleteChildren(col_context);
        }
-       else
+       MemoryContextSwitchTo(old_context);
+       MemoryContextDelete(col_context);
+
+       /*
+        * Emit the completed stats rows into pg_statistic, replacing any
+        * previous statistics for the target columns.  (If there are stats
+        * in pg_statistic for columns we didn't process, we leave them alone.)
+        */
+       update_attstats(relid, attr_cnt, vacattrstats);
+   }
+
+   /*
+    * Close source relation now, but keep lock so that no one deletes it
+    * before we commit.  (If someone did, they'd fail to clean up the
+    * entries we made in pg_statistic.)
+    */
+   heap_close(onerel, NoLock);
+
+   /* Commit and release working memory */
+   CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ *
+ * attnum is the 1-based column number; it is used as attrs[attnum-1] in
+ * the relation's tuple descriptor.
+ *
+ * NULL is returned when the column's attstattarget is <= 0, or when no
+ * "=" operator whose restriction estimator is F_EQSEL can be found for the
+ * column's datatype.  Otherwise the returned struct is palloc'd and zeroed,
+ * then filled with copies of the pg_attribute and pg_type rows plus the
+ * operator OIDs found here.  algcode is ALG_SCALAR if a "<" operator whose
+ * restriction estimator is F_SCALARLTSEL exists, else ALG_MINIMAL; minrows
+ * is 300 * attstattarget in either case.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+   Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+   Operator    func_operator;
+   Oid         oprrest;
+   HeapTuple   typtuple;
+   Oid         eqopr = InvalidOid;
+   Oid         eqfunc = InvalidOid;
+   Oid         ltopr = InvalidOid;
+   VacAttrStats *stats;
+
+   /* Don't analyze column if user has specified not to */
+   if (attr->attstattarget <= 0)
+       return NULL;
+
+   /*
+    * If column has no "=" operator, we can't do much of anything.  We
+    * also insist that the operator's restriction estimator be F_EQSEL;
+    * if it is not, eqfunc stays InvalidOid and the column is skipped.
+    */
+   func_operator = compatible_oper("=",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_EQSEL)
         {
-           stats->f_cmplt.fn_addr = NULL;
-           stats->op_cmplt = InvalidOid;
+           eqopr = oprid(func_operator);
+           eqfunc = oprfuncid(func_operator);
         }
+       ReleaseSysCache(func_operator);
+   }
+   if (!OidIsValid(eqfunc))
+       return NULL;
 
-       func_operator = compatible_oper(">",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we have "=" then we're at least able to do the minimal algorithm,
+    * so start filling in a VacAttrStats struct.  The pg_attribute and
+    * pg_type rows are copied into freshly palloc'd storage rather than
+    * referenced in place, and the syscache entry is released right after
+    * the copy.
+    */
+   stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+   MemSet(stats, 0, sizeof(VacAttrStats));
+   stats->attnum = attnum;
+   stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+   memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+   typtuple = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(attr->atttypid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(typtuple))
+       elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+   stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+   memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+   ReleaseSysCache(typtuple);
+   stats->eqopr = eqopr;
+   stats->eqfunc = eqfunc;
+
+   /*
+    * Is there a "<" operator with suitable semantics?  "Suitable" here
+    * means its restriction estimator is F_SCALARLTSEL; otherwise ltopr
+    * is left as InvalidOid.
+    */
+   func_operator = compatible_oper("<",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_SCALARLTSEL)
         {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
-           ReleaseSysCache(func_operator);
+           ltopr = oprid(func_operator);
         }
-       else
-           stats->f_cmpgt.fn_addr = NULL;
+       ReleaseSysCache(func_operator);
+   }
+   stats->ltopr = ltopr;
+
+   /*
+    * Determine the algorithm to use (this will get more complicated later)
+    */
+   if (OidIsValid(ltopr))
+   {
+       /* Seems to be a scalar datatype */
+       stats->algcode = ALG_SCALAR;
+       /*--------------------
+        * The following choice of minrows is based on the paper
+        * "Random sampling for histogram construction: how much is enough?"
+        * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+        * Proceedings of ACM SIGMOD International Conference on Management
+        * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
+        * says that for table size n, histogram size k, maximum relative
+        * error in bin size f, and error probability gamma, the minimum
+        * random sample size is
+        *      r = 4 * k * ln(2*n/gamma) / f^2
+        * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+        *      r = 305.82 * k
+        * Note that because of the log function, the dependence on n is
+        * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+        * bin size error with probability 0.99.  So there's no real need to
+        * scale for n, which is a good thing because we don't necessarily
+        * know it at this point.
+        *--------------------
+        */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+   else
+   {
+       /* Can't do much but the minimal stuff */
+       stats->algcode = ALG_MINIMAL;
+       /* Might as well use the same minrows as above */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+
+   return stats;
+}
 
-       tuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(stats->attr->atttypid),
-                              0, 0, 0);
-       if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are collected (if there are fewer than that many
+ * rows in the table, all rows are collected).  When the table is larger
+ * than targrows, a truly random sample is collected: every row has an
+ * equal chance of ending up in the final sample.
+ *
+ * onerel is the relation to sample.  rows[] must have room for targrows
+ * entries; it receives copies of the sampled tuples (made with
+ * heap_copytuple).  The function result is the number of entries stored.
+ *
+ * We also estimate the total number of rows in the table, and return that
+ * into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+                   long *totalrows)
+{
+   int         numrows = 0;
+   HeapScanDesc scan;
+   HeapTuple   tuple;
+   ItemPointer lasttuple;
+   BlockNumber lastblock,
+               estblock;
+   OffsetNumber lastoffset;
+   int         numest;
+   double      tuplesperpage;
+   long        t;
+   double      rstate;
+
+   Assert(targrows > 1);
+   /*
+    * Do a simple linear scan until we reach the target number of rows.
+    */
+   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       rows[numrows++] = heap_copytuple(tuple);
+       if (numrows >= targrows)
+           break;
+   }
+   heap_endscan(scan);
+   /*
+    * If we ran out of tuples then we're done, no matter how few we
+    * collected.  No sort is needed, since they're already in order.
+    */
+   if (!HeapTupleIsValid(tuple))
+   {
+       *totalrows = numrows;
+       return numrows;
+   }
+   /*
+    * Otherwise, start replacing tuples in the sample until we reach the
+    * end of the relation.  This algorithm is from Jeff Vitter's paper
+    * (see full citation below).  It works by repeatedly computing the number
+    * of the next tuple we want to fetch, which will replace a randomly
+    * chosen element of the reservoir (current set of tuples).  At all times
+    * the reservoir is a true random sample of the tuples we've passed over
+    * so far, so when we fall off the end of the relation we're done.
+    *
+    * A slight difficulty is that since we don't want to fetch tuples or even
+    * pages that we skip over, it's not possible to fetch *exactly* the N'th
+    * tuple at each step --- we don't know how many valid tuples are on
+    * the skipped pages.  We handle this by assuming that the average number
+    * of valid tuples/page on the pages already scanned over holds good for
+    * the rest of the relation as well; this lets us estimate which page
+    * the next tuple should be on and its position in the page.  Then we
+    * fetch the first valid tuple at or after that position, being careful
+    * not to use the same tuple twice.  This approach should still give a
+    * good random sample, although it's not perfect.
+    */
+   lasttuple = &(rows[numrows-1]->t_self);
+   lastblock = ItemPointerGetBlockNumber(lasttuple);
+   lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+   /*
+    * If possible, estimate tuples/page using only completely-scanned pages.
+    * Walk back from the end of the sample to find how many collected rows
+    * lie on pages before the last (possibly partially scanned) one.
+    */
+   for (numest = numrows; numest > 0; numest--)
+   {
+       if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+           break;
+   }
+   if (numest == 0)
+   {
+       numest = numrows;       /* don't have a full page? */
+       estblock = lastblock + 1;
+   }
+   else
+   {
+       estblock = lastblock;
+   }
+   tuplesperpage = (double) numest / (double) estblock;
+
+   t = numrows;                /* t is the # of records processed so far */
+   rstate = init_selection_state(targrows);
+   for (;;)
+   {
+       double          targpos;
+       BlockNumber     targblock;
+       OffsetNumber    targoffset,
+                       maxoffset;
+
+       t = select_next_random_record(t, targrows, &rstate);
+       /* Try to read the t'th record in the table */
+       targpos = (double) t / tuplesperpage;
+       targblock = (BlockNumber) targpos;
+       /*
+        * The fractional part of targpos, scaled by tuplesperpage, is the
+        * estimated position within the page.  The cast must cover the whole
+        * product: truncating the fraction by itself always yields zero.
+        */
+       targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
+           FirstOffsetNumber;
+       /* Make sure we are past the last selected record */
+       if (targblock <= lastblock)
        {
-           stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
-           stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
-           ReleaseSysCache(tuple);
+           targblock = lastblock;
+           if (targoffset <= lastoffset)
+               targoffset = lastoffset + 1;
        }
-       else
+       /* Loop to find first valid record at or after given position */
+   pageloop:;
+       /*
+        * Have we fallen off the end of the relation?  (We rely on
+        * heap_beginscan to have updated rd_nblocks.)
+        */
+       if (targblock >= onerel->rd_nblocks)
+           break;
+       maxoffset = get_page_max_offset(onerel, targblock);
+       for (;;)
        {
-           stats->outfunc = InvalidOid;
-           stats->typelem = InvalidOid;
+           HeapTupleData targtuple;
+           Buffer      targbuffer;
+
+           if (targoffset > maxoffset)
+           {
+               /* Fell off end of this page, try next */
+               targblock++;
+               targoffset = FirstOffsetNumber;
+               goto pageloop;
+           }
+           ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+           heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+           if (targtuple.t_data != NULL)
+           {
+               /*
+                * Found a suitable tuple, so save it, replacing one old
+                * tuple at random
+                */
+               int     k = (int) (targrows * random_fract());
+
+               Assert(k >= 0 && k < targrows);
+               heap_freetuple(rows[k]);
+               rows[k] = heap_copytuple(&targtuple);
+               ReleaseBuffer(targbuffer);
+               lastblock = targblock;
+               lastoffset = targoffset;
+               break;
+           }
+           /* this tuple is dead, so advance to next one on same page */
+           targoffset++;
        }
    }
-   /* delete existing pg_statistic rows for relation */
-   del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
-   /* scan relation to gather statistics */
-   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
-       attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+   /*
+    * Now we need to sort the collected tuples by position (itempointer),
+    * since replacements were stored at random reservoir slots.
+    */
+   qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
 
-   heap_endscan(scan);
+   /*
+    * Estimate total number of valid rows in relation.
+    */
+   *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
 
-   /* close rel, but keep lock so it doesn't go away before commit */
-   heap_close(onerel, NoLock);
+   return numrows;
+}
 
-   /* update statistics in pg_class */
-   update_attstats(relid, attr_cnt, vacattrstats);
+/*
+ * Select a random value R uniformly distributed in 0 < R < 1
+ *
+ * The result is random() divided by MAX_RANDOM_VALUE; draws equal to
+ * either endpoint are discarded, so the result is strictly inside (0, 1).
+ */
+static double
+random_fract(void)
+{
+   long    z;
 
-   CommitTransactionCommand();
+   /* random() can produce endpoint values, try again if so */
+   do
+   {
+       z = random();
+   } while (! (z > 0 && z < MAX_RANDOM_VALUE));
+   return (double) z / (double) MAX_RANDOM_VALUE;
 }
 
 /*
- * attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed and next record to process.  The only extra
+ * state needed between calls is W, a random state variable.
  *
- * We compute the column min, max, null and non-null counts.
- * Plus we attempt to find the count of the value that occurs most
- * frequently in each column.  These figures are used to compute
- * the selectivity of the column.
+ * init_selection_state computes the initial W value.
  *
- * We use a three-bucket cache to get the most frequent item.
- * The 'guess' buckets count hits.  A cache miss causes guess1
- * to get the most hit 'guess' item in the most recent cycle, and
- * the new item goes into guess2.  Whenever the total count of hits
- * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+   /*
+    * Starting W for Algorithm Z: one uniform draw u from random_fract(),
+    * transformed as exp(-log(u)/n).
+    */
+   double      u = random_fract();
+
+   return exp(- log(u)/n);
+}
+
+/*
+ * select_next_random_record
+ *
+ * t is the number of records processed so far and n is the sample size.
+ * *stateptr holds the W state variable; it is read and written back only
+ * on the Algorithm Z path.  Returns the number of the next record to
+ * process.
+ */
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+   /* The magic constant here is T from Vitter's paper */
+   if (t <= (22 * n))
+   {
+       /* Process records using Algorithm X until t is large enough */
+       double  V,
+               quot;
+
+       V = random_fract();     /* Generate V */
+       t++;
+       quot = (double) (t - n) / (double) t;
+       /* Find min S satisfying (4.1) */
+       while (quot > V)
+       {
+           /* advance t one record at a time, accumulating the product */
+           t++;
+           quot *= (double) (t - n) / (double) t;
+       }
+   }
+   else
+   {
+       /* Now apply Algorithm Z */
+       double  W = *stateptr;
+       long    term = t - n + 1;
+       int     S;
+
+       /* Repeat until a candidate skip count S passes one of the two tests */
+       for (;;)
+       {
+           long    numer,
+                   numer_lim,
+                   denom;
+           double  U,
+                   X,
+                   lhs,
+                   rhs,
+                   y,
+                   tmp;
+
+           /* Generate U and X */
+           U = random_fract();
+           X = t * (W - 1.0);
+           S = X;              /* S is tentatively set to floor(X) */
+           /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+           tmp = (double) (t + 1) / (double) term;
+           lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+           rhs = (((t + X)/(term + S)) * term)/t;
+           if (lhs <= rhs)
+           {
+               /* Accepted; the next W is derived from this comparison */
+               W = rhs/lhs;
+               break;
+           }
+           /* Test if U <= f(S)/cg(X) */
+           y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+           if (n < S)
+           {
+               denom = t;
+               numer_lim = term + S;
+           }
+           else
+           {
+               denom = t - n + S;
+               numer_lim = t + 1;
+           }
+           /* Multiply y by numer/denom for numer from t+S down to numer_lim */
+           for (numer = t + S; numer >= numer_lim; numer--)
+           {
+               y *= (double) numer / (double) denom;
+               denom--;
+           }
+           W = exp(- log(random_fract())/n); /* Generate W in advance */
+           if (exp(log(y)/n) <= (t + X)/t)
+               break;
+       }
+       t += S + 1;             /* skip S records, then take the next one */
+       *stateptr = W;
+   }
+   return t;
+}
+
+/*
+ * qsort comparator for sorting rows[] array
+ *
+ * Orders tuples by the block number of their t_self item pointer, and
+ * within one block by offset number.
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+   HeapTuple   lhs = * (const HeapTuple *) a;
+   HeapTuple   rhs = * (const HeapTuple *) b;
+   BlockNumber lblk = ItemPointerGetBlockNumber(&lhs->t_self);
+   OffsetNumber loff = ItemPointerGetOffsetNumber(&lhs->t_self);
+   BlockNumber rblk = ItemPointerGetBlockNumber(&rhs->t_self);
+   OffsetNumber roff = ItemPointerGetOffsetNumber(&rhs->t_self);
+
+   /* Block number is the major sort key */
+   if (lblk != rblk)
+       return (lblk < rblk) ? -1 : 1;
+   /* Same block: offset number breaks the tie */
+   if (loff != roff)
+       return (loff < roff) ? -1 : 1;
+   return 0;
+}
+
+/*
+ * get_page_max_offset -- discover the largest valid tuple offset number
+ * on the given page of the relation
+ *
+ * The page is read with ReadBuffer and examined under a share lock; the
+ * buffer is unlocked and released again before returning.  Reports an
+ * elog(ERROR) if ReadBuffer does not return a valid buffer.
+ *
+ * This code probably ought to live in some other module.
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+   OffsetNumber maxoff;
+   Page        page;
+   Buffer      buf = ReadBuffer(relation, blocknumber);
+
+   if (!BufferIsValid(buf))
+       elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+            RelationGetRelationName(relation), (long) blocknumber);
+
+   /* Hold the share lock only while looking at the page */
+   LockBuffer(buf, BUFFER_LOCK_SHARE);
+   page = BufferGetPage(buf);
+   maxoff = PageGetMaxOffsetNumber(page);
+   LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+   ReleaseBuffer(buf);
+
+   return maxoff;
+}
+
+
+/*
+ * compute_minimal_stats() -- compute minimal column statistics
  *
- * This method works perfectly for columns with unique values, and columns
- * with only two unique values, plus nulls.
+ * We use this when we can find only an "=" operator for the datatype.
  *
- * It becomes less perfect as the number of unique values increases and
- * their distribution in the table becomes more random.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, and the (estimated) number of distinct values.
  *
+ * The most common values are determined by brute force: we keep a list
+ * of previously seen values, ordered by number of times seen, as we scan
+ * the samples.  A newly seen value is inserted just after the last
+ * multiply-seen value, causing the bottommost (oldest) singly-seen value
+ * to drop off the list.  The accuracy of this method, and also its cost,
+ * depend mainly on the length of the list we are willing to keep.
+ *
+ * stats identifies the column and receives the results.  rows[] holds the
+ * numrows sample tuples, described by tupDesc.  totalrows is the row count
+ * for the whole table and is used to scale the distinct-values estimate.
  */
 static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+                     TupleDesc tupDesc, long totalrows,
+                     HeapTuple *rows, int numrows)
 {
    int         i;
-   TupleDesc   tupDesc = onerel->rd_att;
-
-   for (i = 0; i < attr_cnt; i++)
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   FmgrInfo    f_cmpeq;
+   typedef struct
+   {
+       Datum   value;          /* a value seen in the sample */
+       int     count;          /* number of times seen so far */
+   } TrackItem;
+   TrackItem  *track;
+   int         track_cnt,
+               track_max;
+   int         num_mcv = stats->attr->attstattarget;
+
+   /* We track up to 2*n values for an n-element MCV list; but at least 10 */
+   track_max = 2 * num_mcv;
+   if (track_max < 10)
+       track_max = 10;
+   track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+   track_cnt = 0;
+
+   fmgr_info(stats->eqfunc, &f_cmpeq);
+
+   for (i = 0; i < numrows; i++)
    {
-       VacAttrStats *stats = &vacattrstats[i];
-       Datum       origvalue;
+       HeapTuple   tuple = rows[i];
        Datum       value;
        bool        isnull;
-       bool        value_hit;
-
-       if (!VacAttrStatsEqValid(stats))
-           continue;
-
-#ifdef _DROP_COLUMN_HACK__
-       if (COLUMN_IS_DROPPED(stats->attr))
-           continue;
-#endif  /* _DROP_COLUMN_HACK__ */
+       bool        match;
+       int         firstcount1,
+                   j;
 
-       origvalue = heap_getattr(tuple, stats->attr->attnum,
-                                tupDesc, &isnull);
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
+       /* Check for null/nonnull */
        if (isnull)
        {
-           stats->null_cnt++;
+           null_cnt++;
            continue;
        }
-       stats->nonnull_cnt++;
+       nonnull_cnt++;
 
        /*
-        * If the value is toasted, detoast it to avoid repeated
-        * detoastings and resultant memory leakage inside the comparison
-        * routines.
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
         */
-       if (!stats->attr->attbyval && stats->attr->attlen == -1)
-           value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
-       else
-           value = origvalue;
-
-       if (!stats->initialized)
+       if (is_varlena)
        {
-           bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
-           /* best_cnt gets incremented below */
-           bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
-           stats->guess1_cnt = stats->guess1_hits = 1;
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess2_hits = 1;
-           if (VacAttrStatsLtGtValid(stats))
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               /* min_cnt, max_cnt get incremented below */
+               toowide_cnt++;
+               continue;
            }
-           stats->initialized = true;
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
        }
 
-       if (VacAttrStatsLtGtValid(stats))
+       /*
+        * See if the value matches anything we're already tracking.
+        * While searching, firstcount1 is set to the index of the first
+        * entry examined whose count is 1; it stays at track_cnt if no
+        * such entry is seen.
+        */
+       match = false;
+       firstcount1 = track_cnt;
+       for (j = 0; j < track_cnt; j++)
        {
-           if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
-                                          value, stats->min)))
+           if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
            {
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               stats->min_cnt = 1;
+               match = true;
+               break;
            }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->min)))
-               stats->min_cnt++;
+           if (j < firstcount1 && track[j].count == 1)
+               firstcount1 = j;
+       }
 
-           if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
-                                          value, stats->max)))
+       if (match)
+       {
+           /* Found a match */
+           track[j].count++;
+           /* This value may now need to "bubble up" in the track list */
+           while (j > 0 && track[j].count > track[j-1].count)
            {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               stats->max_cnt = 1;
+               swapDatum(track[j].value, track[j-1].value);
+               swapInt(track[j].count, track[j-1].count);
+               j--;
            }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->max)))
-               stats->max_cnt++;
        }
-
-       value_hit = true;
-       if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                      value, stats->best)))
-           stats->best_cnt++;
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess1)))
+       else
        {
-           stats->guess1_cnt++;
-           stats->guess1_hits++;
+           /* No match.  Insert at head of count-1 list */
+           if (track_cnt < track_max)
+               track_cnt++;
+           /* Shift later entries down a slot; if full, the last one is lost */
+           for (j = track_cnt-1; j > firstcount1; j--)
+           {
+               track[j].value = track[j-1].value;
+               track[j].count = track[j-1].count;
+           }
+           /* No slot is free if the list is full of multiply-seen values */
+           if (firstcount1 < track_cnt)
+           {
+               track[firstcount1].value = value;
+               track[firstcount1].count = 1;
+           }
        }
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess2)))
-           stats->guess2_hits++;
+   }
+
+   /* We can only compute valid stats if we found some non-null values. */
+   if (nonnull_cnt > 0)
+   {
+       int     nmultiple,
+               summultiple;
+
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
        else
-           value_hit = false;
+           stats->stawidth = stats->attrtype->typlen;
 
-       if (stats->guess2_hits > stats->guess1_hits)
+       /* Count the number of values we found multiple times */
+       summultiple = 0;
+       for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
        {
-           swapDatum(stats->guess1, stats->guess2);
-           swapInt(stats->guess1_len, stats->guess2_len);
-           swapLong(stats->guess1_hits, stats->guess2_hits);
-           stats->guess1_cnt = stats->guess1_hits;
+           if (track[nmultiple].count == 1)
+               break;
+           summultiple += track[nmultiple].count;
        }
-       if (stats->guess1_cnt > stats->best_cnt)
+
+       if (nmultiple == 0)
        {
-           swapDatum(stats->best, stats->guess1);
-           swapInt(stats->best_len, stats->guess1_len);
-           swapLong(stats->best_cnt, stats->guess1_cnt);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
        }
-       if (!value_hit)
+       else if (track_cnt < track_max && toowide_cnt == 0 &&
+                nmultiple == track_cnt)
        {
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /*
+            * Our track list includes every value in the sample, and every
+            * value appeared more than once.  Assume the column has just
+            * these values.
+            */
+           stats->stadistinct = track_cnt;
        }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * We assume (not very reliably!) that all the multiply-occurring
+            * values are reflected in the final track[] list, and the other
+            * nonnull values all appeared but once.
+            *----------
+            */
+           int     f1 = nonnull_cnt - summultiple;
+           double  term1;
 
-       /* Clean up detoasted copy, if any */
-       if (value != origvalue)
-           pfree(DatumGetPointer(value));
-   }
-}
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
 
-/*
- * bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
-   if (attr->attbyval)
-       *bucket = value;
-   else
-   {
-       int         len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.  The scaled form is stored as a negative fraction.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
 
-       /* Avoid unnecessary palloc() traffic... */
-       if (len > *bucket_len)
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       if (num_mcv > 0)
        {
-           if (*bucket_len != 0)
-               pfree(DatumGetPointer(*bucket));
-           *bucket = PointerGetDatum(palloc(len));
-           *bucket_len = len;
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(track[i].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               /* frequency is relative to all sampled rows, nulls included */
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
+           }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[0] = STATISTIC_KIND_MCV;
+           stats->staop[0] = stats->eqopr;
+           stats->stanumbers[0] = mcv_freqs;
+           stats->numnumbers[0] = num_mcv;
+           stats->stavalues[0] = mcv_values;
+           stats->numvalues[0] = num_mcv;
        }
-       memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
    }
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 
 /*
- * update_attstats() -- update attribute statistics for one relation
+ * compute_scalar_stats() -- compute column statistics
  *
- *     Statistics are stored in several places: the pg_class row for the
- *     relation has stats about the whole relation, the pg_attribute rows
- *     for each attribute store "dispersion", and there is a pg_statistic
- *     row for each (non-system) attribute.  (Dispersion probably ought to
- *     be moved to pg_statistic, but it's not worth doing unless there's
- *     another reason to have to change pg_attribute.)  The pg_class values
- *     are updated by VACUUM, not here.
- *
- *     We violate no-overwrite semantics here by storing new values for
- *     the dispersion column directly into the pg_attribute tuple that's
- *     already on the page.  The reason for this is that if we updated
- *     these tuples in the usual way, vacuuming pg_attribute itself
- *     wouldn't work very well --- by the time we got done with a vacuum
- *     cycle, most of the tuples in pg_attribute would've been obsoleted.
- *     Updating pg_attribute's own statistics would be especially tricky.
- *     Of course, this only works for fixed-size never-null columns, but
- *     dispersion is.
+ * We use this when we can find "=" and "<" operators for the datatype.
  *
- *     pg_statistic rows are just added normally.  This means that
- *     pg_statistic will probably contain some deleted rows at the
- *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, the (estimated) number of distinct values, the
+ * distribution histogram, and the correlation of physical to logical order.
  *
- *     To keep things simple, we punt for pg_statistic, and don't try
- *     to compute or store rows for pg_statistic itself in pg_statistic.
- *     This could possibly be made to work, but it's not worth the trouble.
+ * The desired stats can be determined fairly easily after sorting the
+ * data values into order.
  */
 static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+                    TupleDesc tupDesc, long totalrows,
+                    HeapTuple *rows, int numrows)
 {
-   Relation    ad,
-               sd;
-   HeapScanDesc scan;
-   HeapTuple   atup,
-               stup;
-   ScanKeyData askey;
-   Form_pg_attribute attp;
-
-   ad = heap_openr(AttributeRelationName, RowExclusiveLock);
-   sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
-   /* Find pg_attribute rows for this relation */
-   ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
-                          F_INT4EQ, relid);
-
-   scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
-   while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+   int         i;
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   double      corr_xysum;
+   RegProcedure cmpFn;
+   SortFunctionKind cmpFnKind;
+   FmgrInfo    f_cmpfn;
+   ScalarItem *values;
+   int         values_cnt = 0;
+   int        *tupnoLink;
+   ScalarMCVItem *track;
+   int         track_cnt = 0;
+   int         num_mcv = stats->attr->attstattarget;
+
+   values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+   tupnoLink = (int *) palloc(numrows * sizeof(int));
+   track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+   SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+   fmgr_info(cmpFn, &f_cmpfn);
+
+   /* Initial scan to find sortable values */
+   for (i = 0; i < numrows; i++)
    {
-       int         i;
-       VacAttrStats *stats;
+       HeapTuple   tuple = rows[i];
+       Datum       value;
+       bool        isnull;
 
-       attp = (Form_pg_attribute) GETSTRUCT(atup);
-       if (attp->attnum <= 0)  /* skip system attributes for now */
-           continue;
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
-       for (i = 0; i < natts; i++)
+       /* Check for null/nonnull */
+       if (isnull)
        {
-           if (attp->attnum == vacattrstats[i].attr->attnum)
-               break;
+           null_cnt++;
+           continue;
        }
-       if (i >= natts)
-           continue;           /* skip attr if no stats collected */
-       stats = &(vacattrstats[i]);
+       nonnull_cnt++;
 
-       if (VacAttrStatsEqValid(stats))
+       /*
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
+        */
+       if (is_varlena)
        {
-           float4      selratio;       /* average ratio of rows selected
-                                        * for a random constant */
-
-           /* Compute dispersion */
-           if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
-
-               /*
-                * empty relation, so put a dummy value in attdispersion
-                */
-               selratio = 0;
+               toowide_cnt++;
+               continue;
            }
-           else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
-           {
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
+       }
 
-               /*
-                * looks like we have a unique-key attribute --- flag this
-                * with special -1.0 flag value.
-                *
-                * The correct dispersion is 1.0/numberOfRows, but since the
-                * relation row count can get updated without recomputing
-                * dispersion, we want to store a "symbolic" value and
-                * figure 1.0/numberOfRows on the fly.
-                */
-               selratio = -1;
-           }
-           else
+       /* Add it to the list to be sorted */
+       values[values_cnt].value = value;
+       values[values_cnt].tupno = values_cnt;
+       tupnoLink[values_cnt] = values_cnt;
+       values_cnt++;
+   }
+
+   /* We can only compute valid stats if we found some sortable values. */
+   if (values_cnt > 0)
+   {
+       int     ndistinct,      /* # distinct values in sample */
+               nmultiple,      /* # that appear multiple times */
+               num_hist,
+               dups_cnt;
+       int     slot_idx = 0;
+
+       /* Sort the collected values */
+       datumCmpFn = &f_cmpfn;
+       datumCmpFnKind = cmpFnKind;
+       datumCmpTupnoLink = tupnoLink;
+       qsort((void *) values, values_cnt,
+             sizeof(ScalarItem), compare_scalars);
+
+       /*
+        * Now scan the values in order, find the most common ones,
+        * and also accumulate ordering-correlation statistics.
+        *
+        * To determine which are most common, we first have to count the
+        * number of duplicates of each value.  The duplicates are adjacent
+        * in the sorted list, so a brute-force approach is to compare
+        * successive datum values until we find two that are not equal.
+        * However, that requires N-1 invocations of the datum comparison
+        * routine, which are completely redundant with work that was done
+        * during the sort.  (The sort algorithm must at some point have
+        * compared each pair of items that are adjacent in the sorted order;
+        * otherwise it could not know that it's ordered the pair correctly.)
+        * We exploit this by having compare_scalars remember the highest
+        * tupno index that each ScalarItem has been found equal to.  At the
+        * end of the sort, a ScalarItem's tupnoLink will still point to
+        * itself if and only if it is the last item of its group of
+        * duplicates (since the group will be ordered by tupno).
+        */
+       corr_xysum = 0;
+       ndistinct = 0;
+       nmultiple = 0;
+       dups_cnt = 0;
+       for (i = 0; i < values_cnt; i++)
+       {
+           int         tupno = values[i].tupno;
+
+           corr_xysum += (double) i * (double) tupno;
+           dups_cnt++;
+           if (tupnoLink[tupno] == tupno)
            {
-               if (VacAttrStatsLtGtValid(stats) &&
-                   stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+               /* Reached end of duplicates of this value */
+               ndistinct++;
+               if (dups_cnt > 1)
                {
+                   nmultiple++;
+                   if (track_cnt < num_mcv ||
+                       dups_cnt > track[track_cnt-1].count)
+                   {
+                       /*
+                        * Found a new item for the mcv list; find its
+                        * position, bubbling down old items if needed.
+                        * Loop invariant is that j points at an empty/
+                        * replaceable slot.
+                        */
+                       int     j;
+
+                       if (track_cnt < num_mcv)
+                           track_cnt++;
+                       for (j = track_cnt-1; j > 0; j--)
+                       {
+                           if (dups_cnt <= track[j-1].count)
+                               break;
+                           track[j].count = track[j-1].count;
+                           track[j].first = track[j-1].first;
+                       }
+                       track[j].count = dups_cnt;
+                       track[j].first = i + 1 - dups_cnt;
+                   }
+               }
+               dups_cnt = 0;
+           }
+       }
 
-                   /*
-                    * exact result when there are just 1 or 2 values...
-                    */
-                   double      min_cnt_d = stats->min_cnt,
-                               max_cnt_d = stats->max_cnt,
-                               null_cnt_d = stats->null_cnt;
-                   double      total = ((double) stats->nonnull_cnt) + null_cnt_d;
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
+       else
+           stats->stawidth = stats->attrtype->typlen;
 
-                   selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
-               }
-               else
-               {
-                   double      most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-                   double      total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+       if (nmultiple == 0)
+       {
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
+       }
+       else if (toowide_cnt == 0 && nmultiple == ndistinct)
+       {
+           /*
+            * Every value in the sample appeared more than once.  Assume the
+            * column has just these values.
+            */
+           stats->stadistinct = ndistinct;
+       }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * Overwidth values are assumed to have been distinct.
+            *----------
+            */
+           int     f1 = ndistinct - nmultiple + toowide_cnt;
+           double  term1;
 
-                   /*
-                    * we assume count of other values are 20% of best
-                    * count in table
-                    */
-                   selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
-               }
-               /* Make sure calculated values are in-range */
-               if (selratio < 0.0)
-                   selratio = 0.0;
-               else if (selratio > 1.0)
-                   selratio = 1.0;
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
+
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
+
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       Assert(track_cnt >= num_mcv);
+       if (num_mcv > 0)
+       {
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(values[track[i].first].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+           stats->staop[slot_idx] = stats->eqopr;
+           stats->stanumbers[slot_idx] = mcv_freqs;
+           stats->numnumbers[slot_idx] = num_mcv;
+           stats->stavalues[slot_idx] = mcv_values;
+           stats->numvalues[slot_idx] = num_mcv;
+           slot_idx++;
+       }
 
-           /* overwrite the existing statistics in the tuple */
-           attp->attdispersion = selratio;
+       /*
+        * Generate a histogram slot entry if there are at least two
+        * distinct values not accounted for in the MCV list.  (This
+        * ensures the histogram won't collapse to empty or a singleton.)
+        */
+       num_hist = ndistinct - num_mcv;
+       if (num_hist > stats->attr->attstattarget)
+           num_hist = stats->attr->attstattarget + 1;
+       if (num_hist >= 2)
+       {
+           MemoryContext old_context;
+           Datum  *hist_values;
+           int     nvals;
 
-           /* invalidate the tuple in the cache and write the buffer */
-           RelationInvalidateHeapTuple(ad, atup);
-           WriteNoReleaseBuffer(scan->rs_cbuf);
+           /* Sort the MCV items into position order to speed next loop */
+           qsort((void *) track, num_mcv,
+                 sizeof(ScalarMCVItem), compare_mcvs);
 
            /*
-            * Create pg_statistic tuples for the relation, if we have
-            * gathered the right data.  del_stats() previously deleted
-            * all the pg_statistic tuples for the rel, so we just have to
-            * insert new ones here.
+            * Collapse out the MCV items from the values[] array.
             *
-            * Note analyze_rel() has seen to it that we won't come here when
-            * vacuuming pg_statistic itself.
+            * Note we destroy the values[] array here... but we don't need
+            * it for anything more.  We do, however, still need values_cnt.
             */
-           if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+           if (num_mcv > 0)
            {
-               float4      nullratio;
-               float4      bestratio;
-               FmgrInfo    out_function;
-               char       *out_string;
-               double      best_cnt_d = stats->best_cnt,
-                           null_cnt_d = stats->null_cnt,
-                           nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
-               Datum       values[Natts_pg_statistic];
-               char        nulls[Natts_pg_statistic];
-               Relation    irelations[Num_pg_statistic_indices];
+               int     src,
+                       dest;
+               int     j;
 
-               nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
-               bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
-               fmgr_info(stats->outfunc, &out_function);
+               src = dest = 0;
+               j = 0;          /* index of next interesting MCV item */
+               while (src < values_cnt)
+               {
+                   int     ncopy;
+
+                   if (j < num_mcv)
+                   {
+                       int     first = track[j].first;
+
+                       if (src >= first)
+                       {
+                           /* advance past this MCV item */
+                           src = first + track[j].count;
+                           j++;
+                           continue;
+                       }
+                       ncopy = first - src;
+                   }
+                   else
+                   {
+                       ncopy = values_cnt - src;
+                   }
+                   memmove(&values[dest], &values[src],
+                           ncopy * sizeof(ScalarItem));
+                   src += ncopy;
+                   dest += ncopy;
+               }
+               nvals = dest;
+           }
+           else
+               nvals = values_cnt;
+           Assert(nvals >= num_hist);
 
-               for (i = 0; i < Natts_pg_statistic; ++i)
-                   nulls[i] = ' ';
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+           for (i = 0; i < num_hist; i++)
+           {
+               int     pos;
 
-               /*
-                * initialize values[]
-                */
-               i = 0;
-               values[i++] = ObjectIdGetDatum(relid);  /* starelid */
-               values[i++] = Int16GetDatum(attp->attnum);      /* staattnum */
-               values[i++] = ObjectIdGetDatum(stats->op_cmplt);        /* staop */
-               values[i++] = Float4GetDatum(nullratio);        /* stanullfrac */
-               values[i++] = Float4GetDatum(bestratio);        /* stacommonfrac */
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->best,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stacommonval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->min,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* staloval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->max,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stahival */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-
-               stup = heap_formtuple(sd->rd_att, values, nulls);
-
-               /* store tuple and update indexes too */
-               heap_insert(sd, stup);
-
-               CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
-               CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
-               CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
-               /* release allocated space */
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
-               heap_freetuple(stup);
+               pos = (i * (nvals - 1)) / (num_hist - 1);
+               hist_values[i] = datumCopy(values[pos].value,
+                                          stats->attr->attbyval,
+                                          stats->attr->attlen);
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stavalues[slot_idx] = hist_values;
+           stats->numvalues[slot_idx] = num_hist;
+           slot_idx++;
+       }
+
+       /* Generate a correlation entry if there are multiple values */
+       if (values_cnt > 1)
+       {
+           MemoryContext old_context;
+           float4 *corrs;
+           double  corr_xsum,
+                   corr_x2sum;
+
+           /* Must allocate the result array in TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           corrs = (float4 *) palloc(sizeof(float4));
+           MemoryContextSwitchTo(old_context);
+
+           /*----------
+            * Since we know the x and y value sets are both
+            *      0, 1, ..., values_cnt-1
+            * we have sum(x) = sum(y) =
+            *      (values_cnt-1)*values_cnt / 2
+            * and sum(x^2) = sum(y^2) =
+            *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+            *----------
+            */
+           corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+           corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+               (double) (2*values_cnt-1) / 6.0;
+           /* And the correlation coefficient reduces to */
+           corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+               (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stanumbers[slot_idx] = corrs;
+           stats->numnumbers[slot_idx] = 1;
+           slot_idx++;
        }
    }
-   heap_endscan(scan);
-   /* close rels, but hold locks till upcoming commit */
-   heap_close(ad, NoLock);
-   heap_close(sd, NoLock);
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 /*
- * del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
  *
- * If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums.  The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest tupno
+ * among the items whose datums have compared equal to its own.  This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
  */
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
 {
-   Relation    pgstatistic;
-   HeapScanDesc scan;
-   HeapTuple   tuple;
-   ScanKeyData key;
+   Datum       da = ((ScalarItem *) a)->value;
+   int         ta = ((ScalarItem *) a)->tupno;
+   Datum       db = ((ScalarItem *) b)->value;
+   int         tb = ((ScalarItem *) b)->tupno;
 
-   pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+   if (datumCmpFnKind == SORTFUNC_LT)
+   {
+       if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+           return -1;          /* a < b */
+       if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+           return 1;           /* a > b */
+   }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
 
-   ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
-                          F_OIDEQ, ObjectIdGetDatum(relid));
-   scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+       compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+       if (compare != 0)
+       {
+           if (datumCmpFnKind == SORTFUNC_REVCMP)
+               compare = -compare;
+           return compare;
+       }
+   }
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   /*
+    * The two datums are equal, so update datumCmpTupnoLink[].
+    */
+   if (datumCmpTupnoLink[ta] < tb)
+       datumCmpTupnoLink[ta] = tb;
+   if (datumCmpTupnoLink[tb] < ta)
+       datumCmpTupnoLink[tb] = ta;
+
+   /*
+    * For equal datums, sort by tupno
+    */
+   return ta - tb;
+}
+
+/*
+ * qsort comparator for sorting ScalarMCVItems by position
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+   const ScalarMCVItem *lhs = (const ScalarMCVItem *) a;   /* left item */
+   const ScalarMCVItem *rhs = (const ScalarMCVItem *) b;   /* right item */
+
+   return lhs->first - rhs->first; /* ascending by first position */
+}
+
+
+/*
+ * update_attstats() -- update attribute statistics for one relation
+ *
+ *     Statistics are stored in several places: the pg_class row for the
+ *     relation has stats about the whole relation, and there is a
+ *     pg_statistic row for each (non-system) attribute that has ever
+ *     been analyzed.  The pg_class values are updated by VACUUM, not here.
+ *
+ *     pg_statistic rows are just added or updated normally.  This means
+ *     that pg_statistic will probably contain some deleted rows at the
+ *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ *     To keep things simple, we punt for pg_statistic, and don't try
+ *     to compute or store rows for pg_statistic itself in pg_statistic.
+ *     This could possibly be made to work, but it's not worth the trouble.
+ *     Note analyze_rel() has seen to it that we won't come here when
+ *     vacuuming pg_statistic itself.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+   Relation    sd;
+   int         attno;
+
+   /*
+    * We use an ExclusiveLock on pg_statistic to ensure that only one
+    * backend is writing it at a time --- without that, we might have to
+    * deal with concurrent updates here, and it's not worth the trouble.
+    */
+   sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+   for (attno = 0; attno < natts; attno++)
    {
-       if (attcnt > 0)
+       VacAttrStats *stats = vacattrstats[attno];
+       FmgrInfo    out_function;
+       HeapTuple   stup,
+                   oldtup;
+       int         i, k, n;
+       Datum       values[Natts_pg_statistic];
+       char        nulls[Natts_pg_statistic];
+       char        replaces[Natts_pg_statistic];
+       Relation    irelations[Num_pg_statistic_indices];
+
+       /* Ignore attr if we weren't able to collect stats */
+       if (!stats->stats_valid)
+           continue;
+
+       fmgr_info(stats->attrtype->typoutput, &out_function);
+
+       /*
+        * Construct a new pg_statistic tuple
+        */
+       for (i = 0; i < Natts_pg_statistic; ++i)
        {
-           Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
-           int         i;
+           nulls[i] = ' ';
+           replaces[i] = 'r';
+       }
 
-           for (i = 0; i < attcnt; i++)
+       i = 0;
+       values[i++] = ObjectIdGetDatum(relid); /* starelid */
+       values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+       values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+       values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+       values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     nnum = stats->numnumbers[k];
+
+           if (nnum > 0)
            {
-               if (pgs->staattnum == attnums[i] + 1)
-                   break;
+               Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+               ArrayType  *arry;
+
+               for (n = 0; n < nnum; n++)
+                   numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+               /* XXX knows more than it should about type float4: */
+               arry = construct_array(numdatums, nnum,
+                                      false, sizeof(float4), 'i');
+               values[i++] = PointerGetDatum(arry); /* stanumbersN */
+           }
+           else
+           {
+               nulls[i] = 'n';
+               values[i++] = (Datum) 0;
            }
-           if (i >= attcnt)
-               continue;       /* don't delete it */
        }
-       simple_heap_delete(pgstatistic, &tuple->t_self);
-   }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     ntxt = stats->numvalues[k];
 
-   heap_endscan(scan);
+           if (ntxt > 0)
+           {
+               Datum      *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+               ArrayType  *arry;
 
-   /*
-    * Close rel, but *keep* lock; we will need to reacquire it later, so
-    * there's a possibility of deadlock against another VACUUM process if
-    * we let go now.  Keeping the lock shouldn't delay any common
-    * operation other than an attempted VACUUM of pg_statistic itself.
-    */
-   heap_close(pgstatistic, NoLock);
+               for (n = 0; n < ntxt; n++)
+               {
+                   /*
+                    * Convert data values to a text string to be inserted
+                    * into the text array.
+                    */
+                   Datum   stringdatum;
+
+                   stringdatum =
+                       FunctionCall3(&out_function,
+                                     stats->stavalues[k][n],
+                                     ObjectIdGetDatum(stats->attrtype->typelem),
+                                     Int32GetDatum(stats->attr->atttypmod));
+                   txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+                   pfree(DatumGetPointer(stringdatum));
+               }
+               /* XXX knows more than it should about type text: */
+               arry = construct_array(txtdatums, ntxt,
+                                      false, -1, 'i');
+               values[i++] = PointerGetDatum(arry); /* stavaluesN */
+           }
+           else
+           {
+               nulls[i] = 'n';
+               values[i++] = (Datum) 0;
+           }
+       }
+
+       /* Is there already a pg_statistic tuple for this attribute? */
+       oldtup = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(stats->attnum),
+                               0, 0);
+
+       if (HeapTupleIsValid(oldtup))
+       {
+           /* Yes, replace it */
+           stup = heap_modifytuple(oldtup,
+                                   sd,
+                                   values,
+                                   nulls,
+                                   replaces);
+           ReleaseSysCache(oldtup);
+           simple_heap_update(sd, &stup->t_self, stup);
+       }
+       else
+       {
+           /* No, insert new tuple */
+           stup = heap_formtuple(sd->rd_att, values, nulls);
+           heap_insert(sd, stup);
+       }
+
+       /* update indices too */
+       CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+                          irelations);
+       CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+       CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+       heap_freetuple(stup);
+   }
+
+   /* close rel, but hold lock till upcoming commit */
+   heap_close(sd, NoLock);
 }


diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c

index 96d493688e328aaa1fc4bf56bc12e18865f2ee33..13a78f1177390f0108702c94a7cc005e0a28b183 100644 (file)


--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
  *
  * NOTES
  *   The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
 #include "access/genam.h"
 
 
+static void drop_default(Oid relid, int16 attnum);
 static bool needs_toast_table(Relation rel);
 static bool is_relation(char *name);
 
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
        HeapTuple   typeTuple;
        Form_pg_type tform;
        char       *typename;
-       int         attnelems;
+       int         attndims;
 
        if (SearchSysCacheExists(ATTNAME,
                                 ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
 
        if (colDef->typename->arrayBounds)
        {
-           attnelems = length(colDef->typename->arrayBounds);
+           attndims = length(colDef->typename->arrayBounds);
            typename = makeArrayTypeName(colDef->typename->name);
        }
        else
-           attnelems = 0;
+           attndims = 0;
 
        typeTuple = SearchSysCache(TYPENAME,
                                   PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
        namestrcpy(&(attribute->attname), colDef->colname);
        attribute->atttypid = typeTuple->t_data->t_oid;
        attribute->attlen = tform->typlen;
-       attribute->attdispersion = 0;
+       attribute->attstattarget = DEFAULT_ATTSTATTARGET;
        attribute->attcacheoff = -1;
        attribute->atttypmod = colDef->typename->typmod;
        attribute->attnum = i;
        attribute->attbyval = tform->typbyval;
-       attribute->attnelems = attnelems;
+       attribute->attndims = attndims;
        attribute->attisset = (bool) (tform->typtype == 'c');
        attribute->attstorage = tform->typstorage;
        attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
 }
 
 
-
-static void drop_default(Oid relid, int16 attnum);
-
-
 /*
  * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
  */
 void
-AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+                            bool inh, const char *colName,
+                            Node *newDefault)
 {
    Relation    rel;
    HeapTuple   tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
            if (childrelid == myrelid)
                continue;
            rel = heap_open(childrelid, AccessExclusiveLock);
-           AlterTableAlterColumn(RelationGetRelationName(rel),
-                                 false, colName, newDefault);
+           AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+                                        false, colName, newDefault);
            heap_close(rel, AccessExclusiveLock);
        }
    }
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
    /* -= now do the thing on this relation =- */
 
    /* reopen the business */
-   rel = heap_openr((char *) relationName, AccessExclusiveLock);
+   rel = heap_openr(relationName, AccessExclusiveLock);
 
    /*
     * get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
 }
 
 
-
 static void
 drop_default(Oid relid, int16 attnum)
 {
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
 }
 
 
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ *
+ * Store a new statistics target in the attstattarget field of the
+ * attribute-catalog row (the relation opened via AttributeRelationName)
+ * that describes column colName of table relationName.
+ *
+ * relationName: name of the table.  Unless NO_SECURITY is defined, the
+ *     current user must pass pg_ownercheck() for it.  The relation must
+ *     have relkind RELKIND_RELATION.
+ * inh: if true, the same change is applied first to every other relation
+ *     returned by find_all_inheritors(), then to this relation.
+ * colName: name of the column.  It must exist in the relation and must
+ *     not be a system attribute (attnum < 0).
+ * statsTarget: must be an Integer node (checked only by Assert).  Its
+ *     value is silently clamped to the range 0..1000.
+ *
+ * The column's row is fetched with SearchSysCacheCopy(), modified in
+ * place, written back with simple_heap_update(), and index entries are
+ * then inserted for the updated tuple before the copy is freed.
+ *
+ * All failures are reported through elog(ERROR).  The AccessExclusiveLock
+ * taken on the table itself is not given back here: the relation is
+ * closed with NoLock.
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+                               bool inh, const char *colName,
+                               Node *statsTarget)
+{
+   Relation    rel;
+   Oid         myrelid;
+   int         newtarget;
+   Relation    attrelation;
+   HeapTuple   tuple;
+
+#ifndef NO_SECURITY
+   if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+       elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+   rel = heap_openr(relationName, AccessExclusiveLock);
+   if (rel->rd_rel->relkind != RELKIND_RELATION)
+       elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+            relationName);
+   myrelid = RelationGetRelid(rel);
+   heap_close(rel, NoLock);    /* close rel, but keep lock! */
+
+   /*
+    * Propagate to children if desired.  The children are handled before
+    * this relation's own row is touched.
+    */
+   if (inh)
+   {
+       List       *child,
+                  *children;
+
+       /* this routine is actually in the planner */
+       children = find_all_inheritors(myrelid);
+
+       /*
+        * find_all_inheritors does the recursive search of the
+        * inheritance hierarchy, so all we have to do is process all of
+        * the relids in the list that it returns.  An entry equal to
+        * myrelid is skipped because this relation is handled further
+        * down, and each recursive call passes inh = false so that it
+        * does not search the hierarchy again.
+        */
+       foreach(child, children)
+       {
+           Oid         childrelid = lfirsti(child);
+
+           if (childrelid == myrelid)
+               continue;
+           rel = heap_open(childrelid, AccessExclusiveLock);
+           AlterTableAlterColumnStatistics(RelationGetRelationName(rel),
+                                           false, colName, statsTarget);
+           heap_close(rel, AccessExclusiveLock);
+       }
+   }
+
+   /* -= now do the thing on this relation =- */
+
+   Assert(IsA(statsTarget, Integer));
+   newtarget = intVal(statsTarget);
+
+   /* Limit target to sane range (should we raise an error instead?) */
+   if (newtarget < 0)
+       newtarget = 0;
+   else if (newtarget > 1000)
+       newtarget = 1000;
+
+   attrelation = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+   tuple = SearchSysCacheCopy(ATTNAME,
+                              ObjectIdGetDatum(myrelid),
+                              PointerGetDatum(colName),
+                              0, 0);
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+            relationName, colName);
+
+   if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0)
+       elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+            colName);
+
+   ((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget;
+
+   simple_heap_update(attrelation, &tuple->t_self, tuple);
+
+   /* keep system catalog indices current (new entries for updated tuple) */
+   {
+       Relation    irelations[Num_pg_attr_indices];
+
+       CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations);
+       CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple);
+       CatalogCloseIndices(Num_pg_attr_indices, irelations);
+   }
+
+   heap_freetuple(tuple);
+   heap_close(attrelation, RowExclusiveLock);
+}
+
+
 #ifdef _DROP_COLUMN_HACK__
 /*
  * ALTER TABLE DROP COLUMN trial implementation


diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 694d0e8bbc1491c39827ff833f8cc3fb68906e9e..9a0dbdc8c8e15c0b261068728c7d38546e3aa07c 100644 (file)


--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
              Buffer oldbuf, ItemPointerData from,
              Buffer newbuf, HeapTuple newtup);
 
+
+typedef struct VRelListData    /* one entry in the list of relations to process */
+{
+   Oid         vrl_relid;      /* OID of the relation */
+   struct VRelListData *vrl_next;  /* next entry; NULL ends the list */
+} VRelListData;
+
+typedef VRelListData *VRelList;    /* head of such a list; NULL when empty */
+
+typedef struct VacPageData     /* what vacuum records about one page */
+{
+   BlockNumber blkno;          /* BlockNumber of this Page */
+   Size        free;           /* FreeSpace on this Page */
+   uint16      offsets_used;   /* Number of OffNums used by vacuum */
+   uint16      offsets_free;   /* Number of OffNums free or to be free */
+   OffsetNumber offsets[1];    /* Array of its OffNums (declared with one
+                                * element; NOTE(review): presumably
+                                * allocated larger by its creator -- confirm) */
+} VacPageData;
+
+typedef VacPageData *VacPage;  /* pointer to one page's record */
+
+typedef struct VacPageListData /* an array of VacPage page descriptions */
+{
+   int         empty_end_pages;/* Number of "empty" end-pages */
+   int         num_pages;      /* Number of pages in pagedesc */
+   int         num_allocated_pages;    /* Number of allocated pages in
+                                        * pagedesc */
+   VacPage    *pagedesc;       /* Descriptions of pages */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;  /* pointer form used in function arguments */
+
+typedef struct VTupleLinkData  /* a pair of tuple IDs */
+{
+   ItemPointerData new_tid;    /* NOTE(review): presumably the TID of the
+                                * newer version of the tuple -- confirm */
+   ItemPointerData this_tid;   /* NOTE(review): presumably the TID of the
+                                * tuple itself -- confirm */
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;    /* pointer to one or more link entries */
+
+typedef struct VTupleMoveData  /* a tuple together with its destination page */
+{
+   ItemPointerData tid;        /* tuple ID */
+   VacPage     vacpage;        /* where to move */
+   bool        cleanVpd;       /* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;    /* pointer to a move entry */
+
+typedef struct VRelStats       /* per-relation results collected by vacuum */
+{
+   Oid         relid;          /* OID of the relation being vacuumed */
+   long        num_pages;      /* page count, saved by scan_heap */
+   long        num_tuples;     /* tuple count, saved by scan_heap */
+   Size        min_tlen;       /* min tuple length; 0 if no tuples */
+   Size        max_tlen;       /* max tuple length; 0 if no tuples */
+   bool        hasindex;       /* flag set by vacuum_rel */
+   int         num_vtlinks;    /* NOTE(review): presumably the number of
+                                * entries in vtlinks -- confirm */
+   VTupleLink  vtlinks;        /* VTupleLinkData entries */
+} VRelStats;
+
+
 static MemoryContext vac_context = NULL;
 
 static int MESSAGE_LEVEL;      /* message level */
 
 static TransactionId XmaxRecent;
 
+
 /* non-export function prototypes */
 static void vacuum_init(void);
 static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
 static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+                     VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacuum_pages, VacPageList fraged_pages,
+                       int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacpagelist);
 static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+                        long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
 static char *show_rusage(struct rusage * ru0);
 
 
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
 void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
 {
+   const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
    NameData    VacRel;
    Name        VacRelName;
-   MemoryContext old;
-   List       *le;
-   List       *anal_cols2 = NIL;
-
-   if (anal_cols != NIL && !analyze)
-       elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
+   VRelList    vrl,
+               cur;
 
    /*
     * We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
     * behavior.
     */
    if (IsTransactionBlock())
-       elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+       elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
 
-   if (verbose)
+   if (vacstmt->verbose)
        MESSAGE_LEVEL = NOTICE;
    else
        MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
                                        ALLOCSET_DEFAULT_INITSIZE,
                                        ALLOCSET_DEFAULT_MAXSIZE);
 
-   /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
-   if (vacrel)
+   /* Convert vacrel, which is just a string, to a Name */
+   if (vacstmt->vacrel)
    {
-       namestrcpy(&VacRel, vacrel);
+       namestrcpy(&VacRel, vacstmt->vacrel);
        VacRelName = &VacRel;
    }
    else
        VacRelName = NULL;
 
-   /* must also copy the column list, if any, to safe storage */
-   old = MemoryContextSwitchTo(vac_context);
-   foreach(le, anal_cols)
-   {
-       char       *col = (char *) lfirst(le);
-
-       anal_cols2 = lappend(anal_cols2, pstrdup(col));
-   }
-   MemoryContextSwitchTo(old);
+   /* Build list of relations to process (note this lives in vac_context) */
+   vrl = getrels(VacRelName, stmttype);
 
    /*
     * Start up the vacuum cleaner.
-    *
-    * NOTE: since this commits the current transaction, the memory holding
-    * any passed-in parameters gets freed here.  We must have already
-    * copied pass-by-reference parameters to safe storage.  Don't make me
-    * fix this again!
     */
    vacuum_init();
 
-   /* vacuum the database */
-   vac_vacuum(VacRelName, analyze, anal_cols2);
+   /*
+    * Process each selected relation.  We are careful to process
+    * each relation in a separate transaction in order to avoid holding
+    * too many locks at one time.
+    */
+   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+   {
+       if (vacstmt->vacuum)
+           vacuum_rel(cur->vrl_relid);
+       /* analyze separately so locking is minimized */
+       if (vacstmt->analyze)
+           analyze_rel(cur->vrl_relid, vacstmt);
+   }
 
    /* clean up */
    vacuum_shutdown();
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
  *     PostgresMain().
  */
 static void
-vacuum_init()
+vacuum_init(void)
 {
    /* matches the StartTransaction in PostgresMain() */
    CommitTransactionCommand();
 }
 
 static void
-vacuum_shutdown()
+vacuum_shutdown(void)
 {
    /* on entry, we are not in a transaction */
 
@@ -223,34 +287,10 @@ vacuum_shutdown()
 }
 
 /*
- * vac_vacuum() -- vacuum the database.
- *
- *     This routine builds a list of relations to vacuum, and then calls
- *     code that vacuums them one at a time.  We are careful to vacuum each
- *     relation in a separate transaction in order to avoid holding too many
- *     locks at one time.
+ * Build a list of VRelListData nodes for each relation to be processed
  */
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
-   VRelList    vrl,
-               cur;
-
-   /* get list of relations */
-   vrl = getrels(VacRelP);
-
-   /* vacuum each heap relation */
-   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
-   {
-       vacuum_rel(cur->vrl_relid);
-       /* analyze separately so locking is minimized */
-       if (analyze)
-           analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
-   }
-}
-
 static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
 {
    Relation    rel;
    TupleDesc   tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
    char       *rname;
    char        rkind;
    bool        n;
-   bool        found = false;
    ScanKeyData key;
 
-   StartTransactionCommand();
-
-   if (NameStr(*VacRelP))
+   if (VacRelP)
    {
 
        /*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
    }
    else
    {
+       /* find all relations listed in pg_class */
        ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
                               F_CHAREQ, CharGetDatum('r'));
    }
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
 
    while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
    {
-       found = true;
-
        d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
-       rname = (char *) DatumGetPointer(d);
+       rname = (char *) DatumGetName(d);
 
        d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
        rkind = DatumGetChar(d);
 
        if (rkind != RELKIND_RELATION)
        {
-           elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+           elog(NOTICE, "%s: can not process indexes, views or special system tables",
+                stmttype);
            continue;
        }
 
-       /* get a relation list entry for this guy */
+       /* Make a relation list entry for this guy */
        if (vrl == (VRelList) NULL)
            vrl = cur = (VRelList)
                MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
    heap_endscan(scan);
    heap_close(rel, AccessShareLock);
 
-   if (!found)
-       elog(NOTICE, "Vacuum: table not found");
-
-   CommitTransactionCommand();
+   if (vrl == NULL)
+       elog(NOTICE, "%s: table not found", stmttype);
 
    return vrl;
 }
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
     */
    vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
    vacrelstats->relid = relid;
-   vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+   vacrelstats->num_pages = 0;
+   vacrelstats->num_tuples = 0;
    vacrelstats->hasindex = false;
 
    GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
        vacrelstats->hasindex = true;
    else
        vacrelstats->hasindex = false;
-#ifdef NOT_USED
 
+#ifdef NOT_USED
    /*
     * reindex in VACUUM is dangerous under WAL. ifdef out until it
     * becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
    heap_close(onerel, NoLock);
 
    /* update statistics in pg_class */
-   update_relstats(vacrelstats->relid, vacrelstats->num_pages,
-                   vacrelstats->num_tuples, vacrelstats->hasindex,
-                   vacrelstats);
+   vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+                       vacrelstats->num_tuples, vacrelstats->hasindex);
 
    /*
     * Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    char       *relname;
    VacPage     vacpage,
                vp;
+   long        num_tuples;
    uint32      tups_vacuumed,
-               num_tuples,
                nkeep,
                nunused,
                ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    /* save stats in the rel list for use later */
    vacrelstats->num_tuples = num_tuples;
    vacrelstats->num_pages = nblocks;
-/*   vacrelstats->natts = attr_cnt;*/
    if (num_tuples == 0)
        min_tlen = max_tlen = 0;
    vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    }
 
    elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
         nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
         new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 {
    Buffer      buf;
    VacPage    *vacpage;
-   int         nblocks;
+   long        nblocks;
    int         i;
 
    nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
    /* truncate relation if there are some empty end-pages */
    if (vacuum_pages->empty_end_pages > 0)
    {
-       elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+       elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
             RelationGetRelationName(onerel),
             vacrelstats->num_pages, nblocks);
        nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
  *
  */
 static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
-   int         nitups;
+   long        nitups;
    int         nipages;
    struct rusage ru0;
 
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
 
    /* now update statistics in pg_class */
    nipages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
         RelationGetRelationName(indrel), nipages, nitups,
         show_rusage(&ru0));
 
    if (nitups != num_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
             RelationGetRelationName(indrel), nitups, num_tuples);
 
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
  *     pg_class.
  */
 static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+            long num_tuples, int keep_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
    ItemPointer heapptr;
    int         tups_vacuumed;
-   int         num_index_tuples;
+   long        num_index_tuples;
    int         num_pages;
    VacPage     vp;
    struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
 
    /* now update statistics in pg_class */
    num_pages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel),
+                       num_pages, num_index_tuples, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
         RelationGetRelationName(indrel), num_pages,
         num_index_tuples - keep_tuples, tups_vacuumed,
         show_rusage(&ru0));
 
    if (num_index_tuples != num_tuples + keep_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
          RelationGetRelationName(indrel), num_index_tuples, num_tuples);
 
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
 }
 
 /*
- * update_relstats() -- update statistics for one relation
+ * vac_update_relstats() -- update statistics for one relation
  *
  *     Update the whole-relation statistics that are kept in its pg_class
  *     row.  There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
  *     we updated these tuples in the usual way, vacuuming pg_class itself
  *     wouldn't work very well --- by the time we got done with a vacuum
  *     cycle, most of the tuples in pg_class would've been obsoleted.
- *     Updating pg_class's own statistics would be especially tricky.
  *     Of course, this only works for fixed-size never-null columns, but
  *     these are.
  */
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
-               VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                   bool hasindex)
 {
    Relation    rd;
    HeapTupleData rtup;


diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c

index 12c6f82a8b224c0f773f79c7b53132447467d399..e0543a2810977526886fee0d639ec76cc069463f 100644 (file)


--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,24 +20,24 @@
 #include "utils/tuplesort.h"
 
 /* ----------------------------------------------------------------
- *     FormSortKeys(node)
+ *     ExtractSortKeys
  *
- *     Forms the structure containing information used to sort the relation.
+ *     Extract the sorting key information from the plan node.
  *
- *     Returns an array of ScanKeyData.
+ *     Returns two palloc'd arrays, one of sort operator OIDs and
+ *     one of attribute numbers.
  * ----------------------------------------------------------------
  */
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+               Oid **sortOperators,
+               AttrNumber **attNums)
 {
-   ScanKey     sortkeys;
    List       *targetList;
-   List       *tl;
    int         keycount;
-   Resdom     *resdom;
-   AttrNumber  resno;
-   Index       reskey;
-   Oid         reskeyop;
+   Oid        *sortOps;
+   AttrNumber *attNos;
+   List       *tl;
 
    /*
     * get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
    keycount = sortnode->keycount;
 
    /*
-    * first allocate space for scan keys
+    * first allocate space for results
     */
    if (keycount <= 0)
-       elog(ERROR, "FormSortKeys: keycount <= 0");
-   sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
-   MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+       elog(ERROR, "ExtractSortKeys: keycount <= 0");
+   sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+   MemSet(sortOps, 0, keycount * sizeof(Oid));
+   *sortOperators = sortOps;
+   attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+   MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+   *attNums = attNos;
 
    /*
-    * form each scan key from the resdom info in the target list
+    * extract info from the resdom nodes in the target list
     */
    foreach(tl, targetList)
    {
        TargetEntry *target = (TargetEntry *) lfirst(tl);
-
-       resdom = target->resdom;
-       resno = resdom->resno;
-       reskey = resdom->reskey;
-       reskeyop = resdom->reskeyop;
+       Resdom     *resdom = target->resdom;
+       Index       reskey = resdom->reskey;
 
        if (reskey > 0)         /* ignore TLEs that are not sort keys */
        {
-           ScanKeyEntryInitialize(&sortkeys[reskey - 1],
-                                  0x0,
-                                  resno,
-                                  (RegProcedure) reskeyop,
-                                  (Datum) 0);
+           Assert(reskey <= keycount);
+           sortOps[reskey - 1] = resdom->reskeyop;
+           attNos[reskey - 1] = resdom->resno;
        }
    }
-
-   return sortkeys;
 }
 
 /* ----------------------------------------------------------------
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
    {
        Plan       *outerNode;
        TupleDesc   tupDesc;
-       int         keycount;
-       ScanKey     sortkeys;
+       Oid        *sortOperators;
+       AttrNumber *attNums;
 
        SO1_printf("ExecSort: %s\n",
                   "sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
 
        outerNode = outerPlan((Plan *) node);
        tupDesc = ExecGetTupType(outerNode);
-       keycount = node->keycount;
-       sortkeys = (ScanKey) sortstate->sort_Keys;
 
-       tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
-                                             true /* randomAccess */ );
+       ExtractSortKeys(node, &sortOperators, &attNums);
 
+       tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+                                             sortOperators, attNums,
+                                             true /* randomAccess */ );
        sortstate->tuplesortstate = (void *) tuplesortstate;
 
+       pfree(sortOperators);
+       pfree(attNums);
+
        /*
         * Scan the subplan and feed all the tuples to tuplesort.
         */
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
     */
    sortstate = makeNode(SortState);
    sortstate->sort_Done = false;
-   sortstate->sort_Keys = NULL;
    sortstate->tuplesortstate = NULL;
 
    node->sortstate = sortstate;
@@ -258,11 +257,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
    outerPlan = outerPlan((Plan *) node);
    ExecInitNode(outerPlan, estate, (Plan *) node);
 
-   /*
-    * initialize sortstate information
-    */
-   sortstate->sort_Keys = FormSortKeys(node);
-
    /*
     * initialize tuple type.  no need to initialize projection info
     * because this node doesn't do projections.
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
        tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
    sortstate->tuplesortstate = NULL;
 
-   if (sortstate->sort_Keys != NULL)
-       pfree(sortstate->sort_Keys);
-
    pfree(sortstate);
    node->sortstate = NULL;
 


diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index ad50630931e357a1ca7bae5f806f8cc242062722..ee5a803b8025ac9817834537bb5b4ccd10708527 100644 (file)


--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
    newnode->left_pathkey = NIL;
    newnode->right_pathkey = NIL;
    newnode->hashjoinoperator = from->hashjoinoperator;
-   newnode->left_dispersion = from->left_dispersion;
-   newnode->right_dispersion = from->right_dispersion;
+   newnode->left_bucketsize = from->left_bucketsize;
+   newnode->right_bucketsize = from->right_bucketsize;
 
    return newnode;
 }
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
 {
    VacuumStmt *newnode = makeNode(VacuumStmt);
 
-   newnode->verbose = from->verbose;
+   newnode->vacuum = from->vacuum;
    newnode->analyze = from->analyze;
+   newnode->verbose = from->verbose;
    if (from->vacrel)
        newnode->vacrel = pstrdup(from->vacrel);
-   Node_Copy(from, newnode, va_spec);
+   Node_Copy(from, newnode, va_cols);
 
    return newnode;
 }


diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 06ee63bbacd05398c5445bd4ce4f8dfb169090da..284a534aa966f03a5f69da55e5faa89a96925b1e 100644 (file)


--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
        return false;
 
    /*
-    * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+    * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
     * since they may not be set yet, and should be derivable from the
     * clause anyway
     */
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
 static bool
 _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
 {
-   if (a->verbose != b->verbose)
+   if (a->vacuum != b->vacuum)
        return false;
    if (a->analyze != b->analyze)
        return false;
+   if (a->verbose != b->verbose)
+       return false;
    if (!equalstr(a->vacrel, b->vacrel))
        return false;
-   if (!equal(a->va_spec, b->va_spec))
+   if (!equal(a->va_cols, b->va_cols))
        return false;
 
    return true;


diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 9a071e7a250df88efe03c183927ffeadfa07a86c..4c0c1b03ef544c60b9161208ceb950a83862419c 100644 (file)


--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
  *
  * NOTES
  *   Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
 
    /* eval_cost is not part of saved representation; compute on first use */
    local_node->eval_cost = -1;
-   /* ditto for cached pathkeys and dispersion */
+   /* ditto for cached pathkeys and bucketsize */
    local_node->left_pathkey = NIL;
    local_node->right_pathkey = NIL;
-   local_node->left_dispersion = -1;
-   local_node->right_dispersion = -1;
+   local_node->left_bucketsize = -1;
+   local_node->right_bucketsize = -1;
 
    return local_node;
 }


diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index c52af72a16b824c1f37078bb4e185d8a34b22d2b..bdfbbb18186d9c7ef4201fa8eb294bbdb55e298c 100644 (file)


--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,11 +50,15 @@
 
 #include <math.h>
 
+#include "catalog/pg_statistic.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 
 /*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
  * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize statistic
  *             for the inner hash key.
  */
 void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
              Path *outer_path,
              Path *inner_path,
              List *restrictlist,
-             Selectivity innerdispersion)
+             Selectivity innerbucketsize)
 {
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
 
    /*
     * The number of tuple comparisons needed is the number of outer
-    * tuples times the typical hash bucket size.  nodeHash.c tries for
-    * average bucket loading of NTUP_PER_BUCKET, but that goal will be
-    * reached only if data values are uniformly distributed among the
-    * buckets.  To be conservative, we scale up the target bucket size by
-    * the number of inner rows times inner dispersion, giving an estimate
-    * of the typical number of duplicates of each value. We then charge
-    * one cpu_operator_cost per tuple comparison.
+    * tuples times the typical number of tuples in a hash bucket,
+    * which is the inner relation size times its bucketsize fraction.
+    * We charge one cpu_operator_cost per tuple comparison.
     */
    run_cost += cpu_operator_cost * outer_path->parent->rows *
-       NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+       ceil(inner_path->parent->rows * innerbucketsize);
 
    /*
     * Estimate the number of tuples that get through the hashing filter
     * as one per tuple in the two source relations.  This could be a
     * drastic underestimate if there are many equal-keyed tuples in
-    * either relation, but we have no good way of estimating that...
+    * either relation, but we have no simple way of estimating that;
+    * and since this is only a second-order parameter, it's probably
+    * not worth expending a lot of effort on the estimate.
     */
    ntuples = outer_path->parent->rows + inner_path->parent->rows;
 
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
    /*
     * Bias against putting larger relation on inside.  We don't want an
     * absolute prohibition, though, since larger relation might have
-    * better dispersion --- and we can't trust the size estimates
+    * better bucketsize --- and we can't trust the size estimates
     * unreservedly, anyway.  Instead, inflate the startup cost by the
     * square root of the size ratio.  (Why square root?  No real good
     * reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
    path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin.  We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation.  However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution.  Otherwise the
+ * buckets will be nonuniformly occupied.  If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often.  Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size.  We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1.  This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want.  We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+   Oid         relid;
+   RelOptInfo *rel;
+   HeapTuple   tuple;
+   Form_pg_statistic stats;
+   double      estfract,
+               ndistinct,
+               needdistinct,
+               mcvfreq,
+               avgfreq;
+   float4     *numbers;
+   int         nnumbers;
+
+   /*
+    * Lookup info about var's relation and attribute;
+    * if none available, return default estimate.
+    */
+   if (!IsA(var, Var))
+       return 0.1;
+
+   relid = getrelid(var->varno, root->rtable);
+   if (relid == InvalidOid)
+       return 0.1;
+
+   rel = get_base_rel(root, var->varno);
+
+   if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+       return 0.1;             /* ensure we can divide below */
+
+   tuple = SearchSysCache(STATRELATT,
+                          ObjectIdGetDatum(relid),
+                          Int16GetDatum(var->varattno),
+                          0, 0);
+   if (!HeapTupleIsValid(tuple))
+   {
+       /*
+        * Perhaps the Var is a system attribute; if so, it will have no
+        * entry in pg_statistic, but we may be able to guess something
+        * about its distribution anyway.
+        */
+       switch (var->varattno)
+       {
+           case ObjectIdAttributeNumber:
+           case SelfItemPointerAttributeNumber:
+               /* these are unique, so buckets should be well-distributed */
+               return (double) NTUP_PER_BUCKET / rel->rows;
+           case TableOidAttributeNumber:
+               /* hashing this is a terrible idea... */
+               return 1.0;
+       }
+       return 0.1;
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+   /*
+    * Obtain number of distinct data values in raw relation.
+    */
+   ndistinct = stats->stadistinct;
+   if (ndistinct < 0.0)
+       ndistinct = -ndistinct * rel->tuples;
+
+   /*
+    * Adjust ndistinct to account for restriction clauses.  Observe we are
+    * assuming that the data distribution is affected uniformly by the
+    * restriction clauses!
+    *
+    * XXX Possibly better way, but much more expensive: multiply by
+    * selectivity of rel's restriction clauses that mention the target Var.
+    */
+   ndistinct *= rel->rows / rel->tuples;
+
+   /*
+    * Discourage use of hash join if there seem not to be very many distinct
+    * data values.  The threshold here is somewhat arbitrary, as is the
+    * fraction used to "discourage" the choice.
+    */
+   if (ndistinct < 50.0)
+   {
+       ReleaseSysCache(tuple);
+       return 0.5;
+   }
+
+   /*
+    * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
+    * ie the number of rows after applying restriction clauses, because
+    * that's what the fraction will eventually be multiplied by in
+    * cost_hashjoin.
+    */
+   estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+   /*
+    * Adjust estimated bucketsize if too few distinct values to fill
+    * all the buckets.
+    */
+   needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+   if (ndistinct < needdistinct)
+       estfract *= needdistinct / ndistinct;
+
+   /*
+    * Look up the frequency of the most common value, if available.
+    */
+   mcvfreq = 0.0;
+
+   if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        NULL, NULL, &numbers, &nnumbers))
+   {
+       /*
+        * The first MCV stat is for the most common value.
+        */
+       if (nnumbers > 0)
+           mcvfreq = numbers[0];
+       free_attstatsslot(var->vartype, NULL, 0,
+                         numbers, nnumbers);
+   }
+
+   /*
+    * Adjust estimated bucketsize upward to account for skewed distribution.
+    */
+   avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+   if (avgfreq > 0.0 && mcvfreq > avgfreq)
+       estfract *= mcvfreq / avgfreq;
+
+   ReleaseSysCache(tuple);
+
+   return (Selectivity) estfract;
+}
+
 
 /*
  * cost_qual_eval


diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index d41336ddcee0f9c26ad9a2ab0b1410a1f0ae38c7..cd7cabd41deb7bf52b323b437d847eede311b8cc 100644 (file)


--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,15 +8,15 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 #include 
 
-#include "postgres.h"
-
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
                     List *restrictlist, JoinType jointype);
 static Path *best_innerjoin(List *join_paths, List *outer_relid,
               JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
                         RelOptInfo *outerrel,
                         RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
        Expr       *clause;
        Var        *left,
                   *right;
-       Selectivity innerdispersion;
+       Selectivity innerbucketsize;
        List       *hashclauses;
 
        if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
 
        /*
         * Check if clause is usable with these sub-rels, find inner side,
-        * estimate dispersion of inner var for costing purposes.
+        * estimate bucketsize of inner var for costing purposes.
         *
         * Since we tend to visit the same clauses over and over when
-        * planning a large query, we cache the dispersion estimates in
+        * planning a large query, we cache the bucketsize estimates in
         * the RestrictInfo node to avoid repeated lookups of statistics.
         */
        if (intMember(left->varno, outerrelids) &&
            intMember(right->varno, innerrelids))
        {
            /* righthand side is inner */
-           innerdispersion = restrictinfo->right_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->right_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, right);
-               restrictinfo->right_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, right);
+               restrictinfo->right_bucketsize = innerbucketsize;
            }
        }
        else if (intMember(left->varno, innerrelids) &&
                 intMember(right->varno, outerrelids))
        {
            /* lefthand side is inner */
-           innerdispersion = restrictinfo->left_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->left_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, left);
-               restrictinfo->left_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, left);
+               restrictinfo->left_bucketsize = innerbucketsize;
            }
        }
        else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
                                      innerrel->cheapest_total_path,
                                      restrictlist,
                                      hashclauses,
-                                     innerdispersion));
+                                     innerbucketsize));
        if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
            add_path(joinrel, (Path *)
                     create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
                                          innerrel->cheapest_total_path,
                                          restrictlist,
                                          hashclauses,
-                                         innerdispersion));
+                                         innerbucketsize));
    }
 }
 
@@ -866,31 +865,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
    return cheapest;
 }
 
-/*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large. We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
-   Oid         relid;
-
-   if (!IsA(var, Var))
-       return 0.1;
-
-   relid = getrelid(var->varno, root->rtable);
-
-   if (relid == InvalidOid)
-       return 0.1;
-
-   return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
 /*
  * select_mergejoin_clauses
  *   Select mergejoin clauses that are usable for a particular join.


diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8c3b00289d3e7d467aeb03dcc1b53eb02f5a3a2b..2d264c46881730ba4ace2ade745fe6942c9d49fb 100644 (file)


--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,14 +10,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "catalog/pg_index.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
         */
        if (resdom->reskey == 0)
        {
-           /* OK, mark it as a sort key and set the sort operator regproc */
+           /* OK, mark it as a sort key and set the sort operator */
            resdom->reskey = ++numsortkeys;
-           resdom->reskeyop = get_opcode(pathkey->sortop);
+           resdom->reskeyop = pathkey->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 7c3e15a8f88d81b206e4d3f618eae9658294ad6a..5d67e02dacb44bce678665c592ab184f588469a5 100644 (file)


--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 
-#include "postgres.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
    restrictinfo->left_pathkey = NIL;   /* not computable yet */
    restrictinfo->right_pathkey = NIL;
    restrictinfo->hashjoinoperator = InvalidOid;
-   restrictinfo->left_dispersion = -1; /* not computed until needed */
-   restrictinfo->right_dispersion = -1;
+   restrictinfo->left_bucketsize = -1; /* not computed until needed */
+   restrictinfo->right_bucketsize = -1;
 
    /*
     * Retrieve all relids and vars contained within the clause.


diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index b2ab4600209dd566fd281c5110f0e1f6ba5c1cb1..0aba4808c160f3bf5ba3a9cc3fd2c6cf26fa2fa3 100644 (file)


--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
            {
                /* OK, insert the ordering info needed by the executor. */
                resdom->reskey = ++keyno;
-               resdom->reskeyop = get_opcode(grpcl->sortop);
+               resdom->reskeyop = grpcl->sortop;
            }
        }
 
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
        {
            /* OK, insert the ordering info needed by the executor. */
            resdom->reskey = ++keyno;
-           resdom->reskeyop = get_opcode(sortcl->sortop);
+           resdom->reskeyop = sortcl->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 0b173466cf98061a3add13f850ba9e750dd9f4e0..ede4159d9707629729b5dffbc32f241f48629e72 100644 (file)


--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
        newinfo->eval_cost = -1;        /* reset this too */
        newinfo->left_pathkey = NIL;    /* and these */
        newinfo->right_pathkey = NIL;
-       newinfo->left_dispersion = -1;
-       newinfo->right_dispersion = -1;
+       newinfo->left_bucketsize = -1;
+       newinfo->right_bucketsize = -1;
 
        return (Node *) newinfo;
    }


diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index cfba3ee395f2e0216f74c1e2497a7a8f5897d74b..407c132b4f7a6388b093806fd3eb01286906e084 100644 (file)


--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,14 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'hashclauses' is a list of the hash join clause (always a 1-element list)
  *     (this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
  *
  */
 HashPath   *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion)
+                    Selectivity innerbucketsize)
 {
    HashPath   *pathnode = makeNode(HashPath);
 
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                  outer_path,
                  inner_path,
                  restrict_clauses,
-                 innerdispersion);
+                 innerbucketsize);
 
    return pathnode;
 }


diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index 4f711df203c846acf4402ed131def54dbbf94443..ee3523553e8693ac1b7762d01ebbabc3697a4d7a 100644 (file)


--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,11 +9,10 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include 


diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 4687a5599623d09b416357721488369cc8eaaa38..76cc095bc4edcdbf4cfecad9627a1e5a29d03256 100644 (file)


--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
        /* just the named tables */
        foreach(l, forUpdate)
        {
-           char       *relname = lfirst(l);
+           char       *relname = strVal(lfirst(l));
 
            i = 0;
            foreach(rt, qry->rtable)


diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index bed0ce239a42e2f75c48bdda8aff299cb2f02f9b..40c379aca51f280882945b9f5caf4aaeccc4475f 100644 (file)


--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
  *
  * HISTORY
  *   AUTHOR            DATE            MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
    char                *str;
    bool                boolean;
    JoinType            jtype;
-   InhOption           inhOpt;
    List                *list;
    Node                *node;
    Value               *value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
 
 %type    stmt,
        AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+       AnalyzeStmt,
        ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
        CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
        CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
 %type    select_no_parens, select_with_parens, select_clause,
                simple_select
 
-%type     alter_column_action
+%type     alter_column_default
 %type     drop_behavior
 
 %type    createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
        OptTableElementList, OptInherit, definition, opt_distinct,
        opt_with, func_args, func_args_list, func_as,
        oper_argtypes, RuleActionList, RuleActionMulti,
-       opt_column_list, columnList, opt_va_list, va_list,
+       opt_column_list, columnList, opt_name_list,
        sort_clause, sortby_list, index_params, index_list, name_list,
        from_clause, from_list, opt_array_bounds,
        expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
 %type    substr_from, substr_for
 
 %type     opt_binary, opt_using, opt_instead, opt_cursor
-%type     opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type  opt_inh_star, opt_only
+%type     opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
 
 %type    copy_dirn, direction, reindex_type, drop_type,
        opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
        NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
        OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
        REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
-       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+       STATISTICS, STDIN, STDOUT, SYSID,
        TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, 
        UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
 
@@ -470,6 +469,7 @@ stmt :  AlterSchemaStmt
        | CreatedbStmt
        | DropdbStmt
        | VacuumStmt
+       | AnalyzeStmt
        | VariableSetStmt
        | VariableShowStmt
        | VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN]  */
-       ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+       ALTER TABLE relation_expr ADD opt_column columnDef
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'A';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $7;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $6;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ALTER [COLUMN]  {SET DEFAULT |DROP DEFAULT} */
-       | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+       | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'T';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->def = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE  DROP [COLUMN]  {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+       | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+               {
+                   AlterTableStmt *n = makeNode(AlterTableStmt);
+                   n->subtype = 'S';
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = (Node *) makeInteger($9);
+                   $$ = (Node *)n;
+               }
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'D';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-       | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+       | ALTER TABLE relation_expr ADD TableConstraint
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'C';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $6;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $5;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> DROP CONSTRAINT  {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'X';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE  CREATE TOAST TABLE */
@@ -997,6 +1008,7 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'E';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE  OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'U';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    n->name = $6;
                    $$ = (Node *)n;
                }
        ;
 
-alter_column_action:
+alter_column_default:
        SET DEFAULT a_expr
            {
                /* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference:  NO ACTION                { $$ = FKCONSTR_ON_KEY_NOACTION; }
        | SET DEFAULT                   { $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
        ;
 
-opt_only: ONLY                                 { $$ = INH_NO; }
-        | /*EMPTY*/                                { $$ = INH_DEFAULT; } 
-       ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'   { $$ = $3; }
        | /*EMPTY*/                                 { $$ = NIL; }
        ;
@@ -2598,14 +2607,13 @@ opt_force:  FORCE                                   {  $$ = TRUE; }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
                    RenameStmt *n = makeNode(RenameStmt);
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->column = $7;
-                   n->newname = $9;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->column = $6;
+                   n->newname = $8;
                    $$ = (Node *)n;
                }
        ;
@@ -2994,49 +3002,71 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
                    n->vacrel = NULL;
-                   n->va_spec = NIL;
+                   n->va_cols = NIL;
                    $$ = (Node *)n;
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose relation_name
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
-                   n->vacrel = $4;
-                   n->va_spec = $5;
-                   if ( $5 != NIL && !$4 )
-                       elog(ERROR,"VACUUM syntax error at or near \"(\""
-                           "\n\tRelation name must be specified");
+                   n->vacrel = $3;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | VACUUM opt_verbose AnalyzeStmt
+               {
+                   VacuumStmt *n = (VacuumStmt *) $3;
+                   n->vacuum = true;
+                   n->verbose |= $2;
                    $$ = (Node *)n;
                }
        ;
 
-opt_verbose:  VERBOSE                          { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = NULL;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = $3;
+                   n->va_cols = $4;
+                   $$ = (Node *)n;
+               }
        ;
 
-opt_analyze:  ANALYZE                          { $$ = TRUE; }
+analyze_keyword:  ANALYZE                      { $$ = TRUE; }
        |     ANALYSE /* British */             { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-opt_va_list:  '(' va_list ')'                  { $$ = $2; }
-       | /*EMPTY*/                             { $$ = NIL; }
+opt_verbose:  VERBOSE                          { $$ = TRUE; }
+       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-va_list:  name
-               { $$ = makeList1($1); }
-       | va_list ',' name
-               { $$ = lappend($1, $3); }
+opt_name_list:  '(' name_list ')'              { $$ = $2; }
+       | /*EMPTY*/                             { $$ = NIL; }
        ;
 
 
@@ -3160,12 +3190,12 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only relation_name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
                    DeleteStmt *n = makeNode(DeleteStmt);
-                   n->inhOpt = $3;
-                   n->relname = $4;
-                   n->whereClause = $5;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->whereClause = $4;
                    $$ = (Node *)n;
                }
        ;
@@ -3202,17 +3232,17 @@ opt_lmode:  SHARE               { $$ = TRUE; }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
                    UpdateStmt *n = makeNode(UpdateStmt);
-                   n->inhOpt = $2;
-                   n->relname = $3;
-                   n->targetList = $5;
-                   n->fromClause = $6;
-                   n->whereClause = $7;
+                   n->relname = $2->relname;
+                   n->inhOpt = $2->inhOpt;
+                   n->targetList = $4;
+                   n->fromClause = $5;
+                   n->whereClause = $6;
                    $$ = (Node *)n;
                }
        ;
@@ -3545,10 +3575,6 @@ select_offset_value: Iconst
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                             { $$ = INH_YES; }
-       | /*EMPTY*/                             { $$ = INH_DEFAULT; }
-       ;
-
 relation_name_list:  name_list;
 
 name_list:  name
@@ -3576,7 +3602,7 @@ opt_for_update_clause:    for_update_clause       { $$ = $1; }
        | /* EMPTY */                           { $$ = NULL; }
        ;
 
-update_list:  OF va_list                       { $$ = $2; }
+update_list:  OF name_list                     { $$ = $2; }
        | /* EMPTY */                           { $$ = makeList1(NULL); }
        ;
 
@@ -5525,6 +5551,7 @@ TokenId:  ABSOLUTE                        { $$ = "absolute"; }
        | SHARE                         { $$ = "share"; }
        | START                         { $$ = "start"; }
        | STATEMENT                     { $$ = "statement"; }
+       | STATISTICS                    { $$ = "statistics"; }
        | STDIN                         { $$ = "stdin"; }
        | STDOUT                        { $$ = "stdout"; }
        | SYSID                         { $$ = "sysid"; }


diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c

index 402dbfd28ca561a2c9d9ba513e7986dda06ec7df..8ab19f86ae8582213730311845cdbdcae0977f18 100644 (file)


--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c

index f5324cb37355532ef4233a335bc40fb5e5eb635e..e1d49842fd2398a3338bf8fb8329c7ca0677a2fe 100644 (file)


--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ static struct
    }
 };
 
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
 
 
 /*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
 
                foreach(l, pstate->p_forUpdate)
                {
-                   char       *rname = lfirst(l);
+                   char       *rname = strVal(lfirst(l));
 
                    if (strcmp(relname, rname) == 0)
                        return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
 
 #endif
 
-#ifdef NOT_USED
-/*
- * This should only be used if the relation is already
- * heap_open()'ed.  Use the cache version
- * for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
-   return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
 /* given attribute id, return type of that attribute */
 /*
  * This should only be used if the relation is already


diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c

index ae6cd20a5db3838c76a6f006232a8f04e5d4a800..b616f7e68ef875a0774de3f270c4cf98aa3dcc94 100644 (file)


--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
                                        interpretInhOption(stmt->inhOpt),
                                            (ColumnDef *) stmt->def);
                        break;
-                   case 'T':   /* ALTER COLUMN */
-                       AlterTableAlterColumn(stmt->relname,
+                   case 'T':   /* ALTER COLUMN DEFAULT */
+                       AlterTableAlterColumnDefault(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
-                                             stmt->name,
-                                             stmt->def);
+                                                    stmt->name,
+                                                    stmt->def);
                        break;
-                   case 'D':   /* ALTER DROP */
+                   case 'S':   /* ALTER COLUMN STATISTICS */
+                       AlterTableAlterColumnStatistics(stmt->relname,
+                                       interpretInhOption(stmt->inhOpt),
+                                                       stmt->name,
+                                                       stmt->def);
+                       break;
+                   case 'D':   /* DROP COLUMN */
                        AlterTableDropColumn(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
                                             stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
            break;
 
        case T_VacuumStmt:
-           set_ps_display(commandTag = "VACUUM");
+           if (((VacuumStmt *) parsetree)->vacuum)
+               commandTag = "VACUUM";
+           else
+               commandTag = "ANALYZE";
+           set_ps_display(commandTag);
 
-           vacuum(((VacuumStmt *) parsetree)->vacrel,
-                  ((VacuumStmt *) parsetree)->verbose,
-                  ((VacuumStmt *) parsetree)->analyze,
-                  ((VacuumStmt *) parsetree)->va_spec);
+           vacuum((VacuumStmt *) parsetree);
            break;
 
        case T_ExplainStmt:


diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 1fe0afb0a35b44ad34e76fbb73439194a73690ad..41ba82db7b574d6ba6d095a25092376d04702250 100644 (file)


--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,9 +57,6 @@
 /* default selectivity estimate for pattern-match operators such as LIKE */
 #define DEFAULT_MATCH_SEL  0.01
 
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO  0.1
-
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                  Datum lobound, Datum hibound, Oid boundstypid,
                  double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
 static unsigned char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid,
-                int *typlen,
-                bool *typbyval,
-                int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
-                Oid typid, int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival);
+                            Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                                 Form_pg_statistic stats);
 static Selectivity prefix_selectivity(char *prefix,
                   Oid relid,
                   AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
-
-   if (NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_EQ_SEL;
-   else
+   Oid         typid;
+   int32       typmod;
+   HeapTuple   statsTuple;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      selec;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+   /* get info about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   /* get stats for the attribute, if available */
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (HeapTupleIsValid(statsTuple))
    {
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       double      nullfrac;
-       double      commonfrac;
-       Datum       commonval;
-       double      selec;
-
-       /* get info about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       /* get stats for the attribute, if available */
-       if (getattstatistics(relid, attno, typid, typmod,
-                            &nullfrac, &commonfrac, &commonval,
-                            NULL, NULL))
-       {
-           if (flag & SEL_CONSTANT)
-           {
+       Form_pg_statistic stats;
 
-               /*
-                * Is the constant "=" to the column's most common value?
-                * (Although the operator may not really be "=", we will
-                * assume that seeing whether it returns TRUE for the most
-                * common value is useful information. If you don't like
-                * it, maybe you shouldn't be using eqsel for your
-                * operator...)
-                */
-               RegProcedure eqproc = get_opcode(opid);
-               bool        mostcommon;
+       stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-               if (eqproc == (RegProcedure) NULL)
-                   elog(ERROR, "eqsel: no procedure for operator %u",
-                        opid);
+       if (flag & SEL_CONSTANT)
+       {
+           bool    match = false;
+           int     i;
 
-               /* be careful to apply operator right way 'round */
-               if (flag & SEL_RIGHT)
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              commonval,
-                                                              value));
-               else
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              value,
-                                                            commonval));
+           /*
+            * Is the constant "=" to any of the column's most common
+            * values?  (Although the given operator may not really be
+            * "=", we will assume that seeing whether it returns TRUE
+            * is an appropriate test.  If you don't like this, maybe you
+            * shouldn't be using eqsel for your operator...)
+            */
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                &values, &nvalues,
+                                &numbers, &nnumbers))
+           {
+               FmgrInfo    eqproc;
 
-               if (mostcommon)
-               {
+               fmgr_info(get_opcode(opid), &eqproc);
 
-                   /*
-                    * Constant is "=" to the most common value.  We know
-                    * selectivity exactly (or as exactly as VACUUM could
-                    * calculate it, anyway).
-                    */
-                   selec = commonfrac;
-               }
-               else
+               for (i = 0; i < nvalues; i++)
                {
-
-                   /*
-                    * Comparison is against a constant that is neither
-                    * the most common value nor null.  Its selectivity
-                    * cannot be more than this:
-                    */
-                   selec = 1.0 - commonfrac - nullfrac;
-                   if (selec > commonfrac)
-                       selec = commonfrac;
-
-                   /*
-                    * and in fact it's probably less, so we should apply
-                    * a fudge factor.  The only case where we don't is
-                    * for a boolean column, where indeed we have
-                    * estimated the less-common value's frequency
-                    * exactly!
-                    */
-                   if (typid != BOOLOID)
-                       selec *= NOT_MOST_COMMON_RATIO;
+                   /* be careful to apply operator right way 'round */
+                   if (flag & SEL_RIGHT)
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          values[i],
+                                                          value));
+                   else
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          value,
+                                                          values[i]));
+                   if (match)
+                       break;
                }
            }
            else
            {
+               /* no most-common-value info available */
+               values = NULL;
+               numbers = NULL;
+               i = nvalues = nnumbers = 0;
+           }
 
+           if (match)
+           {
+               /*
+                * Constant is "=" to this common value.  We know
+                * selectivity exactly (or as exactly as VACUUM
+                * could calculate it, anyway).
+                */
+               selec = numbers[i];
+           }
+           else
+           {
                /*
-                * Search is for a value that we do not know a priori, but
-                * we will assume it is not NULL.  Selectivity cannot be
-                * more than this:
+                * Comparison is against a constant that is neither
+                * NULL nor any of the common values.  Its selectivity
+                * cannot be more than this:
                 */
-               selec = 1.0 - nullfrac;
-               if (selec > commonfrac)
-                   selec = commonfrac;
+               double  sumcommon = 0.0;
+               double  otherdistinct;
 
+               for (i = 0; i < nnumbers; i++)
+                   sumcommon += numbers[i];
+               selec = 1.0 - sumcommon - stats->stanullfrac;
+               /*
+                * and in fact it's probably a good deal less.
+                * We approximate that all the not-common values
+                * share this remaining fraction equally, so we
+                * divide by the number of other distinct values.
+                */
+               otherdistinct = get_att_numdistinct(relid, attno,
+                                                   typid, stats)
+                   - nnumbers;
+               if (otherdistinct > 1)
+                   selec /= otherdistinct;
                /*
-                * and in fact it's probably less, so apply a fudge
-                * factor.
+                * Another cross-check: selectivity shouldn't be
+                * estimated as more than the least common
+                * "most common value".
                 */
-               selec *= NOT_MOST_COMMON_RATIO;
+               if (nnumbers > 0 && selec > numbers[nnumbers-1])
+                   selec = numbers[nnumbers-1];
            }
 
-           /* result should be in range, but make sure... */
-           if (selec < 0.0)
-               selec = 0.0;
-           else if (selec > 1.0)
-               selec = 1.0;
-
-           if (!typbyval)
-               pfree(DatumGetPointer(commonval));
+           free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
        }
        else
        {
+           double      ndistinct;
 
            /*
-            * No VACUUM ANALYZE stats available, so make a guess using
-            * the dispersion stat (if we have that, which is unlikely for
-            * a normal attribute; but for a system attribute we may be
-            * able to estimate it).
+            * Search is for a value that we do not know a priori, but
+            * we will assume it is not NULL.  Estimate the selectivity
+            * as non-null fraction divided by number of distinct values,
+            * so that we get a result averaged over all possible values
+            * whether common or uncommon.  (Essentially, we are assuming
+            * that the not-yet-known comparison value is equally likely
+            * to be any of the possible values, regardless of their
+            * frequency in the table.  Is that a good idea?)
+            */
+           selec = 1.0 - stats->stanullfrac;
+           ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+           if (ndistinct > 1)
+               selec /= ndistinct;
+           /*
+            * Cross-check: selectivity should never be
+            * estimated as more than the most common value's.
             */
-           selec = get_attdispersion(relid, attno, 0.01);
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                NULL, NULL,
+                                &numbers, &nnumbers))
+           {
+               if (nnumbers > 0 && selec > numbers[0])
+                   selec = numbers[0];
+               free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+           }
        }
 
-       result = (float8) selec;
+       ReleaseSysCache(statsTuple);
    }
-   PG_RETURN_FLOAT8(result);
+   else
+   {
+       /*
+        * No VACUUM ANALYZE stats available, so make a guess using
+        * estimated number of distinct values and assuming they are
+        * equally common.  (The guess is unlikely to be very good,
+        * but we do know a few special cases.)
+        */
+       selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+   }
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
+   bool        isgt;
+   HeapTuple   oprTuple;
+   HeapTuple   statsTuple;
+   Form_pg_statistic stats;
+   Oid         contype;
+   FmgrInfo    opproc;
+   Oid         typid;
+   int32       typmod;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      mcv_selec,
+               hist_selec,
+               sumcommon;
+   double      selec;
+   int         i;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+   /* Can't do anything useful if no constant to compare against, either */
+   if (!(flag & SEL_CONSTANT))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
-   if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_INEQ_SEL;
+   /*
+    * Force the constant to be on the right to simplify later logic.
+    * This means that we may be dealing with either "<" or ">" cases.
+    */
+   if (flag & SEL_RIGHT)
+   {
+       /* we have x < const */
+       isgt = false;
+   }
    else
    {
-       HeapTuple   oprtuple;
-       Oid         ltype,
-                   rtype,
-                   contype;
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       Datum       hival,
-                   loval;
-       double      val,
-                   high,
-                   low,
-                   numerator,
-                   denominator;
-
-       /*
-        * Get left and right datatypes of the operator so we know what
-        * type the constant is.
-        */
-       oprtuple = SearchSysCache(OPEROID,
-                                 ObjectIdGetDatum(opid),
-                                 0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
-           elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       contype = (flag & SEL_RIGHT) ? rtype : ltype;
-       ReleaseSysCache(oprtuple);
-
-       /* Now get info and stats about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       if (!getattstatistics(relid, attno, typid, typmod,
-                             NULL, NULL, NULL,
-                             &loval, &hival))
+       /* we have const < x, commute to make x > const */
+       opid = get_commutator(opid);
+       if (!opid)
        {
-           /* no stats available, so default result */
+           /* Use default selectivity (should we raise an error instead?) */
            PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
        }
+       isgt = true;
+   }
 
-       /* Convert the values to a uniform comparison scale. */
-       if (!convert_to_scalar(value, contype, &val,
-                              loval, hival, typid,
-                              &low, &high))
-       {
+   /*
+    * The constant might not be the same datatype as the column;
+    * look at the operator's input types to find out what it is.
+    * Also set up to be able to call the operator's execution proc.
+    */
+   oprTuple = SearchSysCache(OPEROID,
+                             ObjectIdGetDatum(opid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(oprTuple))
+       elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+   contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+   fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+   ReleaseSysCache(oprTuple);
+
+   /* Now get info and stats about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (!HeapTupleIsValid(statsTuple))
+   {
+       /* no stats available, so default result */
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-           /*
-            * Ideally we'd produce an error here, on the grounds that the
-            * given operator shouldn't have scalarltsel registered as its
-            * selectivity func unless we can deal with its operand types.
-            * But currently, all manner of stuff is invoking scalarltsel,
-            * so give a default estimate until that can be fixed.
-            */
-           if (!typbyval)
-           {
-               pfree(DatumGetPointer(hival));
-               pfree(DatumGetPointer(loval));
-           }
-           PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
-       }
+   /*
+    * If we have most-common-values info, add up the fractions of the
+    * MCV entries that satisfy MCV OP CONST.  These fractions contribute
+    * directly to the result selectivity.  Also add up the total fraction
+    * represented by MCV entries.
+    */
+   mcv_selec = 0.0;
+   sumcommon = 0.0;
 
-       /* release temp storage if needed */
-       if (!typbyval)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        &values, &nvalues,
+                        &numbers, &nnumbers))
+   {
+       for (i = 0; i < nvalues; i++)
        {
-           pfree(DatumGetPointer(hival));
-           pfree(DatumGetPointer(loval));
+           if (DatumGetBool(FunctionCall2(&opproc,
+                                          values[i],
+                                          value)))
+               mcv_selec += numbers[i];
+           sumcommon += numbers[i];
        }
+       free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+   }
+
+   /*
+    * If there is a histogram, determine which bin the constant falls in,
+    * and compute the resulting contribution to selectivity.
+    *
+    * Someday, VACUUM might store more than one histogram per rel/att,
+    * corresponding to more than one possible sort ordering defined for
+    * the column type.  However, to make that work we will need to figure
+    * out which staop to search for --- it's not necessarily the one we
+    * have at hand!  (For example, we might have a '<=' operator rather
+    * than the '<' operator that will appear in staop.)  For now, assume
+    * that whatever appears in pg_statistic is sorted the same way our
+    * operator sorts.
+    */
+   hist_selec = 0.0;
 
-       if (high <= low)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                        &values, &nvalues,
+                        NULL, NULL))
+   {
+       if (nvalues > 1)
        {
+           double  histfrac;
+           bool    ltcmp;
+
+           ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                              values[0],
+                                              value));
+           if (isgt)
+               ltcmp = !ltcmp;
+           if (!ltcmp)
+           {
+               /* Constant is below lower histogram boundary. */
+               histfrac = 0.0;
+           }
+           else
+           {
+               /*
+                * Scan to find proper location.  This could be made faster
+                * by using a binary-search method, but it's probably not
+                * worth the trouble for typical histogram sizes.
+                */
+               for (i = 1; i < nvalues; i++)
+               {
+                   ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                                      values[i],
+                                                      value));
+                   if (isgt)
+                       ltcmp = !ltcmp;
+                   if (!ltcmp)
+                       break;
+               }
+               if (i >= nvalues)
+               {
+                   /* Constant is above upper histogram boundary. */
+                   histfrac = 1.0;
+               }
+               else
+               {
+                   double      val,
+                               high,
+                               low;
+                   double      binfrac;
 
+                   /*
+                    * We have values[i-1] < constant < values[i].
+                    *
+                    * Convert the constant and the two nearest bin boundary
+                    * values to a uniform comparison scale, and do a linear
+                    * interpolation within this bin.
+                    */
+                   if (convert_to_scalar(value, contype, &val,
+                                         values[i-1], values[i], typid,
+                                         &low, &high))
+                   {
+                       if (high <= low)
+                       {
+                           /* cope if bin boundaries appear identical */
+                           binfrac = 0.5;
+                       }
+                       else if (val <= low)
+                           binfrac = 0.0;
+                       else if (val >= high)
+                           binfrac = 1.0;
+                       else
+                           binfrac = (val - low) / (high - low);
+                   }
+                   else
+                   {
+                       /*
+                        * Ideally we'd produce an error here, on the grounds
+                        * that the given operator shouldn't have scalarltsel
+                        * registered as its selectivity func unless we can
+                        * deal with its operand types.  But currently, all
+                        * manner of stuff is invoking scalarltsel, so give a
+                        * default estimate until that can be fixed.
+                        */
+                       binfrac = 0.5;
+                   }
+                   /*
+                    * Now, compute the overall selectivity across the values
+                    * represented by the histogram.  We have i-1 full bins
+                    * plus a binfrac fraction of one more bin below the constant.
+                    */
+                   histfrac = (double) (i-1) + binfrac;
+                   histfrac /= (double) (nvalues - 1);
+               }
+           }
            /*
-            * If we trusted the stats fully, we could return a small or
-            * large selec depending on which side of the single data
-            * point the constant is on.  But it seems better to assume
-            * that the stats are wrong and return a default...
+            * Now histfrac = fraction of histogram entries below the constant.
+            *
+            * Account for "<" vs ">"
             */
-           result = DEFAULT_INEQ_SEL;
-       }
-       else if (val < low || val > high)
-       {
-
+           hist_selec = isgt ? (1.0 - histfrac) : histfrac;
            /*
-            * If given value is outside the statistical range, return a
-            * small or large value; but not 0.0/1.0 since there is a
-            * chance the stats are out of date.
+            * The histogram boundaries are only approximate to begin
+            * with, and may well be out of date anyway.  Therefore,
+            * don't believe extremely small or large selectivity
+            * estimates.
             */
-           if (flag & SEL_RIGHT)
-               result = (val < low) ? 0.001 : 0.999;
-           else
-               result = (val < low) ? 0.999 : 0.001;
-       }
-       else
-       {
-           denominator = high - low;
-           if (flag & SEL_RIGHT)
-               numerator = val - low;
-           else
-               numerator = high - val;
-           result = numerator / denominator;
+           if (hist_selec < 0.001)
+               hist_selec = 0.001;
+           else if (hist_selec > 0.999)
+               hist_selec = 0.999;
        }
+
+       free_attstatsslot(typid, values, nvalues, NULL, 0);
    }
-   PG_RETURN_FLOAT8(result);
+
+   /*
+    * Now merge the results from the MCV and histogram calculations,
+    * realizing that the histogram covers only the non-null values that
+    * are not listed in MCV.
+    */
+   selec = 1.0 - stats->stanullfrac - sumcommon;
+
+   if (hist_selec > 0.0)
+       selec *= hist_selec;
+   else
+   {
+       /*
+        * If no histogram but there are values not accounted for by MCV,
+        * arbitrarily assume half of them will match.
+        */
+       selec *= 0.5;
+   }
+
+   selec += mcv_selec;
+
+   ReleaseSysCache(statsTuple);
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
    Oid         ltopid;
-   float8      result;
 
    /*
-    * Compute selectivity of "<", then invert --- but only if we were
-    * able to produce a non-default estimate.  Note that we get the
-    * negator which strictly speaking means we are looking at "<=" for
-    * ">" or "<" for ">=".  We assume this won't matter.
+    * Commute so that we have a "<" or "<=" operator, then apply
+    * scalarltsel.
     */
-   ltopid = get_negator(opid);
-   if (ltopid)
-   {
-       result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
-                                               ObjectIdGetDatum(ltopid),
-                                                ObjectIdGetDatum(relid),
-                                                   Int16GetDatum(attno),
-                                                   value,
-                                                   Int32GetDatum(flag)));
-   }
-   else
+   ltopid = get_commutator(opid);
+   if (!ltopid)
    {
        /* Use default selectivity (should we raise an error instead?) */
-       result = DEFAULT_INEQ_SEL;
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
    }
 
-   if (result != DEFAULT_INEQ_SEL)
-       result = 1.0 - result;
-
-   PG_RETURN_FLOAT8(result);
+   flag ^= SEL_RIGHT;
+   return DirectFunctionCall5(scalarltsel,
+                              ObjectIdGetDatum(ltopid),
+                              ObjectIdGetDatum(relid),
+                              Int16GetDatum(attno),
+                              value,
+                              Int32GetDatum(flag));
 }
 
 /*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
        result = DEFAULT_MATCH_SEL;
    else
    {
-       HeapTuple   oprtuple;
+       HeapTuple   oprTuple;
        Oid         ltype,
                    rtype;
        char       *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
         * Get left and right datatypes of the operator so we know what
         * type the attribute is.
         */
-       oprtuple = SearchSysCache(OPEROID,
+       oprTuple = SearchSysCache(OPEROID,
                                  ObjectIdGetDatum(opid),
                                  0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
+       if (!HeapTupleIsValid(oprTuple))
            elog(ERROR, "patternsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       ReleaseSysCache(oprtuple);
+       ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+       rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+       ReleaseSysCache(oprTuple);
 
        /* the right-hand const is type text for all supported operators */
        Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
    AttrNumber  attno1 = PG_GETARG_INT16(2);
    Oid         relid2 = PG_GETARG_OID(3);
    AttrNumber  attno2 = PG_GETARG_INT16(4);
-   float8      result;
-   float8      num1,
-               num2,
-               min;
    bool        unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
    bool        unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+   double      selec;
 
    if (unknown1 && unknown2)
-       result = DEFAULT_EQ_SEL;
+       selec = DEFAULT_EQ_SEL;
    else
    {
-       num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
-       num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+       Oid         typid1;
+       Oid         typid2;
+       int32       typmod1;
+       int32       typmod2;
+       HeapTuple   statsTuple1 = NULL;
+       HeapTuple   statsTuple2 = NULL;
+       Form_pg_statistic stats1 = NULL;
+       Form_pg_statistic stats2 = NULL;
+       double      nd1,
+                   nd2;
+
+       if (unknown1)
+       {
+           nd1 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid1, attno1, &typid1, &typmod1);
+
+           /* get stats for the attribute, if available */
+           statsTuple1 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid1),
+                                        Int16GetDatum(attno1),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple1))
+               stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+           nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+       }
+
+       if (unknown2)
+       {
+           nd2 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid2, attno2, &typid2, &typmod2);
+
+           /* get stats for the attribute, if available */
+           statsTuple2 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid2),
+                                        Int16GetDatum(attno2),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple2))
+               stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+           nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+       }
 
        /*
-        * The join selectivity cannot be more than num2, since each tuple
-        * in table 1 could match no more than num2 fraction of tuples in
-        * table 2 (and that's only if the table-1 tuple matches the most
-        * common value in table 2, so probably it's less).  By the same
-        * reasoning it is not more than num1. The min is therefore an
-        * upper bound.
+        * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+        * (can we produce any theory for this?)
         *
-        * If we know the dispersion of only one side, use it; the reasoning
-        * above still works.
+        * XXX possibility to do better: if both attributes have histograms
+        * then we could determine the exact join selectivity between the
+        * MCV sets, and only have to assume the join behavior of the non-MCV
+        * values.  This could be a big win when the MCVs cover a large part
+        * of the population.
         *
-        * XXX can we make a better estimate here?  Using the nullfrac
-        * statistic might be helpful, for example.  Assuming the operator
-        * is strict (does not succeed for null inputs) then the
-        * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
-        * which might be usefully small if there are many nulls.  How
-        * about applying the operator to the most common values?
+        * XXX what about nulls?
         */
-       min = (num1 < num2) ? num1 : num2;
-       result = min;
+       selec = 1.0 / sqrt(nd1 * nd2);
+       if (selec > 1.0)
+           selec = 1.0;
+
+       if (HeapTupleIsValid(statsTuple1))
+           ReleaseSysCache(statsTuple1);
+       if (HeapTupleIsValid(statsTuple2))
+           ReleaseSysCache(statsTuple2);
+
    }
-   PG_RETURN_FLOAT8(result);
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  *   Returns "true" if successful.
  *
  * All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values.  XXX what about NUMERIC values that are outside
+ * the range of "double"?
  *
  * String datatypes are converted by convert_string_to_scalar(),
  * which is explained below.  The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
 {
    switch (typid)
    {
-           case BOOLOID:
+       case BOOLOID:
            return (double) DatumGetBool(value);
        case INT2OID:
            return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  * three strings before computing the scaled values.  This allows us to
  * "zoom in" when we encounter a narrow data range.  An example is a phone
  * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
  */
 static void
 convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
 /*
  * getattproperties
  *   Retrieve pg_attribute properties for an attribute,
- *   including type OID, type len, type byval flag, typmod.
+ *   including type OID and typmod.
  */
 static void
 getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+                Oid *typid, int32 *typmod)
 {
    HeapTuple   atp;
    Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
    att_tup = (Form_pg_attribute) GETSTRUCT(atp);
 
    *typid = att_tup->atttypid;
-   *typlen = att_tup->attlen;
-   *typbyval = att_tup->attbyval;
    *typmod = att_tup->atttypmod;
 
    ReleaseSysCache(atp);
 }
 
 /*
- * getattstatistics
- *   Retrieve the pg_statistic data for an attribute.
- *   Returns 'false' if no stats are available.
+ * get_att_numdistinct
  *
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ *   Estimate the number of distinct values of an attribute.
  *
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
  *
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values.  (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute.  If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
  */
-static bool
-getattstatistics(Oid relid,
-                AttrNumber attnum,
-                Oid typid,
-                int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                   Form_pg_statistic stats)
 {
-   HeapTuple   tuple;
-   HeapTuple   typeTuple;
-   FmgrInfo    inputproc;
-   Oid         typelem;
-   bool        isnull;
+   HeapTuple   reltup;
+   double      ntuples;
 
    /*
-    * We assume that there will only be one entry in pg_statistic for the
-    * given rel/att, so we search WITHOUT considering the staop column.
-    * Someday, VACUUM might store more than one entry per rel/att,
-    * corresponding to more than one possible sort ordering defined for
-    * the column type.  However, to make that work we will need to figure
-    * out which staop to search for --- it's not necessarily the one we
-    * have at hand!  (For example, we might have a '>' operator rather
-    * than the '<' operator that will appear in staop.)
+    * Special-case boolean columns: presumably, two distinct values.
+    *
+    * Are there any other cases we should wire in special estimates for?
     */
-   tuple = SearchSysCache(STATRELID,
-                          ObjectIdGetDatum(relid),
-                          Int16GetDatum((int16) attnum),
-                          0, 0);
-   if (!HeapTupleIsValid(tuple))
-   {
-       /* no such stats entry */
-       return false;
-   }
+   if (typid == BOOLOID)
+       return 2.0;
 
-   if (nullfrac)
-       *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
-   if (commonfrac)
-       *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
-   /* Get the type input proc for the column datatype */
-   typeTuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(typid),
-                              0, 0, 0);
-   if (!HeapTupleIsValid(typeTuple))
-       elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
-            typid);
-   fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
-   typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
-   ReleaseSysCache(typeTuple);
+   /*
+    * If VACUUM ANALYZE determined a fixed estimate, use it.
+    */
+   if (stats && stats->stadistinct > 0.0)
+       return stats->stadistinct;
 
    /*
-    * Values are variable-length fields, so cannot access as struct
-    * fields. Must do it the hard way with SysCacheGetAttr.
+    * Otherwise we need to get the relation size.
     */
-   if (commonval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stacommonval,
-                                         &isnull);
+   reltup = SearchSysCache(RELOID,
+                           ObjectIdGetDatum(relid),
+                           0, 0, 0);
+   if (!HeapTupleIsValid(reltup))
+       elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stacommonval is null");
-           *commonval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *commonval = FunctionCall3(&inputproc,
-                                      CStringGetDatum(strval),
-                                      ObjectIdGetDatum(typelem),
-                                      Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
 
-   if (loval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_staloval,
-                                         &isnull);
+   ReleaseSysCache(reltup);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: staloval is null");
-           *loval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *loval = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   if (ntuples <= 0.0)
+       return 100.0;           /* no data available; return a default */
 
-   if (hival)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stahival,
-                                         &isnull);
+   /*
+    * If VACUUM ANALYZE determined a scaled estimate, use it.
+    */
+   if (stats && stats->stadistinct < 0.0)
+       return - stats->stadistinct * ntuples;
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stahival is null");
-           *hival = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *hival = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
+   /*
+    * VACUUM ANALYZE does not compute stats for system attributes,
+    * but some of them can reasonably be assumed unique anyway.
+    */
+   switch (attnum)
+   {
+       case ObjectIdAttributeNumber:
+       case SelfItemPointerAttributeNumber:
+           return ntuples;
+       case TableOidAttributeNumber:
+           return 1.0;
    }
 
-   ReleaseSysCache(tuple);
+   /*
+    * Estimate ndistinct = ntuples if the table is small, else 100.
+    */
+   if (ntuples < 100.0)
+       return ntuples;
 
-   return true;
+   return 100.0;
 }
 
 /*-------------------------------------------------------------------------


diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 82d55866215aac34724aa44deb029feea9d94a76..3995de5d7a1325085c901b0d2427cbbd775170ee 100644 (file)


--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   Eventually, the index information should go through here, too.
@@ -18,7 +18,10 @@
 #include "access/tupmacs.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
        return -1;
 }
 
-/*
- * get_attdispersion
- *
- *   Retrieve the dispersion statistic for an attribute,
- *   or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value.  This value may vary
- * depending on context.  (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
-   HeapTuple   atp;
-   Form_pg_attribute att_tup;
-   double      dispersion;
-   Oid         atttypid;
-   int32       ntuples;
-
-   atp = SearchSysCache(ATTNUM,
-                        ObjectIdGetDatum(relid),
-                        Int16GetDatum(attnum),
-                        0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
-            relid, attnum);
-       return min_estimate;
-   }
-
-   att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
-   dispersion = att_tup->attdispersion;
-   atttypid = att_tup->atttypid;
-
-   ReleaseSysCache(atp);
-
-   if (dispersion > 0.0)
-       return dispersion;      /* we have a specific estimate from VACUUM */
-
-   /*
-    * Special-case boolean columns: the dispersion of a boolean is highly
-    * unlikely to be anywhere near 1/numtuples, instead it's probably
-    * more like 0.5.
-    *
-    * Are there any other cases we should wire in special estimates for?
-    */
-   if (atttypid == BOOLOID)
-       return 0.5;
-
-   /*
-    * Dispersion is either 0 (no data available) or -1 (dispersion is
-    * 1/numtuples).  Either way, we need the relation size.
-    */
-
-   atp = SearchSysCache(RELOID,
-                        ObjectIdGetDatum(relid),
-                        0, 0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
-       return min_estimate;
-   }
-
-   ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-   ReleaseSysCache(atp);
-
-   if (ntuples == 0)
-       return min_estimate;    /* no data available */
-
-   if (dispersion < 0.0)       /* VACUUM thinks there are no duplicates */
-       return 1.0 / (double) ntuples;
-
-   /*
-    * VACUUM ANALYZE does not compute dispersion for system attributes,
-    * but some of them can reasonably be assumed unique anyway.
-    */
-   if (attnum == ObjectIdAttributeNumber ||
-       attnum == SelfItemPointerAttributeNumber)
-       return 1.0 / (double) ntuples;
-   if (attnum == TableOidAttributeNumber)
-       return 1.0;
-
-   /*
-    * VACUUM ANALYZE has not been run for this table. Produce an estimate
-    * of 1/numtuples.  This may produce unreasonably small estimates for
-    * large tables, so limit the estimate to no less than min_estimate.
-    */
-   dispersion = 1.0 / (double) ntuples;
-   if (dispersion < min_estimate)
-       dispersion = min_estimate;
-
-   return dispersion;
-}
-
 /*             ---------- INDEX CACHE ----------                        */
 
 /*     watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
 }
 
 #endif
+
+/*             ---------- STATISTICS CACHE ----------                   */
+
+/*
+ * get_attstatsslot
+ *
+ *     Extract the contents of a "slot" of a pg_statistic tuple.
+ *     Returns TRUE if requested slot type was found, else FALSE.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache.  We do this since
+ * most callers will want to extract more than one value from the cache
+ * entry, and we don't want to repeat the cache lookup unnecessarily.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype: type OID of attribute.
+ * atttypmod: typmod of attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if not NULL, the slot's stavalues are extracted.
+ * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
+ *
+ * If assigned, values and numbers are set to point to palloc'd arrays.
+ * If the attribute type is pass-by-reference, the values referenced by
+ * the values array are themselves palloc'd.  The palloc'd stuff can be
+ * freed by calling free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+                Oid atttype, int32 atttypmod,
+                int reqkind, Oid reqop,
+                Datum **values, int *nvalues,
+                float4 **numbers, int *nnumbers)
+{
+   Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+   int         i,
+               j;
+   Datum       val;
+   bool        isnull;
+   ArrayType  *statarray;
+   int         narrayelem;
+   HeapTuple   typeTuple;
+   FmgrInfo    inputproc;
+   Oid         typelem;
+
+   for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+   {
+       if ((&stats->stakind1)[i] == reqkind &&
+           (reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+           break;
+   }
+   if (i >= STATISTIC_NUM_SLOTS)
+       return false;           /* not there */
+
+   if (values)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stavalues1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stavalues is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * Do initial examination of the array.  This produces a list
+        * of text Datums --- ie, pointers into the text array value.
+        */
+       deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+       narrayelem = *nvalues;
+       /*
+        * We now need to replace each text Datum by its internal equivalent.
+        *
+        * Get the type input proc and typelem for the column datatype.
+        */
+       typeTuple = SearchSysCache(TYPEOID,
+                                  ObjectIdGetDatum(atttype),
+                                  0, 0, 0);
+       if (!HeapTupleIsValid(typeTuple))
+           elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+                atttype);
+       fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+       typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+       ReleaseSysCache(typeTuple);
+       /*
+        * Do the conversions.  The palloc'd array of Datums is reused
+        * in place.
+        */
+       for (j = 0; j < narrayelem; j++)
+       {
+           char       *strval;
+
+           strval = DatumGetCString(DirectFunctionCall1(textout,
+                                                        (*values)[j]));
+           (*values)[j] = FunctionCall3(&inputproc,
+                                        CStringGetDatum(strval),
+                                        ObjectIdGetDatum(typelem),
+                                        Int32GetDatum(atttypmod));
+           pfree(strval);
+       }
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   if (numbers)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stanumbers1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stanumbers is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * We expect the array to be a 1-D float4 array; verify that.
+        * We don't need to use deconstruct_array() since the array
+        * data is just going to look like a C array of float4 values.
+        */
+       narrayelem = ARR_DIMS(statarray)[0];
+       if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+           ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+           elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+       *numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+       memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+       *nnumbers = narrayelem;
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   return true;
+}
+
+void
+free_attstatsslot(Oid atttype,
+                 Datum *values, int nvalues,
+                 float4 *numbers, int nnumbers)
+{
+   if (values)
+   {
+       if (! get_typbyval(atttype))
+       {
+           int     i;
+
+           for (i = 0; i < nvalues; i++)
+               pfree(DatumGetPointer(values[i]));
+       }
+       pfree(values);
+   }
+   if (numbers)
+       pfree(numbers);
+}


diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index 75ef3179202695a3fb7a5336b7bc4f3e24d3f3f5..4e35b3fb35ba67aa78d337e6bdb39149c6256f8c 100644 (file)


--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
            0,
            0
    }},
-   {StatisticRelationName,     /* STATRELID */
+   {StatisticRelationName,     /* STATRELATT */
        StatisticRelidAttnumIndex,
        2,
        {


diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index d27bfb29668711e985f1ba29bd1285ab77201bf2..5a77c47c20085f0d24ae5b8edb6ef2ca70acdc27 100644 (file)


--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -78,7 +78,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,7 +87,11 @@
 
 #include "access/heapam.h"
 #include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
 #include "miscadmin.h"
+#include "utils/fmgroids.h"
 #include "utils/logtape.h"
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
    TupleDesc   tupDesc;
    int         nKeys;
    ScanKey     scanKeys;
+   SortFunctionKind *sortFnKinds;
 
    /*
     * These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
    Oid         datumType;
    Oid         sortOperator;
    FmgrInfo    sortOpFn;       /* cached lookup data for sortOperator */
+   SortFunctionKind sortFnKind;
    /* we need typelen and byval in order to know how to copy the Datums. */
    int         datumTypeLen;
    bool        datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
 
 Tuplesortstate *
 tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
+                    int nkeys,
+                    Oid *sortOperators, AttrNumber *attNums,
                     bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   int         i;
 
-   AssertArg(nkeys >= 1);
-   AssertArg(keys[0].sk_attno != 0);
-   AssertArg(keys[0].sk_procedure != 0);
+   AssertArg(nkeys > 0);
 
    state->comparetup = comparetup_heap;
    state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 
    state->tupDesc = tupDesc;
    state->nKeys = nkeys;
-   state->scanKeys = keys;
+   state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+   MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+   state->sortFnKinds = (SortFunctionKind *)
+       palloc(nkeys * sizeof(SortFunctionKind));
+   MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+   for (i = 0; i < nkeys; i++)
+   {
+       RegProcedure sortFunction;
+
+       AssertArg(sortOperators[i] != 0);
+       AssertArg(attNums[i] != 0);
+
+       /* select a function that implements the sort operator */
+       SelectSortFunction(sortOperators[i], &sortFunction,
+                          &state->sortFnKinds[i]);
+
+       ScanKeyEntryInitialize(&state->scanKeys[i],
+                              0x0,
+                              attNums[i],
+                              sortFunction,
+                              (Datum) 0);
+   }
 
    return state;
 }
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
                      bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   RegProcedure sortFunction;
    int16       typlen;
    bool        typbyval;
 
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
 
    state->datumType = datumType;
    state->sortOperator = sortOperator;
-   /* lookup the function that implements the sort operator */
-   fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+   /* select a function that implements the sort operator */
+   SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+   /* and look up the function */
+   fmgr_info(sortFunction, &state->sortOpFn);
+
    /* lookup necessary attributes of the datum type */
    get_typlenbyval(datumType, &typlen, &typbyval);
    state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
    }
    if (state->memtupindex)
        pfree(state->memtupindex);
+
+   /* this stuff might better belong in a variant-specific shutdown routine */
+   if (state->scanKeys)
+       pfree(state->scanKeys);
+   if (state->sortFnKinds)
+       pfree(state->sortFnKinds);
+
    pfree(state);
 }
 
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
    for (nkey = 0; nkey < state->nKeys; nkey++)
    {
        ScanKey     scanKey = state->scanKeys + nkey;
+       SortFunctionKind fnKind = state->sortFnKinds[nkey];
        AttrNumber  attno = scanKey->sk_attno;
        Datum       lattr,
                    rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
        }
        else if (isnull2)
            return -1;
-       else if (scanKey->sk_flags & SK_COMMUTE)
-       {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return -1;      /* a < b after commute */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return 1;       /* a > b after commute */
-       }
        else
        {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return -1;      /* a < b */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return 1;       /* a > b */
+           int32       compare;
+
+           if (fnKind == SORTFUNC_LT)
+           {
+               if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                              lattr, rattr)))
+                   compare = -1;   /* a < b */
+               else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                                   rattr, lattr)))
+                   compare = 1;    /* a > b */
+               else
+                   compare = 0;
+           }
+           else
+           {
+               /* sort function is CMP or REVCMP */
+               compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+                                                     lattr, rattr));
+               if (fnKind == SORTFUNC_REVCMP)
+                   compare = -compare;
+           }
+
+           if (compare != 0)
+           {
+               if (scanKey->sk_flags & SK_COMMUTE)
+                   compare = -compare;
+               return compare;
+           }
        }
    }
 
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
        }
        else
        {
+           /* the comparison function is always of CMP type */
            compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
-                                               attrDatum1, attrDatum2));
+                                                 attrDatum1,
+                                                 attrDatum2));
        }
 
        if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
    }
    else if (rtup->isNull)
        return -1;
-   else
+   else if (state->sortFnKind == SORTFUNC_LT)
    {
        if (DatumGetBool(FunctionCall2(&state->sortOpFn,
                                       ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
            return 1;           /* a > b */
        return 0;
    }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
+
+       compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+                                             ltup->val, rtup->val));
+       if (state->sortFnKind == SORTFUNC_REVCMP)
+           compare = -compare;
+       return compare;
+   }
 }
 
 static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
        return (unsigned int) tuplelen;
    }
 }
+
+
+/*
+ * SelectSortFunction: choose the function used to implement a sort operator.
+ *
+ * The straightforward method is to use the operator's implementation proc
+ * --- ie, "<" comparison --- but that often requires two calls of the
+ * function per comparison.  If the operator is registered as the "<" or ">"
+ * member of a btree opclass, we instead return that opclass's three-way
+ * comparator (*kind = SORTFUNC_CMP, or SORTFUNC_REVCMP when the comparator's
+ * output must be reversed for a descending sort).  Otherwise *sortFunction
+ * is the operator's own proc and *kind is SORTFUNC_LT.
+ *
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+                  RegProcedure *sortFunction,
+                  SortFunctionKind *kind)
+{
+   Relation    relation;
+   HeapScanDesc scan;
+   ScanKeyData skey[3];
+   HeapTuple   tuple;
+   Oid         opclass = InvalidOid;
+
+   /*
+    * Scan pg_amop to see if the target operator is registered as the
+    * "<" or ">" operator of any btree opclass.  It's possible that it
+    * might be registered both ways (eg, if someone were to build a
+    * "reverse sort" opclass for some reason); prefer the "<" case if so.
+    * If the operator is registered the same way in multiple opclasses,
+    * assume we can use the associated comparator function from any one.
+    */
+   relation = heap_openr(AccessMethodOperatorRelationName,
+                         AccessShareLock);
+
+   ScanKeyEntryInitialize(&skey[0], 0,
+                          Anum_pg_amop_amopid,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(BTREE_AM_OID));
+
+   ScanKeyEntryInitialize(&skey[1], 0,
+                          Anum_pg_amop_amopopr,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(sortOperator));
+
+   scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+       if (aform->amopstrategy == BTLessStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_CMP;
+           break;              /* done looking */
+       }
+       else if (aform->amopstrategy == BTGreaterStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_REVCMP;
+           /* keep scanning in hopes of finding a BTLess entry */
+       }
+   }
+
+   heap_endscan(scan);
+   heap_close(relation, AccessShareLock);
+
+   if (OidIsValid(opclass))
+   {
+       /* Found a suitable opclass, get its comparator support function */
+       relation = heap_openr(AccessMethodProcedureRelationName,
+                             AccessShareLock);
+
+       ScanKeyEntryInitialize(&skey[0], 0,
+                              Anum_pg_amproc_amid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(BTREE_AM_OID));
+
+       ScanKeyEntryInitialize(&skey[1], 0,
+                              Anum_pg_amproc_amopclaid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(opclass));
+
+       ScanKeyEntryInitialize(&skey[2], 0,
+                              Anum_pg_amproc_amprocnum,
+                              F_INT2EQ,
+                              Int16GetDatum(BTORDER_PROC));
+
+       scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+       *sortFunction = InvalidOid;     /* stays invalid if no pg_amproc row matches */
+
+       if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+       {
+           Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+           *sortFunction = aform->amproc;
+       }
+
+       heap_endscan(scan);
+       heap_close(relation, AccessShareLock);
+
+       if (RegProcedureIsValid(*sortFunction))
+           return;             /* *kind was already set by the pg_amop scan */
+   }
+
+   /* No opclass or no comparator proc found, so use the operator as-is */
+
+   *kind = SORTFUNC_LT;
+   *sortFunction = get_opcode(sortOperator);
+   if (!RegProcedureIsValid(*sortFunction))
+       elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+            sortOperator);
+}


diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h

index 759ab3d39e2494de4fa021c8070ac7e5da62d283..6e38529204dabaab44c078c0af05a6687fd0d966 100644 (file)


--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,15 +6,13 @@
  *
  * Copyright (c) 2000, PostgreSQL Development Team
  *
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef TUPTOASTER_H
 #define TUPTOASTER_H
 
-#ifdef TUPLE_TOASTER_ACTIVE
-
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
  */
 extern Datum toast_compress_datum(Datum value);
 
-#endif  /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
 
 
 #endif  /* TUPTOASTER_H */


diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 963b11c1d386ed6df175ad0e2e92cfe2929af774..832f91fb09f172d5ffc3d31aba10fccd5431c783 100644 (file)


--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 200101061
+#define CATALOG_VERSION_NO 200105051
 
 #endif


diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h

index a7248f6c6dc4fb438d27b29fe250c446534ad228..7ab04b05fb25b1dd765830e90ec1b717c6e2814e 100644 (file)


--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -7,13 +7,14 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef HEAP_H
 #define HEAP_H
 
+#include "catalog/pg_attribute.h"
 #include "utils/rel.h"
 
 typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
                          List *rawColDefaults,
                          List *rawConstraints);
 
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
 #endif  /* HEAP_H */


diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h

index 1dac0bb1c31596e1b31e5fc2d82f20835ed7879b..07aaad61c798bc295723dfe80cded8dbc848d6c9 100644 (file)


--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
               Datum *datum,
               char *nullv);
 
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
 extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
 extern void setRelhasindex(Oid relid, bool hasindex);
 


diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 41a580a37779abae1d46c2b8422b8ece0fbebc2b..cc155cf1bbb314f4cb54a41c23a3a2ed5e1fd5d8 100644 (file)


--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
 xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
 xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
 */
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
 DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
 DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
 DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));


diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h

index 58724e94dc966cef18b5345521cafa985a4dbf1e..6e11aa6d530707371c7b5b0f5af4e4174c4919f5 100644 (file)


--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,15 +36,14 @@
  *     typedef struct FormData_pg_attribute
  *
  *     If you change the following, make sure you change the structs for
- *     system attributes in heap.c and index.c also.
+ *     system attributes in catalog/heap.c also.
  * ----------------
  */
 CATALOG(pg_attribute) BOOTSTRAP
 {
    Oid         attrelid;       /* OID of relation containing this
                                 * attribute */
-   NameData    attname;
-   Oid         atttypid;
+   NameData    attname;        /* name of attribute */
 
    /*
     * atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
     * attalign attributes of this instance, so they had better match or
     * Postgres will fail.
     */
-
-   float4      attdispersion;
+   Oid         atttypid;
 
    /*
-    * attdispersion is the dispersion statistic of the column (0.0 to
-    * 1.0), or zero if the statistic has not been calculated, or -1.0 if
-    * VACUUM found that the column contains no duplicate entries (in
-    * which case the dispersion should be taken as 1.0/numberOfRows for
-    * the current table size).  The -1.0 hack is useful because the
-    * number of rows may be updated more often than attdispersion is. We
-    * assume that the column will retain its no-duplicate-entry property.
-    * (Perhaps this should be driven off the existence of a UNIQUE index
-    * for the column, instead of being a statistical guess?)
+    * attstattarget is the target number of statistics datapoints to collect
+    * during VACUUM ANALYZE of this column.  A zero here indicates that we
+    * do not wish to collect any stats about this column.
     */
-
-   int2        attlen;
+   int4        attstattarget;
 
    /*
     * attlen is a copy of the typlen field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   int2        attnum;
+   int2        attlen;
 
    /*
     * attnum is the "attribute number" for the attribute:  A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     *
     * Note that (attnum - 1) is often used as the index to an array.
     */
+   int2        attnum;
 
-   int4        attnelems;      /* number of dimensions, if an array type */
-
-   int4        attcacheoff;
+   /*
+    * attndims is the declared number of dimensions, if an array type,
+    * otherwise zero.
+    */
+   int4        attndims;
 
    /*
     * fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
     * tuple descriptor, we may then update attcacheoff in the copies.
     * This speeds up the attribute walking process.
     */
-
-   int4        atttypmod;
+   int4        attcacheoff;
 
    /*
     * atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     * argument. The value will generally be -1 for types that do not need
     * typmod.
     */
-
-   bool        attbyval;
+   int4        atttypmod;
 
    /*
     * attbyval is a copy of the typbyval field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   char        attstorage;
+   bool        attbyval;
 
    /*----------
     * attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
     * but only as a last resort ('e' and 'x' fields are moved first).
     *----------
     */
+   char        attstorage;
 
+   /* This flag indicates that the attribute is really a set */
    bool        attisset;
-   char        attalign;
 
    /*
     * attalign is a copy of the typalign field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   bool        attnotnull;
+   char        attalign;
 
    /* This flag represents the "NOT NULL" constraint */
-   bool        atthasdef;
+   bool        attnotnull;
 
    /* Has DEFAULT value or not */
+   bool        atthasdef;
 } FormData_pg_attribute;
 
 /*
  * someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
  */
 #define ATTRIBUTE_TUPLE_SIZE \
-   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
 
 /* ----------------
  *     Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
 #define Anum_pg_attribute_attrelid     1
 #define Anum_pg_attribute_attname      2
 #define Anum_pg_attribute_atttypid     3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
 #define Anum_pg_attribute_attlen       5
 #define Anum_pg_attribute_attnum       6
-#define Anum_pg_attribute_attnelems        7
+#define Anum_pg_attribute_attndims     7
 #define Anum_pg_attribute_attcacheoff  8
 #define Anum_pg_attribute_atttypmod        9
 #define Anum_pg_attribute_attbyval     10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
    (attribute)->attnotnull = false; \
    (attribute)->atthasdef = false;
 #endif  /* _DROP_COLUMN_HACK__ */
+
 /* ----------------
  *     SCHEMA_ macros for declaring hardcoded tuple descriptors.
  *     these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
  * ----------------
  */
 #define Schema_pg_type \
-{ 1247, {"typname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, '\0' , 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, false    , 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1247 typowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1247 typlen          21 0  2   3 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1247 typprtlen       21 0  2   4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_proc \
-{ 1255, {"proname"},           19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"},           19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1255 proowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 prolang         26 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 proisinh            16 0  1   4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_shadow
  * ----------------
  */
-DATA(insert OID = 0 ( 1260 usename         19  0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid            23  0   4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename         19  DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid            23  DEFAULT_ATTSTATTARGET   4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1260 usecreatedb     16  0   1   3 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usetrace            16  0   1   4 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usesuper            16  0   1   5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_group
  * ----------------
  */
-DATA(insert OID = 0 ( 1261 groname         19 0 NAMEDATALEN  1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid            23 0  4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid            23 DEFAULT_ATTSTATTARGET  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1261 grolist       1007 0 -1   3 0 -1 -1 f x f i f f));
 DATA(insert OID = 0 ( 1261 ctid                27 0  6  -1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1261 oid             26 0  4  -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_attribute \
-{ 1249, {"attrelid"},    26, 0,    4,  1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"},     19, 0, NAMEDATALEN,   2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0, 4,  4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"},   23, 0,    4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid            26 0  4   1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname         19 0 NAMEDATALEN  2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"},    26, DEFAULT_ATTSTATTARGET,    4,  1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"},     19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,   2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0,  4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"},    23, 0,    4,  7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid            26 DEFAULT_ATTSTATTARGET  4   1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  2 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1249 atttypid            26 0  4   3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion   700 0  4   4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget   23 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attlen          21 0  2   5 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1249 attnum          21 0  2   6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims            23 0  4   7 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attcacheoff     23 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 atttypmod       23 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attbyval            16 0  1  10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_class \
-{ 1259, {"relname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"},    23, 0,   4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"},    700, 0,  4,  7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltype         26 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relowner            23 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relam           26 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relfilenode     26 0  4   5 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relpages            23 0  4   6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples      700 0  4   7 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastrelid   26 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastidxid   26 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relhasindex     16 0  1  10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_variable \
-{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1264 varfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_log \
-{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1269 logfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 376 xactlockfoo      26 0  4   1 0 -1 -1 t p f i f f));
 


diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h

index 81e75e14b6a7dc7372f9dcd6808f824944f5f028..86de88cc9b662fe5c65f43301e2a28a247bf69ee 100644 (file)


--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
    Oid         relam;
    Oid         relfilenode;
    int4        relpages;
-   int4        reltuples;
+   float4      reltuples;
    Oid         reltoastrelid;
    Oid         reltoastidxid;
    bool        relhasindex;


diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h

index 2f39bea3245e1772984b1b3b4fca0dbb36f41c1d..8d6a6b37c16ac513468f052508aadf91a034ff85 100644 (file)


--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
    /* These fields form the unique key for the entry: */
    Oid         starelid;       /* relation containing attribute */
    int2        staattnum;      /* attribute (column) stats are for */
-   Oid         staop;          /* '<' comparison op used for lo/hi vals */
+
+   /* the fraction of the column's entries that are NULL: */
+   float4      stanullfrac;
 
    /*
-    * Note: the current VACUUM code will never produce more than one
-    * entry per column, but in theory there could be multiple entries if
-    * a datatype has more than one useful ordering operator.  Also, the
-    * current code will not write an entry unless it found at least one
-    * non-NULL value in the column; so the remaining fields will never be
-    * NULL.
+    * stawidth is the average width in bytes of non-null entries.  For
+    * fixed-width datatypes this is of course the same as the typlen, but
+    * for varlena types it is more useful.  Note that this is the average
+    * width of the data as actually stored, post-TOASTing (eg, for a
+    * moved-out-of-line value, only the size of the pointer object is
+    * counted).  This is the appropriate definition for the primary use of
+    * the statistic, which is to estimate sizes of in-memory hash tables of
+    * tuples.
+    */
+   int4        stawidth;
+
+   /* ----------------
+    * stadistinct indicates the (approximate) number of distinct non-null
+    * data values in the column.  The interpretation is:
+    *      0       unknown or not computed
+    *      > 0     actual number of distinct values
+    *      < 0     negative of multiplier for number of rows
+    * The special negative case allows us to cope with columns that are
+    * unique (stadistinct = -1) or nearly so (for example, a column in
+    * which values appear about twice on the average could be represented
+    * by stadistinct = -0.5).  Because the number-of-rows statistic in
+    * pg_class may be updated more frequently than pg_statistic is, it's
+    * important to be able to describe such situations as a multiple of
+    * the number of rows, rather than a fixed number of distinct values.
+    * But in other cases a fixed number is correct (eg, a boolean column).
+    * ----------------
+    */
+   float4      stadistinct;
+
+   /* ----------------
+    * To allow keeping statistics on different kinds of datatypes,
+    * we do not hard-wire any particular meaning for the remaining
+    * statistical fields.  Instead, we provide several "slots" in which
+    * statistical data can be placed.  Each slot includes:
+    *      kind            integer code identifying kind of data
+    *      op              OID of associated operator, if needed
+    *      numbers         float4 array (for statistical values)
+    *      values          text array (for representations of data values)
+    * The kind and operator fields are never NULL; they are zeroes in an
+    * unused slot.  The numbers and values fields are NULL in an unused
+    * slot, and might also be NULL in a used slot if the slot kind has
+    * no need for one or the other.
+    * ----------------
     */
 
+   int2        stakind1;
+   int2        stakind2;
+   int2        stakind3;
+   int2        stakind4;
+
+   Oid         staop1;
+   Oid         staop2;
+   Oid         staop3;
+   Oid         staop4;
+
    /*
-    * These fields contain the stats about the column indicated by the
-    * key
+    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+    * (NULL). They cannot be accessed as C struct entries; you have to use
+    * the full field access machinery (heap_getattr) for them.  We declare
+    * them here for the catalog machinery.
     */
-   float4      stanullfrac;    /* the fraction of the entries that are
-                                * NULL */
-   float4      stacommonfrac;  /* the fraction that are the most common
-                                * val */
+
+   float4      stanumbers1[1];
+   float4      stanumbers2[1];
+   float4      stanumbers3[1];
+   float4      stanumbers4[1];
 
    /*
-    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
-    * accessed as C struct entries; you have to use the full field access
-    * machinery (heap_getattr) for them.
-    *
-    * All three of these are text representations of data values of the
-    * column's data type.  To re-create the actual Datum, do
-    * datatypein(textout(givenvalue)).
+    * Values in these text arrays are external representations of values
+    * of the column's data type.  To re-create the actual Datum, do
+    * datatypein(textout(arrayelement)).
     */
-   text        stacommonval;   /* most common non-null value in column */
-   text        staloval;       /* smallest non-null value in column */
-   text        stahival;       /* largest non-null value in column */
+   text        stavalues1[1];
+   text        stavalues2[1];
+   text        stavalues3[1];
+   text        stavalues4[1];
 } FormData_pg_statistic;
 
+#define STATISTIC_NUM_SLOTS  4
+
 /* ----------------
  *     Form_pg_statistic corresponds to a pointer to a tuple with
  *     the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
  *     compiler constants for pg_statistic
  * ----------------
  */
-#define Natts_pg_statistic             8
+#define Natts_pg_statistic             21
 #define Anum_pg_statistic_starelid     1
 #define Anum_pg_statistic_staattnum        2
-#define Anum_pg_statistic_staop            3
-#define Anum_pg_statistic_stanullfrac  4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval 6
-#define Anum_pg_statistic_staloval     7
-#define Anum_pg_statistic_stahival     8
+#define Anum_pg_statistic_stanullfrac  3
+#define Anum_pg_statistic_stawidth     4
+#define Anum_pg_statistic_stadistinct  5
+#define Anum_pg_statistic_stakind1     6
+#define Anum_pg_statistic_stakind2     7
+#define Anum_pg_statistic_stakind3     8
+#define Anum_pg_statistic_stakind4     9
+#define Anum_pg_statistic_staop1       10
+#define Anum_pg_statistic_staop2       11
+#define Anum_pg_statistic_staop3       12
+#define Anum_pg_statistic_staop4       13
+#define Anum_pg_statistic_stanumbers1  14
+#define Anum_pg_statistic_stanumbers2  15
+#define Anum_pg_statistic_stanumbers3  16
+#define Anum_pg_statistic_stanumbers4  17
+#define Anum_pg_statistic_stavalues1   18
+#define Anum_pg_statistic_stavalues2   19
+#define Anum_pg_statistic_stavalues3   20
+#define Anum_pg_statistic_stavalues4   21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation.  Additional "kinds" will probably appear in
+ * future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot.  Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not.  stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count).  The values
+ * shall be ordered in decreasing frequency.  Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector.  Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV  1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data.  staop is
+ * the OID of the "<" operator that describes the sort ordering.  (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.)  stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population.  The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance).  This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values.  In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM  2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop.  (As with the histogram, more
+ * than one entry could theoretically appear.)  stavalues is not used and
+ * should be NULL.  stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions.  The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION  3
 
 #endif  /* PG_STATISTIC_H */


diff --git a/src/include/commands/command.h b/src/include/commands/command.h

index 8b108451d2accff7969f55e6972ad389551829a1..7eb1a4fab846aeff33b3f5cca4f60b9c4c3b5fb5 100644 (file)


--- a/src/include/commands/command.h
+++ b/src/include/commands/command.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
 extern void AlterTableAddColumn(const char *relationName,
                    bool inh, ColumnDef *colDef);
 
-extern void AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+                                        bool inh, const char *colName,
+                                        Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+                                           bool inh, const char *colName,
+                                           Node *statsTarget);
 
 extern void AlterTableDropColumn(const char *relationName,
                     bool inh, const char *colName,


diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index d82d22fcdfcbe3482ed5dbf1b66bf52b607767c3..87bb0007aa067dcbfbe15d31cccfbe00f61df460 100644 (file)


--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -1,129 +1,27 @@
 /*-------------------------------------------------------------------------
  *
  * vacuum.h
- *   header file for postgres vacuum cleaner
+ *   header file for postgres vacuum cleaner and statistics analyzer
  *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef VACUUM_H
 #define VACUUM_H
 
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
 
 
-typedef struct VAttListData
-{
-   int         val_dummy;
-   struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
-   BlockNumber blkno;          /* BlockNumber of this Page */
-   Size        free;           /* FreeSpace on this Page */
-   uint16      offsets_used;   /* Number of OffNums used by vacuum */
-   uint16      offsets_free;   /* Number of OffNums free or to be free */
-   OffsetNumber offsets[1];    /* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
-   int         empty_end_pages;/* Number of "empty" end-pages */
-   int         num_pages;      /* Number of pages in pagedesc */
-   int         num_allocated_pages;    /* Number of allocated pages in
-                                        * pagedesc */
-   VacPage    *pagedesc;       /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
-   Form_pg_attribute attr;
-   Datum       best,
-               guess1,
-               guess2,
-               max,
-               min;
-   int         best_len,
-               guess1_len,
-               guess2_len,
-               max_len,
-               min_len;
-   long        best_cnt,
-               guess1_cnt,
-               guess1_hits,
-               guess2_hits,
-               null_cnt,
-               nonnull_cnt,
-               max_cnt,
-               min_cnt;
-   FmgrInfo    f_cmpeq,
-               f_cmplt,
-               f_cmpgt;
-   Oid         op_cmplt;
-   regproc     outfunc;
-   Oid         typelem;
-   bool        initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
-   Oid         vrl_relid;
-   struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
-   ItemPointerData new_tid;
-   ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
-   ItemPointerData tid;        /* tuple ID */
-   VacPage     vacpage;        /* where to move */
-   bool        cleanVpd;       /* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
-   Oid         relid;
-   int         num_tuples;
-   int         num_pages;
-   Size        min_tlen;
-   Size        max_tlen;
-   bool        hasindex;
-   int         num_vtlinks;
-   VTupleLink  vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE 1000000000      /* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                               bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
 
 #endif  /* VACUUM_H */


diff --git a/src/include/config.h.in b/src/include/config.h.in

index 0d989dbbb3155bfaa218fce2d6181c45921191de..01593a4ce963a05484b025e5206f27d8b2bd952b 100644 (file)


--- a/src/include/config.h.in
+++ b/src/include/config.h.in
@@ -8,7 +8,7 @@
  * or in config.h afterwards.  Of course, if you edit config.h, then your
  * changes will be overwritten the next time you run configure.
  *
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
  */
 
 #ifndef CONFIG_H
@@ -156,6 +156,11 @@
 #define INDEX_MAX_KEYS     16
 #define FUNC_MAX_ARGS      INDEX_MAX_KEYS
 
+/*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET  10
+
 /*
  * Define this to make libpgtcl's "pg_result -assign" command process C-style
  * backslash sequences in returned tuple data and convert Postgres array


diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 2cf9378cf116426106be2cba0bb29d970e561c09..0967bef24ba9437360c5142ffc6f770107c9aa5a 100644 (file)


--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -628,7 +628,6 @@ typedef struct GroupState
  *  SortState information
  *
  *     sort_Done       indicates whether sort has been performed yet
- *     sort_Keys       scan key structures describing the sort keys
  *     tuplesortstate  private state of tuplesort.c
  * ----------------
  */
@@ -636,7 +635,6 @@ typedef struct SortState
 {
    CommonScanState csstate;    /* its first field is NodeTag */
    bool        sort_Done;
-   ScanKey     sort_Keys;
    void       *tuplesortstate;
 } SortState;
 


diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 1614d787bcb8d0ceac119c020b51ae18ffebd013..63b1b1046a8e71675ed81102c38134886a45f0bc 100644 (file)


--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
    NodeTag     type;
    char        subtype;        /*------------
                                 *  A = add column
-                                *  T = alter column
+                                *  T = alter column default
+                                *  S = alter column statistics
                                 *  D = drop column
                                 *  C = add constraint
                                 *  X = drop constraint
-                                *  E = add toast table,
+                                *  E = create toast table
                                 *  U = change owner
                                 *------------
                                 */
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
 } ClusterStmt;
 
 /* ----------------------
- *     Vacuum Statement
+ *     Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
  * ----------------------
  */
 typedef struct VacuumStmt
 {
    NodeTag     type;
-   bool        verbose;        /* print status info */
-   bool        analyze;        /* analyze data */
-   char       *vacrel;         /* table to vacuum */
-   List       *va_spec;        /* columns to analyse */
+   bool        vacuum;         /* do VACUUM step */
+   bool        analyze;        /* do ANALYZE step */
+   bool        verbose;        /* print progress info */
+   char       *vacrel;         /* name of single table to process, or NULL */
+   List       *va_cols;        /* list of column names, or NIL for all */
 } VacuumStmt;
 
 /* ----------------------


diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 3ae8e09f57a30468fdece0f7fe9098a3ca05653f..9e69ed60992a7b7307fcc79150eccd7a6f62f963 100644 (file)


--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
  * reskey and reskeyop are the execution-time representation of sorting.
  * reskey must be zero in any non-sort-key item.  The reskey of sort key
  * targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID.  reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
  *
  * Both reskey and reskeyop are typically zero during parse/plan stages.
  * The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
    Index       ressortgroupref;
    /* nonzero if referenced by a sort/group clause */
    Index       reskey;         /* order of key in a sort (for those > 0) */
-   Oid         reskeyop;       /* sort operator's regproc Oid */
+   Oid         reskeyop;       /* sort operator's Oid */
    bool        resjunk;        /* set to true to eliminate the attribute
                                 * from final target list */
 } Resdom;


diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index f643ef879689640186250b344d4734f80aa6dc49..c76d9b4af7136f23fdc022f53127925129760519 100644 (file)


--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
    Oid         hashjoinoperator;       /* copy of clause operator */
 
    /* cache space for hashclause processing; -1 if not yet set */
-   Selectivity left_dispersion;/* dispersion of left side */
-   Selectivity right_dispersion;       /* dispersion of right side */
+   Selectivity left_bucketsize;        /* avg bucketsize of left side */
+   Selectivity right_bucketsize;       /* avg bucketsize of right side */
 } RestrictInfo;
 
 /*


diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 5caa576f0c0be002c3e1bc88a7ff75746f5c45b4..cbf6df063a3cc4ae782cab805acaaf80b9d2025f 100644 (file)


--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
               List *restrictlist,
               List *outersortkeys, List *innersortkeys);
 extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-             List *restrictlist, Selectivity innerdispersion);
+             List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
 extern Cost cost_qual_eval(List *quals);
 extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,


diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 5b71eded86fcac8f21a5732ef81d8906fd9263a3..0839feb4b2fe5c0d137a7705469acb3814779181 100644 (file)


--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion);
+                    Selectivity innerbucketsize);
 
 /*
  * prototypes for relnode.c


diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index f1c4aff1c804172da17b24a438551c0b631c98c0..6b35deed2867649e350da0c081a983eb0dec5821 100644 (file)


--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
 extern Oid get_atttype(Oid relid, AttrNumber attnum);
 extern bool get_attisset(Oid relid, char *attname);
 extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
-                 double min_estimate);
 extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
 extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
 extern char get_typstorage(Oid typid);
 extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+                            Oid atttype, int32 atttypmod,
+                            int reqkind, Oid reqop,
+                            Datum **values, int *nvalues,
+                            float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+                             Datum *values, int nvalues,
+                             float4 *numbers, int nnumbers);
 
 #define TypeIsToastable(typid) (get_typstorage(typid) != 'p')
 


diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index 8d4e2ae62c419658f44ec3f1adb9853a658ea2c6..342f7bf8a566b73e4f8393553ccb332ed067ed06 100644 (file)


--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,7 +53,7 @@
 #define RULEOID            22
 #define SHADOWNAME     23
 #define SHADOWSYSID        24
-#define STATRELID      25
+#define STATRELATT     25
 #define TYPENAME       26
 #define TYPEOID            27
 


diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 7f273776c36a26cc1e6b688b4a530f74a7c108f2..001761796e2492781d98aec7c8b311b4538e251a 100644 (file)


--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
  */
 
 extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
-                    bool randomAccess);
+                     int nkeys,
+                     Oid *sortOperators, AttrNumber *attNums,
+                     bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                      bool enforceUnique,
                      bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
 extern void tuplesort_markpos(Tuplesortstate *state);
 extern void tuplesort_restorepos(Tuplesortstate *state);
 
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+   SORTFUNC_LT,                /* raw "<" operator */
+   SORTFUNC_CMP,               /* -1 / 0 / 1 three-way comparator */
+   SORTFUNC_REVCMP             /* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+                              RegProcedure *sortFunction,
+                              SortFunctionKind *kind);
+
 #endif  /* TUPLESORT_H */


diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c

index 5614a34b0fef7390ba8ec0a4184fea10da5e7d69..c03880f497d0d62526a94157175fede654376f28 100644 (file)


--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y

index 345efb6576e2ddd8ff944993799a816b12bd34c8..91708bd91fae24f446576cacaea6ccbc1028163d 100644 (file)


--- a/src/interfaces/ecpg/preproc/preproc.y
+++ b/src/interfaces/ecpg/preproc/preproc.y
@@ -134,7 +134,7 @@ make_name(void)
 
 %union {
    double                  dval;
-        int                     ival;
+   int                     ival;
    char *                  str;
    struct when             action;
    struct index        index;
@@ -224,7 +224,7 @@ make_name(void)
        NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
        OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
        RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
-       SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+       SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
        TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
        VALID, VERBOSE, VERSION
 
@@ -285,7 +285,7 @@ make_name(void)
 %type      file_name AexprConst ParamNo c_expr ConstTypename
 %type     in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
 %type     opt_indirection expr_list extract_list extract_arg
-%type     position_list substr_list substr_from alter_column_action
+%type     position_list substr_list substr_from alter_column_default
 %type     trim_list in_expr substr_for attr attrs drop_behavior
 %type     Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
 %type     opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
 %type     row_expr row_descriptor row_list ConstDatetime opt_chain
 %type     SelectStmt into_clause OptTemp ConstraintAttributeSpec
 %type     opt_table opt_all sort_clause sortby_list ConstraintAttr 
-%type     sortby OptUseOp opt_inh_star relation_name_list name_list
+%type     sortby OptUseOp relation_name_list name_list
 %type     group_clause having_clause from_clause opt_distinct
 %type     join_outer where_clause relation_expr sub_type opt_arg
 %type     opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
 %type      NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
 %type      copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
 %type      opt_with_copy FetchStmt direction fetch_how_many from_in
-%type      ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type      opt_analyze opt_va_list va_list ExplainStmt index_params
+%type      ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type      analyze_keyword opt_name_list ExplainStmt index_params
 %type      index_list func_index index_elem opt_class access_method_clause
 %type      index_opt_unique IndexStmt func_return ConstInterval
 %type      func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
 %type     opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
 %type     case_expr when_clause_list case_default case_arg when_clause
 %type      select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type      select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type      select_offset_value ReindexStmt join_type opt_boolean
 %type     join_qual update_list AlterSchemaStmt joined_table
 %type     opt_level opt_lock lock_type users_in_new_group_clause
 %type      OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt:  AlterSchemaStmt          { output_statement($1, 0, NULL, connection); }
        | CreatedbStmt      { output_statement($1, 0, NULL, connection); }
        | DropdbStmt        { output_statement($1, 0, NULL, connection); }
        | VacuumStmt        { output_statement($1, 0, NULL, connection); }
+       | AnalyzeStmt       { output_statement($1, 0, NULL, connection); }
        | VariableSetStmt   { output_statement($1, 0, NULL, connection); }
        | VariableShowStmt  { output_statement($1, 0, NULL, connection); }
        | VariableResetStmt { output_statement($1, 0, NULL, connection); }
@@ -908,40 +909,41 @@ CheckPointStmt: CHECKPOINT     { $$= make_str("checkpoint"); }
 
 
 /*****************************************************************************
- *
- *     QUERY :
  *
  * ALTER TABLE variations
  *
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE  ADD [COLUMN]  */
-        ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+        ALTER TABLE relation_expr ADD opt_column columnDef
+       {
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+       }
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+   | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
        }
-/* ALTER TABLE  ALTER [COLUMN]  {SET DEFAULT |DROP
-DEFAULT} */
-   | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
-       alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+   | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+           $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
        }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-   | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
+   | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
        }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+   | ALTER TABLE relation_expr ADD TableConstraint
        {
-           $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+           $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
        }
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+   | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
        }
 /* ALTER TABLE  OWNER TO UserId */     
    | ALTER TABLE relation_name OWNER TO UserId   
@@ -950,7 +952,7 @@ DEFAULT} */
        }
        ;
 
-alter_column_action:
+alter_column_default:
         SET DEFAULT a_expr { $$ = cat2_str(make_str("set default"), $3); }
         | DROP DEFAULT          { $$ = make_str("drop default"); }
         ;
@@ -1234,10 +1236,6 @@ key_reference:  NO ACTION    { $$ = make_str("no action"); }
        | SET NULL_P    { $$ = make_str("set null"); }
        ;
 
-opt_only: ONLY     { $$ = make_str("only"); }
-   | /*EMPTY*/ { $$ = EMPTY; }
-   ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'                { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
                 | /*EMPTY*/                    { $$ = EMPTY; }
                 ;      
@@ -2013,10 +2011,9 @@ opt_force:      FORCE        { $$ = make_str("force"); }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
-                   $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+                   $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
                }
        ;
 
@@ -2250,38 +2247,44 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
+               {
+                   $$ = cat_str(2, make_str("vacuum"), $2);
+               }
+       | VACUUM opt_verbose relation_name
                {
                    $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose AnalyzeStmt
                {
-                   if ( strlen($5) > 0 && strlen($4) == 0 )
-                       mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
-                   $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+                   $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
        ;
 
-opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   $$ = cat_str(2, $1, $2);
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   $$ = cat_str(4, $1, $2, $3, $4);
+               }
        ;
 
-opt_analyze:  ANALYZE                  { $$ = make_str("analyze"); }
-       | ANALYSE               { $$ = make_str("analyse"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+analyze_keyword:  ANALYZE                  { $$ = make_str("analyze"); }
+       | ANALYSE                           { $$ = make_str("analyse"); }
        ;
 
-opt_va_list:  '(' va_list ')'              { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
        | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
-va_list:  name
-               { $$=$1; }
-       | va_list ',' name
-               { $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list:  '(' name_list ')'      { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+       | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
 
@@ -2383,9 +2386,9 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
-                   $$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+                   $$ = cat_str(3, make_str("delete from"), $3, $4);
                }
        ;
 
@@ -2416,12 +2419,12 @@ opt_lmode:      SHARE                           { $$ = make_str("share"); }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
-                   $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+                   $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
                }
        ;
 
@@ -2667,10 +2670,6 @@ select_offset_value:     PosIntConst {
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                 { $$ = make_str("*"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
-       ;
-
 relation_name_list:  name_list { $$ = $1; };
 
 name_list:  name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause                { $$ = $1; }
        | /* EMPTY */               { $$ = EMPTY; }
                 ;
 
-update_list:  OF va_list
+update_list:  OF name_list
               {
            $$ = cat2_str(make_str("of"), $2);
          }
@@ -5028,6 +5027,7 @@ TokenId:  ABSOLUTE            { $$ = make_str("absolute"); }
    | SHARE             { $$ = make_str("share"); }
    | START             { $$ = make_str("start"); }
    | STATEMENT         { $$ = make_str("statement"); }
+   | STATISTICS        { $$ = make_str("statistics"); }
    | STDIN                         { $$ = make_str("stdin"); }
    | STDOUT                        { $$ = make_str("stdout"); }
    | SYSID                         { $$ = make_str("sysid"); }


diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out

index a2b0ad9e3e722827117e707ae7033a210771e9b4..46bc60f6955d60e4a52170d4b7281048b146d97d 100644 (file)


--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -353,12 +353,28 @@ WHERE pg_statistic.starelid != 0 AND
 -----+----------
 (0 rows)
 
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop 
------+-------
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3 
+-----+--------
 (0 rows)
 
 SELECT oid, pg_trigger.tgrelid 


diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 9d4ff1b39856b3d468938ef709578649fe4d84ce..1b094a6e3bfe2f58a8e7b108c0088131a36feb35 100644 (file)


--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
           (p2.pronargs = 1 AND p1.aggbasetype = 0)));
   oid  | aggname | oid |   proname   
 -------+---------+-----+-------------
- 16997 | max     | 768 | int4larger
- 17011 | min     | 769 | int4smaller
+ 17010 | max     | 768 | int4larger
+ 17024 | min     | 769 | int4smaller
 (2 rows)
 
 -- Cross-check finalfn (if present) against its entry in pg_proc.


diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql

index b7ea1f63eaa8268d3583a670e9f3985619be0453..88727a6c76ec6922fc12f4456fba2dc650570f0a 100644 (file)


--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -177,10 +177,18 @@ SELECT    oid, pg_statistic.starelid
 FROM   pg_statistic 
 WHERE  pg_statistic.starelid != 0 AND 
    NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
 SELECT oid, pg_trigger.tgrelid 
 FROM   pg_trigger 
 WHERE  pg_trigger.tgrelid != 0 AND 




This is the main PostgreSQL git repository.
RSS
Atom
      class="PARAMETER">newcolumn
  ALTER TABLE table
@@ -159,9 +161,14 @@ ALTER TABLE table
     ALTER TABLE changes the definition of an existing table.
     The ADD COLUMN form adds a new column to the table
     using the same syntax as 
-   endterm="SQL-CREATETABLE-title">. The ALTER COLUMN form
-   allows you to set or remove the default for the column. Note that defaults
-   only apply to newly inserted rows.
+   endterm="SQL-CREATETABLE-title">.
+   The ALTER COLUMN SET/DROP DEFAULT forms
+   allow you to set or remove the default for the column. Note that defaults
+   only apply to subsequent INSERT commands; they do not
+   cause rows already in the table to change.
+   The ALTER COLUMN SET STATISTICS form allows you to
+   set the statistics-gathering target for subsequent
+   ANALYZE operations.
    The RENAME clause causes the name of a table or column
    to change without changing any of the data contained in
    the affected table. Thus, the table or column will
@@ -170,7 +177,7 @@ ALTER TABLE table
    The ADD table constraint definition clause 
    adds a new constraint to the table using the same syntax as 
    linkend="SQL-CREATETABLE" endterm="SQL-CREATETABLE-title">. 
-   The OWNER clause chnages the owner of the table to the user 
+   The OWNER clause changes the owner of the table to the user 
    new user.
   
 
@@ -190,10 +197,11 @@ ALTER TABLE table
    
 
    
-    In the current implementation, default and constraint clauses for the
+    In the current implementation of ADD COLUMN,
+    default and constraint clauses for the
     new column will be ignored. You can use the SET DEFAULT
     form of ALTER TABLE to set the default later.
-    (You will also have to update the already existing rows to the
+    (You may also want to update the already existing rows to the
     new default value, using 
     endterm="sql-update-title">.)
    
@@ -210,7 +218,7 @@ ALTER TABLE table
 
    
     You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
     catalog is not permitted.
     The PostgreSQL User's Guide has further
     information on inheritance.


diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml

new file mode 100644 (file)

index 0000000..57d3213


--- /dev/null
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -0,0 +1,219 @@
+
+
+
+ 
+  
+   ANALYZE
+  
+  SQL - Language Statements
+ 
+ 
+  
+   ANALYZE
+  
+  
+   Collect statistics about a Postgres database
+  
+ 
+ 
+  
+   2001-05-04
+  
+  
+ANALYZE [ VERBOSE ] [ table [ (column [, ...] ) ] ]
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Inputs
+   
+
+   
+    
+     
+      VERBOSE
+      
+       
+   Enables display of progress messages.
+       
+      
+     
+     
+      table
+      
+       
+   The name of a specific table to analyze. Defaults to all tables.
+       
+      
+     
+     
+      column
+      
+       
+   The name of a specific column to analyze. Defaults to all columns.
+       
+      
+     
+    
+   
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Outputs
+   
+   
+
+    
+     
+      
+ANALYZE
+       
+      
+       
+   The command is complete.
+       
+      
+     
+
+    
+   
+  
+ 
+
+ 
+  
+   2001-05-04
+  
+  
+   Description
+  
+  
+   ANALYZE collects statistics about the contents of
+   Postgres tables, and stores the results in
+   the system table pg_statistic.  Subsequently,
+   the query planner uses the statistics to help determine the most efficient
+   execution plans for queries.
+  
+
+  
+   With no parameter, ANALYZE examines every table in the
+   current database.  With a parameter, ANALYZE examines
+   only that table.  It is further possible to give a list of column names,
+   in which case only the statistics for those columns are updated.
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Notes
+   
+
+  
+   It is a good idea to run ANALYZE periodically, or
+   just after making major changes in the contents of a table.  Accurate
+   statistics will help the planner to choose the most appropriate query
+   plan, and thereby improve the speed of query processing.  A common
+   strategy is to run VACUUM and ANALYZE
+   once a day during a low-usage time of day.
+  
+
+  
+   Unlike VACUUM,
+   ANALYZE requires
+   only a read lock on the target table, so it can run in parallel with
+   other activity on the table.
+  
+
+  
+   For large tables, ANALYZE takes a random sample of the
+   table contents, rather than examining every row.  This allows even very
+   large tables to be analyzed in a small amount of time.  Note however
+   that the statistics are only approximate, and will change slightly each
+   time ANALYZE is run, even if the actual table contents
+   did not change.  This may result in small changes in the planner's
+   estimated costs shown by EXPLAIN.
+  
+
+  
+   The collected statistics usually include a list of some of the most common
+   values in each column and a histogram showing the approximate data
+   distribution in each column.  One or both of these may be omitted if
+   ANALYZE deems them uninteresting (for example, in
+   a unique-key column, there are no common values) or if the column
+   datatype does not support the appropriate operators.
+  
+
+  
+   The extent of analysis can be controlled by adjusting the per-column
+   statistics target with ALTER TABLE ALTER COLUMN SET
+   STATISTICS (see
+   ALTER TABLE).  The
+   target value sets the maximum number of entries in the most-common-value
+   list and the maximum number of bins in the histogram.  The default
+   target value is 10, but this can be adjusted up or down to trade off
+   accuracy of planner estimates against the time taken for
+   ANALYZE and the
+   amount of space occupied in pg_statistic.
+   In particular, setting the statistics target to zero disables collection of
+   statistics for that column.  It may be useful to do that for columns that
+   are never used as part of the WHERE, GROUP BY, or ORDER BY clauses of
+   queries, since the planner will have no use for statistics on such columns.
+  
+
+  
+   The largest statistics target among the columns being analyzed determines
+   the number of table rows sampled to prepare the statistics.  Increasing
+   the target causes a proportional increase in the time and space needed
+   to do ANALYZE.
+  
+
+  
+ 
+
+ 
+  
+   Compatibility
+  
+
+  
+   
+    2001-05-04
+   
+   
+    SQL92
+   
+   
+    There is no ANALYZE statement in SQL92.
+   
+  
+ 
+
+
+


diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml

index 51cb8a9ffdacfe41115a94d41b11e97fa1e6b6f9..cbb182466ea44d231b4271f54f2c14da9534307b 100644 (file)


--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -1,5 +1,5 @@
 
 
@@ -15,15 +15,15 @@ Postgres documentation
    VACUUM
   
   
-   Clean and analyze a Postgres database
+   Clean and optionally analyze a Postgres database
   
  
  
   
-   1999-07-20
+   2001-05-04
   
   
-VACUUM [ VERBOSE ] [ ANALYZE ] [ table ]
+VACUUM [ VERBOSE ] [ table ]
 VACUUM [ VERBOSE ] ANALYZE [ table [ (column [, ...] ) ] ]
   
 
@@ -49,7 +49,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
       ANALYZE
       
        
-   Updates column statistics used by the optimizer to
+   Updates statistics used by the optimizer to
    determine the most efficient way to execute a query.
        
       
@@ -90,7 +90,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
        
       
        
-   The command has been accepted and the database is being cleaned.
+   The command is complete.
        
       
      
@@ -144,28 +144,26 @@ NOTICE:  Index index: Pages 28;
    Description
   
   
-   VACUUM serves two purposes in 
-   Postgres as both a means to reclaim storage and
-   also a means to collect information for the optimizer.
+   VACUUM reclaims storage occupied by deleted tuples.
+   In normal Postgres operation, tuples that
+   are DELETEd or obsoleted by UPDATE are not physically removed from
+   their table; they remain present until a VACUUM is
+   done.  Therefore it's necessary to do VACUUM
+   periodically, especially on frequently-updated tables.
   
 
   
-   VACUUM opens every table in the database,
-   cleans out records from rolled back transactions, and updates statistics in the
-   system catalogs.  The statistics maintained include the number of
-   tuples and number of pages stored in all tables.
-  
-
-
-  
-   VACUUM ANALYZE collects statistics representing the
-   dispersion of the data in each column.
-   This information is valuable when several query execution paths are possible.
+   With no parameter, VACUUM processes every table in the
+   current database.  With a parameter, VACUUM processes
+   only that table.
   
 
   
-   Running VACUUM
-   periodically will increase the speed of the database in processing user queries.
+   VACUUM ANALYZE performs a VACUUM
+   and then an ANALYZE for each selected table.  This
+   is a handy combination form for routine maintenance scripts.  See
+   ANALYZE
+   for more details about its processing.
   
 
   
@@ -175,16 +173,15 @@ NOTICE:  Index index: Pages 28;
    
     Notes
    
-   
-    The open database is the target for VACUUM.
-   
+
    
     We recommend that active production databases be
     VACUUM-ed nightly, in order to remove
     expired rows. After copying a large table into
     Postgres or after deleting a large number
     of records, it may be a good idea to issue a VACUUM
-    ANALYZE query. This will update the system catalogs with
+    ANALYZE command for the affected table. This will update the
+    system catalogs with
     the results of all recent changes, and allow the
     Postgres query optimizer to make better
     choices in planning user queries.


diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml

index b92ee0868d029cf48443f4240fab5224bc958862..9a977a6515c97db601f13f5f43413bc3e81a46c8 100644 (file)


--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
 
@@ -26,6 +26,7 @@ PostgreSQL Reference Manual
    &alterGroup;
    &alterTable;
    &alterUser;
+   &analyze;
    &begin;
    &checkpoint;
    &close;


diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml

index d38e78a4e1af22651531a65d320f427ea71b175b..57d8bb79c28d69da43ce1897f0dacb4f3dd1a56b 100644 (file)


--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -1,5 +1,5 @@
 
 
  
@@ -244,7 +244,7 @@ SELECT (a + b) AS c FROM test_complex;
     only a small fraction.  '<' will accept a fraction that depends on
     where the given constant falls in the range of values for that table
     column (which, it just so happens, is information collected by
-    VACUUM ANALYZE and made available to the selectivity estimator).
+    ANALYZE and made available to the selectivity estimator).
     '<=' will accept a slightly larger fraction than '<' for the same
     comparison constant, but they're close enough to not be worth
     distinguishing, especially since we're not likely to do better than a


diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c

index 769f754b6690919606bdaaf8a016260382abdef8..86d704e8d08779e32b38e3d4d4f938072adeccf7 100644 (file)


--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
        Form_pg_attribute attr2 = tupdesc2->attrs[i];
 
        /*
-        * We do not need to check every single field here, and in fact
-        * some fields such as attdispersion probably shouldn't be
-        * compared.  We can also disregard attnum (it was used to place
-        * the row in the attrs array) and everything derived from the
-        * column datatype.
+        * We do not need to check every single field here: we can disregard
+        * attrelid, attnum (it was used to place the row in the attrs array)
+        * and everything derived from the column datatype.
         */
        if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
            return false;
        if (attr1->atttypid != attr2->atttypid)
            return false;
+       if (attr1->attstattarget != attr2->attstattarget)
+           return false;
        if (attr1->atttypmod != attr2->atttypmod)
            return false;
        if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
    else
        MemSet(NameStr(att->attname), 0, NAMEDATALEN);
 
-   att->attdispersion = 0;     /* dummy value */
+   att->attstattarget = 0;
    att->attcacheoff = -1;
    att->atttypmod = typmod;
 
    att->attnum = attributeNumber;
-   att->attnelems = attdim;
+   att->attndims = attdim;
    att->attisset = attisset;
 
    att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
    att->attbyval = true;
    att->attalign = 'i';
    att->attstorage = 'p';
-   att->attnelems = 0;
+   att->attndims = 0;
 }
 
 /* ----------------------------------------------------------------


diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c

index 1c5577b88a066a1ecebfd6ce317147efc28d489c..06010896821e5caa9627c17f6328239ec3c277b6 100644 (file)


--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
 #endif
 
 /*
-** routine to build an index.  Basically calls insert over and over
-*/
+ * routine to build an index.  Basically calls insert over and over
+ */
 Datum
 gistbuild(PG_FUNCTION_ARGS)
 {
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
 
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

index aa76ba232a05c21da94012fbefbc287924aa154f..9617fcc33a6a0bb5bf4556944cc433be26ad0331 100644 (file)


--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    HashItem    hitem;
    Node       *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c

index fb509ab66de99d90fcdab322dd36af40551316d1..2a9df577b10c56de723c68ae329e47847849fb71 100644 (file)


--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -166,6 +166,43 @@ heap_tuple_untoast_attr(varattrib *attr)
 }
 
 
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+   varattrib  *attr = (varattrib *) DatumGetPointer(value);
+   Size        result;
+
+   if (VARATT_IS_COMPRESSED(attr))
+   {
+       /*
+        * va_rawsize shows the original data size, whether the datum
+        * is external or not.
+        */
+       result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+   }
+   else if (VARATT_IS_EXTERNAL(attr))
+   {
+       /*
+        * an uncompressed external attribute has rawsize including the
+        * header (not too consistent!)
+        */
+       result = attr->va_content.va_external.va_rawsize;
+   }
+   else
+   {
+       /* plain untoasted datum */
+       result = VARSIZE(attr);
+   }
+   return result;
+}
+
+
 /* ----------
  * toast_delete -
  *


diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

index 97d99da4fde7bbbfe009c7c7baf04dc557390cd9..f456e0c9306f4f3c191d75172463bf852e905041 100644 (file)


--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    if (usefast)
    {
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
 
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c

index 3752a59e99a3259dcef8feb7660927baf8308a4a..a8c6a13ea3c14626245bad59e372b66b0d5c25e2 100644 (file)


--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* count the tuples as we insert them */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh

index c2993fa8fc6b474bc13badd0c4369ca56fdbb9d4..cac53f3e0853262c213239e698170311a6ee8e1c 100644 (file)


--- a/src/backend/catalog/genbki.sh
+++ b/src/backend/catalog/genbki.sh
@@ -10,7 +10,7 @@
 #
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
 #
 # NOTES
 #    non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
     fi
 done
 
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
 for dir in $INCLUDE_DIRS; do
     if [ -f "$dir/config.h" ]; then
         INDEXMAXKEYS=`grep '#define[   ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+        DEFAULTATTSTATTARGET=`grep '#define[   ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
         break
     fi
 done
@@ -168,6 +170,7 @@ sed -e "s/;[    ]*$//g" \
     -e "s/(NameData/(name/g" \
     -e "s/(Oid/(oid/g" \
     -e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+    -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
     -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
     -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
     -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \


diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

index 54867d51a4b631241e649453750b03ee0c1aeef4..03f16e11c3f3710b2589d8e7330bfd0a2bb386b8 100644 (file)


--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
 
 /*
  * Note:
- *     Should the executor special case these attributes in the future?
- *     Advantage:  consume 1/2 the space in the ATTRIBUTE relation.
- *     Disadvantage:  having rules to compute values in these tuples may
- *             be more difficult if not impossible.
+ *     Should the system special case these attributes in the future?
+ *     Advantage:  consume much less space in the ATTRIBUTE relation.
+ *     Disadvantage:  special cases will be all over the place.
  */
 
 static FormData_pg_attribute a1 = {
-   0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
-   SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+   0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+   SelfItemPointerAttributeNumber, 0, -1, -1,
+   false, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a2 = {
-   0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
-   ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"oid"}, OIDOID, 0, sizeof(Oid),
+   ObjectIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a3 = {
-   0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
-   MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+   MinTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a4 = {
-   0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
-   MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+   MinCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a5 = {
-   0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
-   MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+   MaxTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a6 = {
-   0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
-   MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+   MaxCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 /*
-   We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
 static FormData_pg_attribute a7 = {
-   0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
-   TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+   TableOidAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * This function returns a Form_pg_attribute pointer for a system attribute.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+   if (attno >= 0 || attno < - (int) lengthof(SysAtt))
+       elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+            attno);
+   return SysAtt[-attno - 1];
+}
 
 /* ----------------------------------------------------------------
  *             XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
  *     8) the relations are closed and the new relation's oid
  *        is returned.
  *
- * old comments:
- *     A new relation is inserted into the RELATION relation
- *     with the specified attribute(s) (newly inserted into
- *     the ATTRIBUTE relation).  How does concurrency control
- *     work?  Is it automatic now?  Expects the caller to have
- *     attname, atttypid, atttyparg, attproc, and attlen domains filled.
- *     Create fills the attnum domains sequentually from zero,
- *     fills the attdispersion domains with zeros, and fills the
- *     attrelid fields with the relid.
- *
- *     scan relation catalog for name conflict
- *     scan type catalog for typids (if not arg)
- *     create and insert attribute(s) into attribute catalog
- *     create new relation
- *     insert new relation into attribute catalog
- *
- *     Should coordinate with heap_create_with_catalog(). Either
- *     it should not be called or there should be a way to prevent
- *     the relation from being removed at the end of the
- *     transaction if it is successful ('u'/'r' may be enough).
- *     Also, if the transaction does not commit, then the
- *     relation should be removed.
- *
- *     XXX amcreate ignores "off" when inserting (for now).
- *     XXX amcreate (like the other utilities) needs to understand indexes.
- *
  * ----------------------------------------------------------------
  */
 
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
     */
    for (i = 0; i < natts; i++)
    {
-       for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+       for (j = 0; j < (int) lengthof(SysAtt); j++)
        {
-           if (strcmp(NameStr(HeapAtt[j]->attname),
+           if (strcmp(NameStr(SysAtt[j]->attname),
                       NameStr(tupdesc->attrs[i]->attname)) == 0)
            {
                elog(ERROR, "Attribute '%s' has a name conflict"
                     "\n\tName matches an existing system attribute",
-                    NameStr(HeapAtt[j]->attname));
+                    NameStr(SysAtt[j]->attname));
            }
        }
        if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
        /* Fill in the correct relation OID */
        (*dpp)->attrelid = new_rel_oid;
        /* Make sure these are OK, too */
-       (*dpp)->attdispersion = 0;
+       (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
        (*dpp)->attcacheoff = -1;
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
    /*
     * next we add the system attributes..
     */
-   dpp = HeapAtt;
+   dpp = SysAtt;
    for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
    {
        /* Fill in the correct relation OID */
        /* HACK: we are writing on static data here */
        (*dpp)->attrelid = new_rel_oid;
        /* Unneeded since they should be OK in the constant data anyway */
-       /* (*dpp)->attdispersion = 0; */
+       /* (*dpp)->attstattarget = 0; */
        /* (*dpp)->attcacheoff = -1; */
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
     * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
     * finds the relation has 0 rows and pages. See index.c.)
     */
-   new_rel_reltup->relpages = 10;      /* bogus estimates */
-   new_rel_reltup->reltuples = 1000;
+   switch (relkind)
+   {
+       case RELKIND_RELATION:
+       case RELKIND_INDEX:
+       case RELKIND_TOASTVALUE:
+           new_rel_reltup->relpages = 10;  /* bogus estimates */
+           new_rel_reltup->reltuples = 1000;
+           break;
+       case RELKIND_SEQUENCE:
+           new_rel_reltup->relpages = 1;
+           new_rel_reltup->reltuples = 1;
+           break;
+       default:                /* views, etc */
+           new_rel_reltup->relpages = 0;
+           new_rel_reltup->reltuples = 0;
+           break;
+   }
 
    new_rel_reltup->relowner = GetUserId();
    new_rel_reltup->reltype = new_type_oid;


diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index 2adb30e1ed8ecf91d12c0028495b8911ece7068d..5eefab114891fdc1b2bbcc7b407d6c96ac3c75ca 100644 (file)


--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -55,7 +55,7 @@
  */
 #define AVG_ATTR_SIZE 8
 #define NTUPLES_PER_PAGE(natts) \
-   ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+   ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
    ((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
 
 /* non-export function prototypes */
@@ -98,39 +98,6 @@ IsReindexProcessing(void)
    return reindexing;
 }
 
-/* ----------------------------------------------------------------
- *   sysatts is a structure containing attribute tuple forms
- *   for system attributes (numbered -1, -2, ...).  This really
- *   should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- *     Oid             attrelid;
- *     NameData        attname;
- *     Oid             atttypid;
- *     uint32          attnvals;
- *     int16           attlen;
- *     AttrNumber      attnum;
- *     uint32          attnelems;
- *     int32           attcacheoff;
- *     int32           atttypmod;
- *     bool            attbyval;
- *     bool            attisset;
- *     char            attalign;
- *     bool            attnotnull;
- *     bool            atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
-   {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
 /* ----------------------------------------------------------------
  *     GetHeapRelationOid
  * ----------------------------------------------------------------
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
    for (i = 0; i < numatts; i++)
    {
        AttrNumber  atnum;      /* attributeNumber[attributeOffset] */
-       AttrNumber  atind;
        Form_pg_attribute from;
        Form_pg_attribute to;
 
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
        {
 
            /*
-            * here we are indexing on a system attribute (-1...-n) so we
-            * convert atnum into a usable index 0...n-1 so we can use it
-            * to dereference the array sysatts[] which stores tuple
-            * descriptor information for system attributes.
+            * here we are indexing on a system attribute (-1...-n)
             */
-           if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
-               elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
-           atind = (-atnum) - 1;
-
-           from = &sysatts[atind];
+           from = SystemAttributeDefinition(atnum);
        }
        else
        {
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
            if (atnum > natts)
                elog(ERROR, "Cannot create index: attribute %d does not exist",
                     atnum);
-           atind = AttrNumberGetAttrOffset(atnum);
 
-           from = heapTupDesc->attrs[atind];
+           from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
        }
 
        /*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
         */
        to->attnum = i + 1;
 
-       to->attdispersion = 0.0;
+       to->attstattarget = 0;
+       to->attcacheoff = -1;
        to->attnotnull = false;
        to->atthasdef = false;
-       to->attcacheoff = -1;
 
        /*
         * We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
 
 /* ----------------
  *     UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index).  Note that this is not used
+ * in the context of VACUUM.
  * ----------------
  */
 void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
 {
    Relation    whichRel;
    Relation    pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
     * with zero size statistics until a VACUUM is done.  The optimizer
     * will generate very bad plans if the stats claim the table is empty
     * when it is actually sizable.  See also CREATE TABLE in heap.c.
+    *
+    * Note: this path is also taken during bootstrap, because bootstrap.c
+    * passes reltuples = 0 after loading a table.  We have to estimate some
+    * number for reltuples based on the actual number of pages.
     */
    relpages = RelationGetNumberOfBlocks(whichRel);
 
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
 
        for (i = 0; i < Natts_pg_class; i++)
        {
-           nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+           nulls[i] = ' ';
            replace[i] = ' ';
            values[i] = (Datum) NULL;
        }
 
        replace[Anum_pg_class_relpages - 1] = 'r';
-       values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+       values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
        replace[Anum_pg_class_reltuples - 1] = 'r';
-       values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+       values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
        newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
        simple_heap_update(pg_class, &tuple->t_self, newtup);
        if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
    TupleDesc   heapDescriptor;
    Datum       datum[INDEX_MAX_KEYS];
    char        nullv[INDEX_MAX_KEYS];
-   long        reltuples,
+   double      reltuples,
                indtuples;
    Node       *predicate = indexInfo->ii_Predicate;
 
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
                          0,    /* number of keys */
                          (ScanKey) NULL);      /* scan key */
 
-   reltuples = indtuples = 0;
+   reltuples = indtuples = 0.0;
 
    /*
     * for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       reltuples++;
+       reltuples += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
            slot->val = heapTuple;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               indtuples++;
+               indtuples += 1.0;
                continue;
            }
        }
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       indtuples++;
+       indtuples += 1.0;
 
        /*
         * FormIndexDatum fills in its datum and null parameters with


diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 88e56869da58eee31d6c7b0a764b93c6c73476a7..24cc7a8b254dc9a10dea74b263e52cf30f477964 100644 (file)


--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,19 +8,16 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include 
-#include 
-#include 
-#include 
-#include 
+#include 
 
 #include "access/heapam.h"
+#include "access/tuptoaster.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_operator.h"
@@ -29,43 +26,139 @@
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/fmgroids.h"
-#include "utils/inval.h"
 #include "utils/syscache.h"
+#include "utils/tuplesort.h"
 
-#define swapLong(a,b)  {long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b)   {int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
-                                  stats->f_cmpgt.fn_addr != NULL && \
-                                  RegProcedureIsValid(stats->outfunc) )
 
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+   ALG_MINIMAL = 1,            /* Compute only most-common-values */
+   ALG_SCALAR                  /* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value.  For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD  256
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed.  The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ *
+ * examine_attribute only returns a struct when the column has a usable "="
+ * operator, so eqopr and eqfunc are valid in every struct that exists;
+ * ltopr is InvalidOid when no suitable "<" operator was found.
+ */
+typedef struct
+{
+   /* These fields are set up by examine_attribute */
+   int         attnum;         /* attribute number (counting from 1) */
+   AlgCode     algcode;        /* Which algorithm to use for this column */
+   int         minrows;        /* Minimum # of rows needed for stats */
+   Form_pg_attribute attr;     /* copy of pg_attribute row for column */
+   Form_pg_type attrtype;      /* copy of pg_type row for column */
+   Oid         eqopr;          /* '=' operator for datatype, if any */
+   Oid         eqfunc;         /* and associated function */
+   Oid         ltopr;          /* '<' operator for datatype, if any */
+
+   /* These fields are filled in by the actual statistics-gathering routine */
+   bool        stats_valid;    /* presumably set once the fields below are usable -- confirm */
+   float4      stanullfrac;    /* fraction of entries that are NULL */
+   int4        stawidth;       /* average width */
+   float4      stadistinct;    /* # distinct values */
+   int2        stakind[STATISTIC_NUM_SLOTS];   /* this and the arrays below have one entry per slot */
+   Oid         staop[STATISTIC_NUM_SLOTS];
+   int         numnumbers[STATISTIC_NUM_SLOTS];    /* presumably length of stanumbers[i] -- confirm */
+   float4     *stanumbers[STATISTIC_NUM_SLOTS];
+   int         numvalues[STATISTIC_NUM_SLOTS];     /* presumably length of stavalues[i] -- confirm */
+   Datum      *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+   Datum       value;          /* a data value */
+   int         tupno;          /* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+   int         count;          /* # of duplicates */
+   int         first;          /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+
+/*
+ * Swap two lvalues of the indicated type.  Each macro is wrapped in
+ * do { ... } while (0) so that an invocation followed by a semicolon forms
+ * exactly one statement and is safe as the body of an unbraced if/else.
+ * The arguments are evaluated more than once, so they must be free of
+ * side effects.
+ */
+#define swapInt(a,b)   do {int _tmp; _tmp=(a); (a)=(b); (b)=_tmp;} while (0)
+#define swapDatum(a,b) do {Datum _tmp; _tmp=(a); (a)=(b); (b)=_tmp;} while (0)
 
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+                              int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+                                       BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+                                 TupleDesc tupDesc, long totalrows,
+                                 HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+                                TupleDesc tupDesc, long totalrows,
+                                HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
 
 
 /*
- * analyze_rel() -- analyze relation
+ * analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
 {
-   HeapTuple   tuple;
    Relation    onerel;
-   int32       i;
-   int         attr_cnt,
-              *attnums = NULL;
    Form_pg_attribute *attr;
-   VacAttrStats *vacattrstats;
-   HeapScanDesc scan;
+   int         attr_cnt,
+               tcnt,
+               i;
+   VacAttrStats **vacattrstats;
+   int         targrows,
+               numrows;
+   long        totalrows;
+   HeapTuple  *rows;
+   HeapTuple   tuple;
+
+   if (vacstmt->verbose)
+       MESSAGE_LEVEL = NOTICE;
+   else
+       MESSAGE_LEVEL = DEBUG;
 
+   /*
+    * Begin a transaction for analyzing this relation.
+    *
+    * Note: All memory allocated during ANALYZE will live in
+    * TransactionCommandContext or a subcontext thereof, so it will
+    * all be released by transaction commit at the end of this routine.
+    */
    StartTransactionCommand();
 
    /*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 
    /*
     * Race condition -- if the pg_class tuple has gone away since the
-    * last time we saw it, we don't need to vacuum it.
+    * last time we saw it, we don't need to process it.
     */
    tuple = SearchSysCache(RELOID,
                           ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
 
    /*
-    * We can VACUUM ANALYZE any table except pg_statistic. see
-    * update_relstats
+    * We can ANALYZE any table except pg_statistic. See update_attstats
     */
    if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
               StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
    ReleaseSysCache(tuple);
 
+   /*
+    * Open the class, getting only a read lock on it, and check permissions
+    */
    onerel = heap_open(relid, AccessShareLock);
 
    if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
                       RELNAME))
    {
-
-       /*
-        * we already did an elog during vacuum elog(NOTICE, "Skipping
-        * \"%s\" --- only table owner can VACUUM it",
-        * RelationGetRelationName(onerel));
-        */
+       /* No need for a notice if we already complained during VACUUM */
+       if (!vacstmt->vacuum)
+           elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+                RelationGetRelationName(onerel));
        heap_close(onerel, NoLock);
        CommitTransactionCommand();
        return;
    }
 
-   elog(MESSAGE_LEVEL, "Analyzing...");
+   elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
 
-   attr_cnt = onerel->rd_att->natts;
+   /*
+    * Determine which columns to analyze
+    *
+    * Note that system attributes are never analyzed.
+    */
    attr = onerel->rd_att->attrs;
+   attr_cnt = onerel->rd_att->natts;
 
-   if (anal_cols2 != NIL)
+   if (vacstmt->va_cols != NIL)
    {
-       int         tcnt = 0;
        List       *le;
 
-       if (length(anal_cols2) > attr_cnt)
-           elog(ERROR, "vacuum: too many attributes specified for relation %s",
-                RelationGetRelationName(onerel));
-       attnums = (int *) palloc(attr_cnt * sizeof(int));
-       foreach(le, anal_cols2)
+       vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       foreach(le, vacstmt->va_cols)
        {
-           char       *col = (char *) lfirst(le);
+           char       *col = strVal(lfirst(le));
 
            for (i = 0; i < attr_cnt; i++)
            {
                if (namestrcmp(&(attr[i]->attname), col) == 0)
                    break;
            }
-           if (i < attr_cnt)   /* found */
-               attnums[tcnt++] = i;
-           else
-           {
-               elog(ERROR, "vacuum: there is no attribute %s in %s",
+           if (i >= attr_cnt)
+               elog(ERROR, "ANALYZE: there is no attribute %s in %s",
                     col, RelationGetRelationName(onerel));
-           }
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
+       }
+       attr_cnt = tcnt;
+   }
+   else
+   {
+       vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       for (i = 0; i < attr_cnt; i++)
+       {
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
        }
        attr_cnt = tcnt;
    }
 
-   vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+   /*
+    * Quit if no analyzable columns
+    */
+   if (attr_cnt <= 0)
+   {
+       heap_close(onerel, NoLock);
+       CommitTransactionCommand();
+       return;
+   }
 
+   /*
+    * Determine how many rows we need to sample, using the worst case
+    * from all analyzable columns.  We use a lower bound of 100 rows
+    * to avoid possible overflow in Vitter's algorithm.
+    */
+   targrows = 100;
    for (i = 0; i < attr_cnt; i++)
    {
-       Operator    func_operator;
-       VacAttrStats *stats;
-
-       stats = &vacattrstats[i];
-       stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
-       memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
-              ATTRIBUTE_TUPLE_SIZE);
-       stats->best = stats->guess1 = stats->guess2 = 0;
-       stats->max = stats->min = 0;
-       stats->best_len = stats->guess1_len = stats->guess2_len = 0;
-       stats->max_len = stats->min_len = 0;
-       stats->initialized = false;
-       stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
-       stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
-       func_operator = compatible_oper("=",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
-       {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
-           ReleaseSysCache(func_operator);
-       }
-       else
-           stats->f_cmpeq.fn_addr = NULL;
+       if (targrows < vacattrstats[i]->minrows)
+           targrows = vacattrstats[i]->minrows;
+   }
+
+   /*
+    * Acquire the sample rows
+    */
+   rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+   numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
 
-       func_operator = compatible_oper("<",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we are running a standalone ANALYZE, update pages/tuples stats
+    * in pg_class.  We have the accurate page count from heap_beginscan,
+    * but only an approximate number of tuples; therefore, if we are
+    * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+    * inserted by VACUUM.
+    */
+   if (!vacstmt->vacuum)
+       vac_update_relstats(RelationGetRelid(onerel),
+                           onerel->rd_nblocks,
+                           (double) totalrows,
+                           RelationGetForm(onerel)->relhasindex);
+
+   /*
+    * Compute the statistics.  Temporary results during the calculations
+    * for each column are stored in a child context.  The calc routines
+    * are responsible to make sure that whatever they store into the
+    * VacAttrStats structure is allocated in TransactionCommandContext.
+    */
+   if (numrows > 0)
+   {
+       MemoryContext col_context,
+                   old_context;
+
+       col_context = AllocSetContextCreate(CurrentMemoryContext,
+                                           "Analyze Column",
+                                           ALLOCSET_DEFAULT_MINSIZE,
+                                           ALLOCSET_DEFAULT_INITSIZE,
+                                           ALLOCSET_DEFAULT_MAXSIZE);
+       old_context = MemoryContextSwitchTo(col_context);
+       for (i = 0; i < attr_cnt; i++)
        {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
-           stats->op_cmplt = oprid(func_operator);
-           ReleaseSysCache(func_operator);
+           switch (vacattrstats[i]->algcode)
+           {
+               case ALG_MINIMAL:
+                   compute_minimal_stats(vacattrstats[i],
+                                         onerel->rd_att, totalrows,
+                                         rows, numrows);
+                   break;
+               case ALG_SCALAR:
+                   compute_scalar_stats(vacattrstats[i],
+                                        onerel->rd_att, totalrows,
+                                        rows, numrows);
+                   break;
+           }
+           MemoryContextResetAndDeleteChildren(col_context);
        }
-       else
+       MemoryContextSwitchTo(old_context);
+       MemoryContextDelete(col_context);
+
+       /*
+        * Emit the completed stats rows into pg_statistic, replacing any
+        * previous statistics for the target columns.  (If there are stats
+        * in pg_statistic for columns we didn't process, we leave them alone.)
+        */
+       update_attstats(relid, attr_cnt, vacattrstats);
+   }
+
+   /*
+    * Close source relation now, but keep lock so that no one deletes it
+    * before we commit.  (If someone did, they'd fail to clean up the
+    * entries we made in pg_statistic.)
+    */
+   heap_close(onerel, NoLock);
+
+   /* Commit and release working memory */
+   CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ *
+ * attnum counts from 1 (it indexes rd_att->attrs[attnum-1]).  NULL is
+ * returned when the column's attstattarget is <= 0, or when no "="
+ * operator whose restriction estimator is F_EQSEL can be found for the
+ * column's type.  Otherwise the result is a palloc'd, zero-filled struct
+ * holding copies of the column's pg_attribute and pg_type rows, the
+ * operator OIDs found, the chosen algorithm code and the minimum sample
+ * size (300 * attstattarget for either algorithm).
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+   Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+   Operator    func_operator;
+   Oid         oprrest;
+   HeapTuple   typtuple;
+   Oid         eqopr = InvalidOid;
+   Oid         eqfunc = InvalidOid;
+   Oid         ltopr = InvalidOid;
+   VacAttrStats *stats;
+
+   /* Don't analyze column if user has specified not to */
+   if (attr->attstattarget <= 0)
+       return NULL;
+
+   /*
+    * If column has no "=" operator, we can't do much of anything.
+    * An "=" operator is accepted only when its restriction estimator
+    * (oprrest) is F_EQSEL; otherwise eqfunc stays invalid and we give
+    * up on the column just below.
+    */
+   func_operator = compatible_oper("=",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_EQSEL)
         {
-           stats->f_cmplt.fn_addr = NULL;
-           stats->op_cmplt = InvalidOid;
+           eqopr = oprid(func_operator);
+           eqfunc = oprfuncid(func_operator);
         }
+       ReleaseSysCache(func_operator);
+   }
+   if (!OidIsValid(eqfunc))
+       return NULL;
 
-       func_operator = compatible_oper(">",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we have "=" then we're at least able to do the minimal algorithm,
+    * so start filling in a VacAttrStats struct.
+    */
+   stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+   MemSet(stats, 0, sizeof(VacAttrStats));
+   stats->attnum = attnum;
+   stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+   memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+   typtuple = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(attr->atttypid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(typtuple))
+       elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+   stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+   memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+   ReleaseSysCache(typtuple);
+   stats->eqopr = eqopr;
+   stats->eqfunc = eqfunc;
+
+   /*
+    * Is there a "<" operator with suitable semantics?  "Suitable" here
+    * means its restriction estimator (oprrest) is F_SCALARLTSEL; if not,
+    * ltopr stays InvalidOid and only the minimal algorithm is used.
+    */
+   func_operator = compatible_oper("<",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_SCALARLTSEL)
         {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
-           ReleaseSysCache(func_operator);
+           ltopr = oprid(func_operator);
         }
-       else
-           stats->f_cmpgt.fn_addr = NULL;
+       ReleaseSysCache(func_operator);
+   }
+   stats->ltopr = ltopr;
+
+   /*
+    * Determine the algorithm to use (this will get more complicated later)
+    */
+   if (OidIsValid(ltopr))
+   {
+       /* Seems to be a scalar datatype */
+       stats->algcode = ALG_SCALAR;
+       /*--------------------
+        * The following choice of minrows is based on the paper
+        * "Random sampling for histogram construction: how much is enough?"
+        * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+        * Proceedings of ACM SIGMOD International Conference on Management
+        * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
+        * says that for table size n, histogram size k, maximum relative
+        * error in bin size f, and error probability gamma, the minimum
+        * random sample size is
+        *      r = 4 * k * ln(2*n/gamma) / f^2
+        * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+        *      r = 305.82 * k
+        * Note that because of the log function, the dependence on n is
+        * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+        * bin size error with probability 0.99.  So there's no real need to
+        * scale for n, which is a good thing because we don't necessarily
+        * know it at this point.
+        *--------------------
+        */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+   else
+   {
+       /* Can't do much but the minimal stuff */
+       stats->algcode = ALG_MINIMAL;
+       /* Might as well use the same minrows as above */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+
+   return stats;
+}
 
-       tuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(stats->attr->atttypid),
-                              0, 0, 0);
-       if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are collected (if there are fewer than that many
+ * rows in the table, all rows are collected).  When the table is larger
+ * than targrows, a truly random sample is collected: every row has an
+ * equal chance of ending up in the final sample.
+ *
+ * We also estimate the total number of rows in the table, and return that
+ * into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+                   long *totalrows)
+{
+   int         numrows = 0;
+   HeapScanDesc scan;
+   HeapTuple   tuple;
+   ItemPointer lasttuple;
+   BlockNumber lastblock,
+               estblock;
+   OffsetNumber lastoffset;
+   int         numest;
+   double      tuplesperpage;
+   long        t;
+   double      rstate;
+
+   Assert(targrows > 1);
+   /*
+    * Do a simple linear scan until we reach the target number of rows.
+    */
+   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       rows[numrows++] = heap_copytuple(tuple);
+       if (numrows >= targrows)
+           break;
+   }
+   heap_endscan(scan);
+   /*
+    * If we ran out of tuples then we're done, no matter how few we
+    * collected.  No sort is needed, since they're already in order.
+    */
+   if (!HeapTupleIsValid(tuple))
+   {
+       *totalrows = numrows;
+       return numrows;
+   }
+   /*
+    * Otherwise, start replacing tuples in the sample until we reach the
+    * end of the relation.  This algorithm is from Jeff Vitter's paper
+    * (see full citation below).  It works by repeatedly computing the number
+    * of the next tuple we want to fetch, which will replace a randomly
+    * chosen element of the reservoir (current set of tuples).  At all times
+    * the reservoir is a true random sample of the tuples we've passed over
+    * so far, so when we fall off the end of the relation we're done.
+    *
+    * A slight difficulty is that since we don't want to fetch tuples or even
+    * pages that we skip over, it's not possible to fetch *exactly* the N'th
+    * tuple at each step --- we don't know how many valid tuples are on
+    * the skipped pages.  We handle this by assuming that the average number
+    * of valid tuples/page on the pages already scanned over holds good for
+    * the rest of the relation as well; this lets us estimate which page
+    * the next tuple should be on and its position in the page.  Then we
+    * fetch the first valid tuple at or after that position, being careful
+    * not to use the same tuple twice.  This approach should still give a
+    * good random sample, although it's not perfect.
+    */
+   lasttuple = &(rows[numrows-1]->t_self);
+   lastblock = ItemPointerGetBlockNumber(lasttuple);
+   lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+   /*
+    * If possible, estimate tuples/page using only completely-scanned pages.
+    */
+   for (numest = numrows; numest > 0; numest--)
+   {
+       if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+           break;
+   }
+   if (numest == 0)
+   {
+       numest = numrows;       /* don't have a full page? */
+       estblock = lastblock + 1;
+   }
+   else
+   {
+       estblock = lastblock;
+   }
+   tuplesperpage = (double) numest / (double) estblock;
+
+   t = numrows;                /* t is the # of records processed so far */
+   rstate = init_selection_state(targrows);
+   for (;;)
+   {
+       double          targpos;
+       BlockNumber     targblock;
+       OffsetNumber    targoffset,
+                       maxoffset;
+
+       t = select_next_random_record(t, targrows, &rstate);
+       /*
+        * Try to read the t'th record in the table.  The fractional part
+        * of targpos, scaled by tuplesperpage, estimates the position
+        * within the target page.  The scaling must be done before
+        * truncating to int: truncating the fraction first always yields
+        * zero, which would pin every probe to the first offset of its page.
+        */
+       targpos = (double) t / tuplesperpage;
+       targblock = (BlockNumber) targpos;
+       targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
+           FirstOffsetNumber;
+       /* Make sure we are past the last selected record */
+       if (targblock <= lastblock)
         {
-           stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
-           stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
-           ReleaseSysCache(tuple);
+           targblock = lastblock;
+           if (targoffset <= lastoffset)
+               targoffset = lastoffset + 1;
         }
-       else
+       /* Loop to find first valid record at or after given position */
+   pageloop:;
+       /*
+        * Have we fallen off the end of the relation?  (We rely on
+        * heap_beginscan to have updated rd_nblocks.)
+        */
+       if (targblock >= onerel->rd_nblocks)
+           break;
+       maxoffset = get_page_max_offset(onerel, targblock);
+       for (;;)
         {
-           stats->outfunc = InvalidOid;
-           stats->typelem = InvalidOid;
+           HeapTupleData targtuple;
+           Buffer      targbuffer;
+
+           if (targoffset > maxoffset)
+           {
+               /* Fell off end of this page, try next */
+               targblock++;
+               targoffset = FirstOffsetNumber;
+               goto pageloop;
+           }
+           ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+           heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+           if (targtuple.t_data != NULL)
+           {
+               /*
+                * Found a suitable tuple, so save it, replacing one old
+                * tuple at random
+                */
+               int     k = (int) (targrows * random_fract());
+
+               Assert(k >= 0 && k < targrows);
+               heap_freetuple(rows[k]);
+               rows[k] = heap_copytuple(&targtuple);
+               ReleaseBuffer(targbuffer);
+               lastblock = targblock;
+               lastoffset = targoffset;
+               break;
+           }
+           /* this tuple is dead, so advance to next one on same page */
+           targoffset++;
         }
     }
-   /* delete existing pg_statistic rows for relation */
-   del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
-   /* scan relation to gather statistics */
-   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
-       attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+   /*
+    * Now we need to sort the collected tuples by position (itempointer).
+    */
+   qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
 
-   heap_endscan(scan);
+   /*
+    * Estimate total number of valid rows in relation.
+    */
+   *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
 
-   /* close rel, but keep lock so it doesn't go away before commit */
-   heap_close(onerel, NoLock);
+   return numrows;
+}
 
-   /* update statistics in pg_class */
-   update_attstats(relid, attr_cnt, vacattrstats);
+/*
+ * Select a random value R uniformly distributed in 0 < R < 1
+ *
+ * The value is random() divided by MAX_RANDOM_VALUE.  Both endpoints are
+ * excluded: callers in this file take log() of the result and use it to
+ * pick an array index below the sample size, so neither 0 nor 1 may be
+ * returned.
+ */
+static double
+random_fract(void)
+{
+   long    z;
 
-   CommitTransactionCommand();
+   /*
+    * random() can produce endpoint values, try again if so.  The loop
+    * repeats until z lies strictly between 0 and MAX_RANDOM_VALUE.
+    */
+   do
+   {
+       z = random();
+   } while (! (z > 0 && z < MAX_RANDOM_VALUE));
+   return (double) z / (double) MAX_RANDOM_VALUE;
 }
 
 /*
- * attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed; each call returns the index of the next
+ * record to process.  The only extra state needed between calls is W, a
+ * random state variable.
  *
- * We compute the column min, max, null and non-null counts.
- * Plus we attempt to find the count of the value that occurs most
- * frequently in each column.  These figures are used to compute
- * the selectivity of the column.
+ * init_selection_state computes the initial W value.
  *
- * We use a three-bucket cache to get the most frequent item.
- * The 'guess' buckets count hits.  A cache miss causes guess1
- * to get the most hit 'guess' item in the most recent cycle, and
- * the new item goes into guess2.  Whenever the total count of hits
- * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+   double      u = random_fract();
+
+   /*
+    * Initial value of W (for use when Algorithm Z is first applied):
+    * exp(-ln(u)/n) for a uniform random u in (0,1), with n the sample size.
+    */
+   return exp(-log(u) / n);
+}
+
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+   /*
+    * t is the number of records processed so far and n is the size of
+    * the reservoir (sample); the return value is the index of the next
+    * record to process.  *stateptr carries W between calls; it is read
+    * and updated only on the Algorithm Z path below and left untouched
+    * on the Algorithm X path.
+    *
+    * The magic constant here is T from Vitter's paper
+    */
+   if (t <= (22 * n))
+   {
+       /* Process records using Algorithm X until t is large enough */
+       double  V,
+               quot;
+
+       V = random_fract();     /* Generate V */
+       t++;
+       quot = (double) (t - n) / (double) t;
+       /* Find min S satisfying (4.1) */
+       while (quot > V)
+       {
+           t++;
+           quot *= (double) (t - n) / (double) t;
+       }
+   }
+   else
+   {
+       /* Now apply Algorithm Z */
+       double  W = *stateptr;
+       long    term = t - n + 1;
+       int     S;
+
+       for (;;)
+       {
+           long    numer,
+                   numer_lim,
+                   denom;
+           double  U,
+                   X,
+                   lhs,
+                   rhs,
+                   y,
+                   tmp;
+
+           /* Generate U and X */
+           U = random_fract();
+           X = t * (W - 1.0);
+           S = X;              /* S is tentatively set to floor(X) */
+           /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+           tmp = (double) (t + 1) / (double) term;
+           lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+           rhs = (((t + X)/(term + S)) * term)/t;
+           if (lhs <= rhs)
+           {
+               W = rhs/lhs;
+               break;
+           }
+           /* Test if U <= f(S)/cg(X) */
+           y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+           if (n < S)
+           {
+               denom = t;
+               numer_lim = term + S;
+           }
+           else
+           {
+               denom = t - n + S;
+               numer_lim = t + 1;
+           }
+           for (numer = t + S; numer >= numer_lim; numer--)
+           {
+               y *= (double) numer / (double) denom;
+               denom--;
+           }
+           W = exp(- log(random_fract())/n); /* Generate W in advance */
+           if (exp(log(y)/n) <= (t + X)/t)
+               break;
+       }
+       t += S + 1;             /* skip S records, then take the next one */
+       *stateptr = W;          /* save W for the next call */
+   }
+   return t;
+}
+
+/*
+ * qsort comparator for sorting rows[] array
+ *
+ * a and b each point at a HeapTuple element of the array.  Tuples are
+ * ordered by the block number of their t_self item pointer, then by the
+ * offset number within that block.  Returns -1, 0 or 1.
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+   HeapTuple   ha = * (HeapTuple *) a;
+   HeapTuple   hb = * (HeapTuple *) b;
+   BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
+   OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
+   BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
+   OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
+
+   /* Primary key: block number */
+   if (ba < bb)
+       return -1;
+   if (ba > bb)
+       return 1;
+   /* Same block: fall back to the offset number */
+   if (oa < ob)
+       return -1;
+   if (oa > ob)
+       return 1;
+   return 0;
+}
+
+/*
+ * Discover the largest valid tuple offset number on the given page
+ *
+ * Reads block 'blocknumber' of 'relation' via ReadBuffer, raises an ERROR
+ * if no valid buffer comes back, and returns PageGetMaxOffsetNumber() of
+ * the page.  The buffer is share-locked only while the value is read, and
+ * is unlocked and released before returning, so the result describes the
+ * page as it was at the time of the call.
+ *
+ * This code probably ought to live in some other module.
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+   Buffer      buffer;
+   Page        p;
+   OffsetNumber offnum;
+
+   buffer = ReadBuffer(relation, blocknumber);
+   if (!BufferIsValid(buffer))
+       elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+            RelationGetRelationName(relation), (long) blocknumber);
+   LockBuffer(buffer, BUFFER_LOCK_SHARE);
+   p = BufferGetPage(buffer);
+   offnum = PageGetMaxOffsetNumber(p);
+   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+   ReleaseBuffer(buffer);
+   return offnum;
+}
+
+
+/*
+ * compute_minimal_stats() -- compute minimal column statistics
  *
- * This method works perfectly for columns with unique values, and columns
- * with only two unique values, plus nulls.
+ * We use this when we can find only an "=" operator for the datatype.
  *
- * It becomes less perfect as the number of unique values increases and
- * their distribution in the table becomes more random.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, and the (estimated) number of distinct values.
  *
+ * The most common values are determined by brute force: we keep a list
+ * of previously seen values, ordered by number of times seen, as we scan
+ * the samples.  A newly seen value is inserted just after the last
+ * multiply-seen value, causing the bottommost (oldest) singly-seen value
+ * to drop off the list.  The accuracy of this method, and also its cost,
+ * depend mainly on the length of the list we are willing to keep.
  */
 static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+                     TupleDesc tupDesc, long totalrows,
+                     HeapTuple *rows, int numrows)
 {
     int         i;
-   TupleDesc   tupDesc = onerel->rd_att;
-
-   for (i = 0; i < attr_cnt; i++)
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   FmgrInfo    f_cmpeq;
+   /* A candidate value plus the number of sample rows found equal to it */
+   typedef struct
+   {
+       Datum   value;
+       int     count;
+   } TrackItem;
+   TrackItem  *track;
+   int         track_cnt,
+               track_max;
+   int         num_mcv = stats->attr->attstattarget;
+
+   /* We track up to 2*n values for an n-element MCV list; but at least 10 */
+   track_max = 2 * num_mcv;
+   if (track_max < 10)
+       track_max = 10;
+   track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+   track_cnt = 0;
+
+   fmgr_info(stats->eqfunc, &f_cmpeq);
+
+   for (i = 0; i < numrows; i++)
     {
-       VacAttrStats *stats = &vacattrstats[i];
-       Datum       origvalue;
+       HeapTuple   tuple = rows[i];
         Datum       value;
         bool        isnull;
-       bool        value_hit;
-
-       if (!VacAttrStatsEqValid(stats))
-           continue;
-
-#ifdef _DROP_COLUMN_HACK__
-       if (COLUMN_IS_DROPPED(stats->attr))
-           continue;
-#endif  /* _DROP_COLUMN_HACK__ */
+       bool        match;
+       int         firstcount1,
+                   j;
 
-       origvalue = heap_getattr(tuple, stats->attr->attnum,
-                                tupDesc, &isnull);
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
+       /* Check for null/nonnull */
         if (isnull)
         {
-           stats->null_cnt++;
+           null_cnt++;
             continue;
         }
-       stats->nonnull_cnt++;
+       nonnull_cnt++;
 
         /*
-        * If the value is toasted, detoast it to avoid repeated
-        * detoastings and resultant memory leakage inside the comparison
-        * routines.
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
          */
-       if (!stats->attr->attbyval && stats->attr->attlen == -1)
-           value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
-       else
-           value = origvalue;
-
-       if (!stats->initialized)
+       if (is_varlena)
         {
-           bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
-           /* best_cnt gets incremented below */
-           bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
-           stats->guess1_cnt = stats->guess1_hits = 1;
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess2_hits = 1;
-           if (VacAttrStatsLtGtValid(stats))
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
             {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               /* min_cnt, max_cnt get incremented below */
+               /* Already counted in nonnull_cnt and total_width above */
+               toowide_cnt++;
+               continue;
             }
-           stats->initialized = true;
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
         }
 
-       if (VacAttrStatsLtGtValid(stats))
+       /*
+        * See if the value matches anything we're already tracking.
+        *
+        * While scanning, firstcount1 is set to the index of the first
+        * entry examined whose count is 1; it stays at track_cnt if there
+        * is none.  It is only used when no match is found.
+        */
+       match = false;
+       firstcount1 = track_cnt;
+       for (j = 0; j < track_cnt; j++)
         {
-           if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
-                                          value, stats->min)))
+           if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
             {
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               stats->min_cnt = 1;
+               match = true;
+               break;
             }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->min)))
-               stats->min_cnt++;
+           if (j < firstcount1 && track[j].count == 1)
+               firstcount1 = j;
+       }
 
-           if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
-                                          value, stats->max)))
+       if (match)
+       {
+           /* Found a match */
+           track[j].count++;
+           /* This value may now need to "bubble up" in the track list */
+           while (j > 0 && track[j].count > track[j-1].count)
             {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               stats->max_cnt = 1;
+               swapDatum(track[j].value, track[j-1].value);
+               swapInt(track[j].count, track[j-1].count);
+               j--;
             }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->max)))
-               stats->max_cnt++;
         }
-
-       value_hit = true;
-       if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                      value, stats->best)))
-           stats->best_cnt++;
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess1)))
+       else
         {
-           stats->guess1_cnt++;
-           stats->guess1_hits++;
+           /* No match.  Insert at head of count-1 list */
+           /*
+            * If the list is already full, the shift below overwrites its
+            * last entry.  If it is full and holds no count-1 entry at all
+            * (firstcount1 == track_cnt), the new value is not tracked.
+            */
+           if (track_cnt < track_max)
+               track_cnt++;
+           for (j = track_cnt-1; j > firstcount1; j--)
+           {
+               track[j].value = track[j-1].value;
+               track[j].count = track[j-1].count;
+           }
+           if (firstcount1 < track_cnt)
+           {
+               track[firstcount1].value = value;
+               track[firstcount1].count = 1;
+           }
         }
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess2)))
-           stats->guess2_hits++;
+   }
+
+   /* We can only compute valid stats if we found some non-null values. */
+   if (nonnull_cnt > 0)
+   {
+       int     nmultiple,
+               summultiple;
+
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
         else
-           value_hit = false;
+           stats->stawidth = stats->attrtype->typlen;
 
-       if (stats->guess2_hits > stats->guess1_hits)
+       /* Count the number of values we found multiple times */
+       /* The list is kept in descending count order, so stop at first 1 */
+       summultiple = 0;
+       for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
         {
-           swapDatum(stats->guess1, stats->guess2);
-           swapInt(stats->guess1_len, stats->guess2_len);
-           swapLong(stats->guess1_hits, stats->guess2_hits);
-           stats->guess1_cnt = stats->guess1_hits;
+           if (track[nmultiple].count == 1)
+               break;
+           summultiple += track[nmultiple].count;
         }
-       if (stats->guess1_cnt > stats->best_cnt)
+
+       if (nmultiple == 0)
         {
-           swapDatum(stats->best, stats->guess1);
-           swapInt(stats->best_len, stats->guess1_len);
-           swapLong(stats->best_cnt, stats->guess1_cnt);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
         }
-       if (!value_hit)
+       else if (track_cnt < track_max && toowide_cnt == 0 &&
+                nmultiple == track_cnt)
         {
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /*
+            * Our track list includes every value in the sample, and every
+            * value appeared more than once.  Assume the column has just
+            * these values.
+            */
+           stats->stadistinct = track_cnt;
         }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * We assume (not very reliably!) that all the multiply-occurring
+            * values are reflected in the final track[] list, and the other
+            * nonnull values all appeared but once.
+            *----------
+            */
+           int     f1 = nonnull_cnt - summultiple;
+           double  term1;
 
-       /* Clean up detoasted copy, if any */
-       if (value != origvalue)
-           pfree(DatumGetPointer(value));
-   }
-}
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
 
-/*
- * bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
-   if (attr->attbyval)
-       *bucket = value;
-   else
-   {
-       int         len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
 
-       /* Avoid unnecessary palloc() traffic... */
-       if (len > *bucket_len)
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       if (num_mcv > 0)
         {
-           if (*bucket_len != 0)
-               pfree(DatumGetPointer(*bucket));
-           *bucket = PointerGetDatum(palloc(len));
-           *bucket_len = len;
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(track[i].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               /* Frequency is relative to all sampled rows, nulls included */
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
+           }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[0] = STATISTIC_KIND_MCV;
+           stats->staop[0] = stats->eqopr;
+           stats->stanumbers[0] = mcv_freqs;
+           stats->numnumbers[0] = num_mcv;
+           stats->stavalues[0] = mcv_values;
+           stats->numvalues[0] = num_mcv;
         }
-       memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
     }
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 
 /*
- * update_attstats() -- update attribute statistics for one relation
+ * compute_scalar_stats() -- compute column statistics
  *
- *     Statistics are stored in several places: the pg_class row for the
- *     relation has stats about the whole relation, the pg_attribute rows
- *     for each attribute store "dispersion", and there is a pg_statistic
- *     row for each (non-system) attribute.  (Dispersion probably ought to
- *     be moved to pg_statistic, but it's not worth doing unless there's
- *     another reason to have to change pg_attribute.)  The pg_class values
- *     are updated by VACUUM, not here.
- *
- *     We violate no-overwrite semantics here by storing new values for
- *     the dispersion column directly into the pg_attribute tuple that's
- *     already on the page.  The reason for this is that if we updated
- *     these tuples in the usual way, vacuuming pg_attribute itself
- *     wouldn't work very well --- by the time we got done with a vacuum
- *     cycle, most of the tuples in pg_attribute would've been obsoleted.
- *     Updating pg_attribute's own statistics would be especially tricky.
- *     Of course, this only works for fixed-size never-null columns, but
- *     dispersion is.
+ * We use this when we can find "=" and "<" operators for the datatype.
  *
- *     pg_statistic rows are just added normally.  This means that
- *     pg_statistic will probably contain some deleted rows at the
- *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, the (estimated) number of distinct values, the
+ * distribution histogram, and the correlation of physical to logical order.
  *
- *     To keep things simple, we punt for pg_statistic, and don't try
- *     to compute or store rows for pg_statistic itself in pg_statistic.
- *     This could possibly be made to work, but it's not worth the trouble.
+ * The desired stats can be determined fairly easily after sorting the
+ * data values into order.
  */
 static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+                    TupleDesc tupDesc, long totalrows,
+                    HeapTuple *rows, int numrows)
 {
-   Relation    ad,
-               sd;
-   HeapScanDesc scan;
-   HeapTuple   atup,
-               stup;
-   ScanKeyData askey;
-   Form_pg_attribute attp;
-
-   ad = heap_openr(AttributeRelationName, RowExclusiveLock);
-   sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
-   /* Find pg_attribute rows for this relation */
-   ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
-                          F_INT4EQ, relid);
-
-   scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
-   while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+   int         i;
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   double      corr_xysum;
+   RegProcedure cmpFn;
+   SortFunctionKind cmpFnKind;
+   FmgrInfo    f_cmpfn;
+   ScalarItem *values;
+   int         values_cnt = 0;
+   int        *tupnoLink;
+   ScalarMCVItem *track;
+   int         track_cnt = 0;
+   int         num_mcv = stats->attr->attstattarget;
+
+   values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+   tupnoLink = (int *) palloc(numrows * sizeof(int));
+   track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+   SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+   fmgr_info(cmpFn, &f_cmpfn);
+
+   /* Initial scan to find sortable values */
+   for (i = 0; i < numrows; i++)
    {
-       int         i;
-       VacAttrStats *stats;
+       HeapTuple   tuple = rows[i];
+       Datum       value;
+       bool        isnull;
 
-       attp = (Form_pg_attribute) GETSTRUCT(atup);
-       if (attp->attnum <= 0)  /* skip system attributes for now */
-           continue;
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
-       for (i = 0; i < natts; i++)
+       /* Check for null/nonnull */
+       if (isnull)
        {
-           if (attp->attnum == vacattrstats[i].attr->attnum)
-               break;
+           null_cnt++;
+           continue;
        }
-       if (i >= natts)
-           continue;           /* skip attr if no stats collected */
-       stats = &(vacattrstats[i]);
+       nonnull_cnt++;
 
-       if (VacAttrStatsEqValid(stats))
+       /*
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
+        */
+       if (is_varlena)
        {
-           float4      selratio;       /* average ratio of rows selected
-                                        * for a random constant */
-
-           /* Compute dispersion */
-           if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
-
-               /*
-                * empty relation, so put a dummy value in attdispersion
-                */
-               selratio = 0;
+               toowide_cnt++;
+               continue;
            }
-           else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
-           {
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
+       }
 
-               /*
-                * looks like we have a unique-key attribute --- flag this
-                * with special -1.0 flag value.
-                *
-                * The correct dispersion is 1.0/numberOfRows, but since the
-                * relation row count can get updated without recomputing
-                * dispersion, we want to store a "symbolic" value and
-                * figure 1.0/numberOfRows on the fly.
-                */
-               selratio = -1;
-           }
-           else
+       /* Add it to the list to be sorted */
+       values[values_cnt].value = value;
+       values[values_cnt].tupno = values_cnt;
+       tupnoLink[values_cnt] = values_cnt;
+       values_cnt++;
+   }
+
+   /* We can only compute valid stats if we found some sortable values. */
+   if (values_cnt > 0)
+   {
+       int     ndistinct,      /* # distinct values in sample */
+               nmultiple,      /* # that appear multiple times */
+               num_hist,
+               dups_cnt;
+       int     slot_idx = 0;
+
+       /* Sort the collected values */
+       datumCmpFn = &f_cmpfn;
+       datumCmpFnKind = cmpFnKind;
+       datumCmpTupnoLink = tupnoLink;
+       qsort((void *) values, values_cnt,
+             sizeof(ScalarItem), compare_scalars);
+
+       /*
+        * Now scan the values in order, find the most common ones,
+        * and also accumulate ordering-correlation statistics.
+        *
+        * To determine which are most common, we first have to count the
+        * number of duplicates of each value.  The duplicates are adjacent
+        * in the sorted list, so a brute-force approach is to compare
+        * successive datum values until we find two that are not equal.
+        * However, that requires N-1 invocations of the datum comparison
+        * routine, which are completely redundant with work that was done
+        * during the sort.  (The sort algorithm must at some point have
+        * compared each pair of items that are adjacent in the sorted order;
+        * otherwise it could not know that it's ordered the pair correctly.)
+        * We exploit this by having compare_scalars remember the highest
+        * tupno index that each ScalarItem has been found equal to.  At the
+        * end of the sort, a ScalarItem's tupnoLink will still point to
+        * itself if and only if it is the last item of its group of
+        * duplicates (since the group will be ordered by tupno).
+        */
+       corr_xysum = 0;
+       ndistinct = 0;
+       nmultiple = 0;
+       dups_cnt = 0;
+       for (i = 0; i < values_cnt; i++)
+       {
+           int         tupno = values[i].tupno;
+
+           corr_xysum += (double) i * (double) tupno;
+           dups_cnt++;
+           if (tupnoLink[tupno] == tupno)
            {
-               if (VacAttrStatsLtGtValid(stats) &&
-                   stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+               /* Reached end of duplicates of this value */
+               ndistinct++;
+               if (dups_cnt > 1)
                {
+                   nmultiple++;
+                   if (track_cnt < num_mcv ||
+                       dups_cnt > track[track_cnt-1].count)
+                   {
+                       /*
+                        * Found a new item for the mcv list; find its
+                        * position, bubbling down old items if needed.
+                        * Loop invariant is that j points at an empty/
+                        * replaceable slot.
+                        */
+                       int     j;
+
+                       if (track_cnt < num_mcv)
+                           track_cnt++;
+                       for (j = track_cnt-1; j > 0; j--)
+                       {
+                           if (dups_cnt <= track[j-1].count)
+                               break;
+                           track[j].count = track[j-1].count;
+                           track[j].first = track[j-1].first;
+                       }
+                       track[j].count = dups_cnt;
+                       track[j].first = i + 1 - dups_cnt;
+                   }
+               }
+               dups_cnt = 0;
+           }
+       }
 
-                   /*
-                    * exact result when there are just 1 or 2 values...
-                    */
-                   double      min_cnt_d = stats->min_cnt,
-                               max_cnt_d = stats->max_cnt,
-                               null_cnt_d = stats->null_cnt;
-                   double      total = ((double) stats->nonnull_cnt) + null_cnt_d;
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
+       else
+           stats->stawidth = stats->attrtype->typlen;
 
-                   selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
-               }
-               else
-               {
-                   double      most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-                   double      total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+       if (nmultiple == 0)
+       {
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
+       }
+       else if (toowide_cnt == 0 && nmultiple == ndistinct)
+       {
+           /*
+            * Every value in the sample appeared more than once.  Assume the
+            * column has just these values.
+            */
+           stats->stadistinct = ndistinct;
+       }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * Overwidth values are assumed to have been distinct.
+            *----------
+            */
+           int     f1 = ndistinct - nmultiple + toowide_cnt;
+           double  term1;
 
-                   /*
-                    * we assume count of other values are 20% of best
-                    * count in table
-                    */
-                   selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
-               }
-               /* Make sure calculated values are in-range */
-               if (selratio < 0.0)
-                   selratio = 0.0;
-               else if (selratio > 1.0)
-                   selratio = 1.0;
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
+
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
+
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       Assert(track_cnt >= num_mcv);
+       if (num_mcv > 0)
+       {
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(values[track[i].first].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+           stats->staop[slot_idx] = stats->eqopr;
+           stats->stanumbers[slot_idx] = mcv_freqs;
+           stats->numnumbers[slot_idx] = num_mcv;
+           stats->stavalues[slot_idx] = mcv_values;
+           stats->numvalues[slot_idx] = num_mcv;
+           slot_idx++;
+       }
 
-           /* overwrite the existing statistics in the tuple */
-           attp->attdispersion = selratio;
+       /*
+        * Generate a histogram slot entry if there are at least two
+        * distinct values not accounted for in the MCV list.  (This
+        * ensures the histogram won't collapse to empty or a singleton.)
+        */
+       num_hist = ndistinct - num_mcv;
+       if (num_hist > stats->attr->attstattarget)
+           num_hist = stats->attr->attstattarget + 1;
+       if (num_hist >= 2)
+       {
+           MemoryContext old_context;
+           Datum  *hist_values;
+           int     nvals;
 
-           /* invalidate the tuple in the cache and write the buffer */
-           RelationInvalidateHeapTuple(ad, atup);
-           WriteNoReleaseBuffer(scan->rs_cbuf);
+           /* Sort the MCV items into position order to speed next loop */
+           qsort((void *) track, num_mcv,
+                 sizeof(ScalarMCVItem), compare_mcvs);
 
            /*
-            * Create pg_statistic tuples for the relation, if we have
-            * gathered the right data.  del_stats() previously deleted
-            * all the pg_statistic tuples for the rel, so we just have to
-            * insert new ones here.
+            * Collapse out the MCV items from the values[] array.
             *
-            * Note analyze_rel() has seen to it that we won't come here when
-            * vacuuming pg_statistic itself.
+            * Note we destroy the values[] array here... but we don't need
+            * it for anything more.  We do, however, still need values_cnt.
             */
-           if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+           if (num_mcv > 0)
            {
-               float4      nullratio;
-               float4      bestratio;
-               FmgrInfo    out_function;
-               char       *out_string;
-               double      best_cnt_d = stats->best_cnt,
-                           null_cnt_d = stats->null_cnt,
-                           nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
-               Datum       values[Natts_pg_statistic];
-               char        nulls[Natts_pg_statistic];
-               Relation    irelations[Num_pg_statistic_indices];
+               int     src,
+                       dest;
+               int     j;
 
-               nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
-               bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
-               fmgr_info(stats->outfunc, &out_function);
+               src = dest = 0;
+               j = 0;          /* index of next interesting MCV item */
+               while (src < values_cnt)
+               {
+                   int     ncopy;
+
+                   if (j < num_mcv)
+                   {
+                       int     first = track[j].first;
+
+                       if (src >= first)
+                       {
+                           /* advance past this MCV item */
+                           src = first + track[j].count;
+                           j++;
+                           continue;
+                       }
+                       ncopy = first - src;
+                   }
+                   else
+                   {
+                       ncopy = values_cnt - src;
+                   }
+                   memmove(&values[dest], &values[src],
+                           ncopy * sizeof(ScalarItem));
+                   src += ncopy;
+                   dest += ncopy;
+               }
+               nvals = dest;
+           }
+           else
+               nvals = values_cnt;
+           Assert(nvals >= num_hist);
 
-               for (i = 0; i < Natts_pg_statistic; ++i)
-                   nulls[i] = ' ';
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+           for (i = 0; i < num_hist; i++)
+           {
+               int     pos;
 
-               /*
-                * initialize values[]
-                */
-               i = 0;
-               values[i++] = ObjectIdGetDatum(relid);  /* starelid */
-               values[i++] = Int16GetDatum(attp->attnum);      /* staattnum */
-               values[i++] = ObjectIdGetDatum(stats->op_cmplt);        /* staop */
-               values[i++] = Float4GetDatum(nullratio);        /* stanullfrac */
-               values[i++] = Float4GetDatum(bestratio);        /* stacommonfrac */
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->best,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stacommonval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->min,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* staloval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->max,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stahival */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-
-               stup = heap_formtuple(sd->rd_att, values, nulls);
-
-               /* store tuple and update indexes too */
-               heap_insert(sd, stup);
-
-               CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
-               CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
-               CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
-               /* release allocated space */
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
-               heap_freetuple(stup);
+               pos = (i * (nvals - 1)) / (num_hist - 1);
+               hist_values[i] = datumCopy(values[pos].value,
+                                          stats->attr->attbyval,
+                                          stats->attr->attlen);
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stavalues[slot_idx] = hist_values;
+           stats->numvalues[slot_idx] = num_hist;
+           slot_idx++;
+       }
+
+       /* Generate a correlation entry if there are multiple values */
+       if (values_cnt > 1)
+       {
+           MemoryContext old_context;
+           float4 *corrs;
+           double  corr_xsum,
+                   corr_x2sum;
+
+           /* Result array must be allocated in TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           corrs = (float4 *) palloc(sizeof(float4));
+           MemoryContextSwitchTo(old_context);
+
+           /*----------
+            * Since we know the x and y value sets are both
+            *      0, 1, ..., values_cnt-1
+            * we have sum(x) = sum(y) =
+            *      (values_cnt-1)*values_cnt / 2
+            * and sum(x^2) = sum(y^2) =
+            *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+            *----------
+            */
+           corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+           corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+               (double) (2*values_cnt-1) / 6.0;
+           /* And the correlation coefficient reduces to */
+           corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+               (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stanumbers[slot_idx] = corrs;
+           stats->numnumbers[slot_idx] = 1;
+           slot_idx++;
        }
    }
-   heap_endscan(scan);
-   /* close rels, but hold locks till upcoming commit */
-   heap_close(ad, NoLock);
-   heap_close(sd, NoLock);
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 /*
- * del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
  *
- * If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums.  The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest
+ * tupno whose datum has been found equal to that item's datum.  This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
  */
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
 {
-   Relation    pgstatistic;
-   HeapScanDesc scan;
-   HeapTuple   tuple;
-   ScanKeyData key;
+   Datum       da = ((ScalarItem *) a)->value;
+   int         ta = ((ScalarItem *) a)->tupno;
+   Datum       db = ((ScalarItem *) b)->value;
+   int         tb = ((ScalarItem *) b)->tupno;
 
-   pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+   if (datumCmpFnKind == SORTFUNC_LT)
+   {
+       if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+           return -1;          /* a < b */
+       if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+           return 1;           /* a > b */
+   }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
 
-   ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
-                          F_OIDEQ, ObjectIdGetDatum(relid));
-   scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+       compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+       if (compare != 0)
+       {
+           if (datumCmpFnKind == SORTFUNC_REVCMP)
+               compare = -compare;
+           return compare;
+       }
+   }
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   /*
+    * The two datums are equal, so update datumCmpTupnoLink[].
+    */
+   if (datumCmpTupnoLink[ta] < tb)
+       datumCmpTupnoLink[ta] = tb;
+   if (datumCmpTupnoLink[tb] < ta)
+       datumCmpTupnoLink[tb] = ta;
+
+   /*
+    * For equal datums, sort by tupno
+    */
+   return ta - tb;
+}
+
+/*
+ * qsort comparator for sorting ScalarMCVItems by position
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+   int         da = ((ScalarMCVItem *) a)->first;
+   int         db = ((ScalarMCVItem *) b)->first;
+
+   return da - db;
+}
+
+
+/*
+ * update_attstats() -- update attribute statistics for one relation
+ *
+ *     Statistics are stored in several places: the pg_class row for the
+ *     relation has stats about the whole relation, and there is a
+ *     pg_statistic row for each (non-system) attribute that has ever
+ *     been analyzed.  The pg_class values are updated by VACUUM, not here.
+ *
+ *     pg_statistic rows are just added or updated normally.  This means
+ *     that pg_statistic will probably contain some deleted rows at the
+ *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ *     To keep things simple, we punt for pg_statistic, and don't try
+ *     to compute or store rows for pg_statistic itself in pg_statistic.
+ *     This could possibly be made to work, but it's not worth the trouble.
+ *     Note analyze_rel() has seen to it that we won't come here when
+ *     analyzing pg_statistic itself.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+   Relation    sd;
+   int         attno;
+
+   /*
+    * We use an ExclusiveLock on pg_statistic to ensure that only one
+    * backend is writing it at a time --- without that, we might have to
+    * deal with concurrent updates here, and it's not worth the trouble.
+    */
+   sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+   for (attno = 0; attno < natts; attno++)
    {
-       if (attcnt > 0)
+       VacAttrStats *stats = vacattrstats[attno];
+       FmgrInfo    out_function;
+       HeapTuple   stup,
+                   oldtup;
+       int         i, k, n;
+       Datum       values[Natts_pg_statistic];
+       char        nulls[Natts_pg_statistic];
+       char        replaces[Natts_pg_statistic];
+       Relation    irelations[Num_pg_statistic_indices];
+
+       /* Ignore attr if we weren't able to collect stats */
+       if (!stats->stats_valid)
+           continue;
+
+       fmgr_info(stats->attrtype->typoutput, &out_function);
+
+       /*
+        * Construct a new pg_statistic tuple
+        */
+       for (i = 0; i < Natts_pg_statistic; ++i)
        {
-           Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
-           int         i;
+           nulls[i] = ' ';
+           replaces[i] = 'r';
+       }
 
-           for (i = 0; i < attcnt; i++)
+       i = 0;
+       values[i++] = ObjectIdGetDatum(relid); /* starelid */
+       values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+       values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+       values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+       values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     nnum = stats->numnumbers[k];
+
+           if (nnum > 0)
            {
-               if (pgs->staattnum == attnums[i] + 1)
-                   break;
+               Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+               ArrayType  *arry;
+
+               for (n = 0; n < nnum; n++)
+                   numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+               /* XXX knows more than it should about type float4: */
+               arry = construct_array(numdatums, nnum,
+                                      false, sizeof(float4), 'i');
+               values[i++] = PointerGetDatum(arry); /* stanumbersN */
+           }
+           else
+           {
+               nulls[i] = 'n';
+               values[i++] = (Datum) 0;
            }
-           if (i >= attcnt)
-               continue;       /* don't delete it */
        }
-       simple_heap_delete(pgstatistic, &tuple->t_self);
-   }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     ntxt = stats->numvalues[k];
 
-   heap_endscan(scan);
+           if (ntxt > 0)
+           {
+               Datum      *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+               ArrayType  *arry;
 
-   /*
-    * Close rel, but *keep* lock; we will need to reacquire it later, so
-    * there's a possibility of deadlock against another VACUUM process if
-    * we let go now.  Keeping the lock shouldn't delay any common
-    * operation other than an attempted VACUUM of pg_statistic itself.
-    */
-   heap_close(pgstatistic, NoLock);
+               for (n = 0; n < ntxt; n++)
+               {
+                   /*
+                    * Convert data values to a text string to be inserted
+                    * into the text array.
+                    */
+                   Datum   stringdatum;
+
+                   stringdatum =
+                       FunctionCall3(&out_function,
+                                     stats->stavalues[k][n],
+                                     ObjectIdGetDatum(stats->attrtype->typelem),
+                                     Int32GetDatum(stats->attr->atttypmod));
+                   txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+                   pfree(DatumGetPointer(stringdatum));
+               }
+               /* XXX knows more than it should about type text: */
+               arry = construct_array(txtdatums, ntxt,
+                                      false, -1, 'i');
+               values[i++] = PointerGetDatum(arry); /* stavaluesN */
+           }
+           else
+           {
+               nulls[i] = 'n';
+               values[i++] = (Datum) 0;
+           }
+       }
+
+       /* Is there already a pg_statistic tuple for this attribute? */
+       oldtup = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(stats->attnum),
+                               0, 0);
+
+       if (HeapTupleIsValid(oldtup))
+       {
+           /* Yes, replace it */
+           stup = heap_modifytuple(oldtup,
+                                   sd,
+                                   values,
+                                   nulls,
+                                   replaces);
+           ReleaseSysCache(oldtup);
+           simple_heap_update(sd, &stup->t_self, stup);
+       }
+       else
+       {
+           /* No, insert new tuple */
+           stup = heap_formtuple(sd->rd_att, values, nulls);
+           heap_insert(sd, stup);
+       }
+
+       /* update indices too */
+       CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+                          irelations);
+       CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+       CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+       heap_freetuple(stup);
+   }
+
+   /* close rel, but hold lock till upcoming commit */
+   heap_close(sd, NoLock);
 }


diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c

index 96d493688e328aaa1fc4bf56bc12e18865f2ee33..13a78f1177390f0108702c94a7cc005e0a28b183 100644 (file)


--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
  *
  * NOTES
  *   The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
 #include "access/genam.h"
 
 
+static void drop_default(Oid relid, int16 attnum);
 static bool needs_toast_table(Relation rel);
 static bool is_relation(char *name);
 
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
        HeapTuple   typeTuple;
        Form_pg_type tform;
        char       *typename;
-       int         attnelems;
+       int         attndims;
 
        if (SearchSysCacheExists(ATTNAME,
                                 ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
 
        if (colDef->typename->arrayBounds)
        {
-           attnelems = length(colDef->typename->arrayBounds);
+           attndims = length(colDef->typename->arrayBounds);
            typename = makeArrayTypeName(colDef->typename->name);
        }
        else
-           attnelems = 0;
+           attndims = 0;
 
        typeTuple = SearchSysCache(TYPENAME,
                                   PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
        namestrcpy(&(attribute->attname), colDef->colname);
        attribute->atttypid = typeTuple->t_data->t_oid;
        attribute->attlen = tform->typlen;
-       attribute->attdispersion = 0;
+       attribute->attstattarget = DEFAULT_ATTSTATTARGET;
        attribute->attcacheoff = -1;
        attribute->atttypmod = colDef->typename->typmod;
        attribute->attnum = i;
        attribute->attbyval = tform->typbyval;
-       attribute->attnelems = attnelems;
+       attribute->attndims = attndims;
        attribute->attisset = (bool) (tform->typtype == 'c');
        attribute->attstorage = tform->typstorage;
        attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
 }
 
 
-
-static void drop_default(Oid relid, int16 attnum);
-
-
 /*
  * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
  */
 void
-AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+                            bool inh, const char *colName,
+                            Node *newDefault)
 {
    Relation    rel;
    HeapTuple   tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
            if (childrelid == myrelid)
                continue;
            rel = heap_open(childrelid, AccessExclusiveLock);
-           AlterTableAlterColumn(RelationGetRelationName(rel),
-                                 false, colName, newDefault);
+           AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+                                        false, colName, newDefault);
            heap_close(rel, AccessExclusiveLock);
        }
    }
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
    /* -= now do the thing on this relation =- */
 
    /* reopen the business */
-   rel = heap_openr((char *) relationName, AccessExclusiveLock);
+   rel = heap_openr(relationName, AccessExclusiveLock);
 
    /*
     * get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
 }
 
 
-
 static void
 drop_default(Oid relid, int16 attnum)
 {
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
 }
 
 
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+                               bool inh, const char *colName,
+                               Node *statsTarget)
+{
+   Relation    rel;
+   Oid         myrelid;
+   int         newtarget;
+   Relation    attrelation;
+   HeapTuple   tuple;
+
+#ifndef NO_SECURITY
+   if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+       elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+   rel = heap_openr(relationName, AccessExclusiveLock);
+   if (rel->rd_rel->relkind != RELKIND_RELATION)
+       elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+            relationName);
+   myrelid = RelationGetRelid(rel);
+   heap_close(rel, NoLock);    /* close rel, but keep lock! */
+
+   /*
+    * Propagate to children if desired
+    */
+   if (inh)
+   {
+       List       *child,
+                  *children;
+
+       /* this routine is actually in the planner */
+       children = find_all_inheritors(myrelid);
+
+       /*
+        * find_all_inheritors does the recursive search of the
+        * inheritance hierarchy, so all we have to do is process all of
+        * the relids in the list that it returns.
+        */
+       foreach(child, children)
+       {
+           Oid         childrelid = lfirsti(child);
+
+           if (childrelid == myrelid)
+               continue;
+           rel = heap_open(childrelid, AccessExclusiveLock);
+           AlterTableAlterColumnStatistics(RelationGetRelationName(rel),
+                                           false, colName, statsTarget);
+           heap_close(rel, AccessExclusiveLock);
+       }
+   }
+
+   /* -= now do the thing on this relation =- */
+
+   Assert(IsA(statsTarget, Integer));
+   newtarget = intVal(statsTarget);
+
+   /* Limit target to sane range (should we raise an error instead?) */
+   if (newtarget < 0)
+       newtarget = 0;
+   else if (newtarget > 1000)
+       newtarget = 1000;
+
+   attrelation = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+   tuple = SearchSysCacheCopy(ATTNAME,
+                              ObjectIdGetDatum(myrelid),
+                              PointerGetDatum(colName),
+                              0, 0);
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+            relationName, colName);
+
+   if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0)
+       elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+            colName);
+
+   ((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget;
+
+   simple_heap_update(attrelation, &tuple->t_self, tuple);
+
+   /* keep system catalog indices current */
+   {
+       Relation    irelations[Num_pg_attr_indices];
+
+       CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations);
+       CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple);
+       CatalogCloseIndices(Num_pg_attr_indices, irelations);
+   }
+
+   heap_freetuple(tuple);
+   heap_close(attrelation, RowExclusiveLock);
+}
+
+
 #ifdef _DROP_COLUMN_HACK__
 /*
  * ALTER TABLE DROP COLUMN trial implementation


diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 694d0e8bbc1491c39827ff833f8cc3fb68906e9e..9a0dbdc8c8e15c0b261068728c7d38546e3aa07c 100644 (file)


--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
              Buffer oldbuf, ItemPointerData from,
              Buffer newbuf, HeapTuple newtup);
 
+
+typedef struct VRelListData
+{
+   Oid         vrl_relid;
+   struct VRelListData *vrl_next;
+} VRelListData;
+
+typedef VRelListData *VRelList;
+
+typedef struct VacPageData
+{
+   BlockNumber blkno;          /* BlockNumber of this Page */
+   Size        free;           /* FreeSpace on this Page */
+   uint16      offsets_used;   /* Number of OffNums used by vacuum */
+   uint16      offsets_free;   /* Number of OffNums free or to be free */
+   OffsetNumber offsets[1];    /* Array of its OffNums */
+} VacPageData;
+
+typedef VacPageData *VacPage;
+
+typedef struct VacPageListData
+{
+   int         empty_end_pages;/* Number of "empty" end-pages */
+   int         num_pages;      /* Number of pages in pagedesc */
+   int         num_allocated_pages;    /* Number of allocated pages in
+                                        * pagedesc */
+   VacPage    *pagedesc;       /* Descriptions of pages */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;
+
+typedef struct VTupleLinkData
+{
+   ItemPointerData new_tid;
+   ItemPointerData this_tid;
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;
+
+typedef struct VTupleMoveData
+{
+   ItemPointerData tid;        /* tuple ID */
+   VacPage     vacpage;        /* where to move */
+   bool        cleanVpd;       /* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;
+
+typedef struct VRelStats
+{
+   Oid         relid;
+   long        num_pages;
+   long        num_tuples;
+   Size        min_tlen;
+   Size        max_tlen;
+   bool        hasindex;
+   int         num_vtlinks;
+   VTupleLink  vtlinks;
+} VRelStats;
+
+
 static MemoryContext vac_context = NULL;
 
 static int MESSAGE_LEVEL;      /* message level */
 
 static TransactionId XmaxRecent;
 
+
 /* non-export function prototypes */
 static void vacuum_init(void);
 static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
 static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+                     VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacuum_pages, VacPageList fraged_pages,
+                       int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacpagelist);
 static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+                        long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
 static char *show_rusage(struct rusage * ru0);
 
 
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
 void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
 {
+   const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
    NameData    VacRel;
    Name        VacRelName;
-   MemoryContext old;
-   List       *le;
-   List       *anal_cols2 = NIL;
-
-   if (anal_cols != NIL && !analyze)
-       elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
+   VRelList    vrl,
+               cur;
 
    /*
     * We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
     * behavior.
     */
    if (IsTransactionBlock())
-       elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+       elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
 
-   if (verbose)
+   if (vacstmt->verbose)
        MESSAGE_LEVEL = NOTICE;
    else
        MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
                                        ALLOCSET_DEFAULT_INITSIZE,
                                        ALLOCSET_DEFAULT_MAXSIZE);
 
-   /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
-   if (vacrel)
+   /* Convert vacrel, which is just a string, to a Name */
+   if (vacstmt->vacrel)
    {
-       namestrcpy(&VacRel, vacrel);
+       namestrcpy(&VacRel, vacstmt->vacrel);
        VacRelName = &VacRel;
    }
    else
        VacRelName = NULL;
 
-   /* must also copy the column list, if any, to safe storage */
-   old = MemoryContextSwitchTo(vac_context);
-   foreach(le, anal_cols)
-   {
-       char       *col = (char *) lfirst(le);
-
-       anal_cols2 = lappend(anal_cols2, pstrdup(col));
-   }
-   MemoryContextSwitchTo(old);
+   /* Build list of relations to process (note this lives in vac_context) */
+   vrl = getrels(VacRelName, stmttype);
 
    /*
     * Start up the vacuum cleaner.
-    *
-    * NOTE: since this commits the current transaction, the memory holding
-    * any passed-in parameters gets freed here.  We must have already
-    * copied pass-by-reference parameters to safe storage.  Don't make me
-    * fix this again!
     */
    vacuum_init();
 
-   /* vacuum the database */
-   vac_vacuum(VacRelName, analyze, anal_cols2);
+   /*
+    * Process each selected relation.  We are careful to process
+    * each relation in a separate transaction in order to avoid holding
+    * too many locks at one time.
+    */
+   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+   {
+       if (vacstmt->vacuum)
+           vacuum_rel(cur->vrl_relid);
+       /* analyze separately so locking is minimized */
+       if (vacstmt->analyze)
+           analyze_rel(cur->vrl_relid, vacstmt);
+   }
 
    /* clean up */
    vacuum_shutdown();
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
  *     PostgresMain().
  */
 static void
-vacuum_init()
+vacuum_init(void)
 {
    /* matches the StartTransaction in PostgresMain() */
    CommitTransactionCommand();
 }
 
 static void
-vacuum_shutdown()
+vacuum_shutdown(void)
 {
    /* on entry, we are not in a transaction */
 
@@ -223,34 +287,10 @@ vacuum_shutdown()
 }
 
 /*
- * vac_vacuum() -- vacuum the database.
- *
- *     This routine builds a list of relations to vacuum, and then calls
- *     code that vacuums them one at a time.  We are careful to vacuum each
- *     relation in a separate transaction in order to avoid holding too many
- *     locks at one time.
+ * Build a list of VRelListData nodes for each relation to be processed
  */
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
-   VRelList    vrl,
-               cur;
-
-   /* get list of relations */
-   vrl = getrels(VacRelP);
-
-   /* vacuum each heap relation */
-   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
-   {
-       vacuum_rel(cur->vrl_relid);
-       /* analyze separately so locking is minimized */
-       if (analyze)
-           analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
-   }
-}
-
 static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
 {
    Relation    rel;
    TupleDesc   tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
    char       *rname;
    char        rkind;
    bool        n;
-   bool        found = false;
    ScanKeyData key;
 
-   StartTransactionCommand();
-
-   if (NameStr(*VacRelP))
+   if (VacRelP)
    {
 
        /*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
    }
    else
    {
+       /* find all relations listed in pg_class */
        ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
                               F_CHAREQ, CharGetDatum('r'));
    }
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
 
    while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
    {
-       found = true;
-
        d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
-       rname = (char *) DatumGetPointer(d);
+       rname = (char *) DatumGetName(d);
 
        d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
        rkind = DatumGetChar(d);
 
        if (rkind != RELKIND_RELATION)
        {
-           elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+           elog(NOTICE, "%s: can not process indexes, views or special system tables",
+                stmttype);
            continue;
        }
 
-       /* get a relation list entry for this guy */
+       /* Make a relation list entry for this guy */
        if (vrl == (VRelList) NULL)
            vrl = cur = (VRelList)
                MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
    heap_endscan(scan);
    heap_close(rel, AccessShareLock);
 
-   if (!found)
-       elog(NOTICE, "Vacuum: table not found");
-
-   CommitTransactionCommand();
+   if (vrl == NULL)
+       elog(NOTICE, "%s: table not found", stmttype);
 
    return vrl;
 }
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
     */
    vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
    vacrelstats->relid = relid;
-   vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+   vacrelstats->num_pages = 0;
+   vacrelstats->num_tuples = 0;
    vacrelstats->hasindex = false;
 
    GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
        vacrelstats->hasindex = true;
    else
        vacrelstats->hasindex = false;
-#ifdef NOT_USED
 
+#ifdef NOT_USED
    /*
     * reindex in VACUUM is dangerous under WAL. ifdef out until it
     * becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
    heap_close(onerel, NoLock);
 
    /* update statistics in pg_class */
-   update_relstats(vacrelstats->relid, vacrelstats->num_pages,
-                   vacrelstats->num_tuples, vacrelstats->hasindex,
-                   vacrelstats);
+   vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+                       vacrelstats->num_tuples, vacrelstats->hasindex);
 
    /*
     * Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    char       *relname;
    VacPage     vacpage,
                vp;
+   long        num_tuples;
    uint32      tups_vacuumed,
-               num_tuples,
                nkeep,
                nunused,
                ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    /* save stats in the rel list for use later */
    vacrelstats->num_tuples = num_tuples;
    vacrelstats->num_pages = nblocks;
-/*   vacrelstats->natts = attr_cnt;*/
    if (num_tuples == 0)
        min_tlen = max_tlen = 0;
    vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    }
 
    elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
         nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
         new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 {
    Buffer      buf;
    VacPage    *vacpage;
-   int         nblocks;
+   long        nblocks;
    int         i;
 
    nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
    /* truncate relation if there are some empty end-pages */
    if (vacuum_pages->empty_end_pages > 0)
    {
-       elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+       elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
             RelationGetRelationName(onerel),
             vacrelstats->num_pages, nblocks);
        nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
  *
  */
 static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
-   int         nitups;
+   long        nitups;
    int         nipages;
    struct rusage ru0;
 
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
 
    /* now update statistics in pg_class */
    nipages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
         RelationGetRelationName(indrel), nipages, nitups,
         show_rusage(&ru0));
 
    if (nitups != num_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
             RelationGetRelationName(indrel), nitups, num_tuples);
 
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
  *     pg_class.
  */
 static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+            long num_tuples, int keep_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
    ItemPointer heapptr;
    int         tups_vacuumed;
-   int         num_index_tuples;
+   long        num_index_tuples;
    int         num_pages;
    VacPage     vp;
    struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
 
    /* now update statistics in pg_class */
    num_pages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel),
+                       num_pages, num_index_tuples, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
         RelationGetRelationName(indrel), num_pages,
         num_index_tuples - keep_tuples, tups_vacuumed,
         show_rusage(&ru0));
 
    if (num_index_tuples != num_tuples + keep_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
          RelationGetRelationName(indrel), num_index_tuples, num_tuples);
 
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
 }
 
 /*
- * update_relstats() -- update statistics for one relation
+ * vac_update_relstats() -- update statistics for one relation
  *
  *     Update the whole-relation statistics that are kept in its pg_class
  *     row.  There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
  *     we updated these tuples in the usual way, vacuuming pg_class itself
  *     wouldn't work very well --- by the time we got done with a vacuum
  *     cycle, most of the tuples in pg_class would've been obsoleted.
- *     Updating pg_class's own statistics would be especially tricky.
  *     Of course, this only works for fixed-size never-null columns, but
  *     these are.
  */
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
-               VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                   bool hasindex)
 {
    Relation    rd;
    HeapTupleData rtup;


diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c

index 12c6f82a8b224c0f773f79c7b53132447467d399..e0543a2810977526886fee0d639ec76cc069463f 100644 (file)


--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,24 +20,24 @@
 #include "utils/tuplesort.h"
 
 /* ----------------------------------------------------------------
- *     FormSortKeys(node)
+ *     ExtractSortKeys
  *
- *     Forms the structure containing information used to sort the relation.
+ *     Extract the sorting key information from the plan node.
  *
- *     Returns an array of ScanKeyData.
+ *     Returns two palloc'd arrays, one of sort operator OIDs and
+ *     one of attribute numbers.
  * ----------------------------------------------------------------
  */
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+               Oid **sortOperators,
+               AttrNumber **attNums)
 {
-   ScanKey     sortkeys;
    List       *targetList;
-   List       *tl;
    int         keycount;
-   Resdom     *resdom;
-   AttrNumber  resno;
-   Index       reskey;
-   Oid         reskeyop;
+   Oid        *sortOps;
+   AttrNumber *attNos;
+   List       *tl;
 
    /*
     * get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
    keycount = sortnode->keycount;
 
    /*
-    * first allocate space for scan keys
+    * first allocate space for results
     */
    if (keycount <= 0)
-       elog(ERROR, "FormSortKeys: keycount <= 0");
-   sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
-   MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+       elog(ERROR, "ExtractSortKeys: keycount <= 0");
+   sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+   MemSet(sortOps, 0, keycount * sizeof(Oid));
+   *sortOperators = sortOps;
+   attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+   MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+   *attNums = attNos;
 
    /*
-    * form each scan key from the resdom info in the target list
+    * extract info from the resdom nodes in the target list
     */
    foreach(tl, targetList)
    {
        TargetEntry *target = (TargetEntry *) lfirst(tl);
-
-       resdom = target->resdom;
-       resno = resdom->resno;
-       reskey = resdom->reskey;
-       reskeyop = resdom->reskeyop;
+       Resdom     *resdom = target->resdom;
+       Index       reskey = resdom->reskey;
 
        if (reskey > 0)         /* ignore TLEs that are not sort keys */
        {
-           ScanKeyEntryInitialize(&sortkeys[reskey - 1],
-                                  0x0,
-                                  resno,
-                                  (RegProcedure) reskeyop,
-                                  (Datum) 0);
+           Assert(reskey <= keycount);
+           sortOps[reskey - 1] = resdom->reskeyop;
+           attNos[reskey - 1] = resdom->resno;
        }
    }
-
-   return sortkeys;
 }
 
 /* ----------------------------------------------------------------
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
    {
        Plan       *outerNode;
        TupleDesc   tupDesc;
-       int         keycount;
-       ScanKey     sortkeys;
+       Oid        *sortOperators;
+       AttrNumber *attNums;
 
        SO1_printf("ExecSort: %s\n",
                   "sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
 
        outerNode = outerPlan((Plan *) node);
        tupDesc = ExecGetTupType(outerNode);
-       keycount = node->keycount;
-       sortkeys = (ScanKey) sortstate->sort_Keys;
 
-       tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
-                                             true /* randomAccess */ );
+       ExtractSortKeys(node, &sortOperators, &attNums);
 
+       tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+                                             sortOperators, attNums,
+                                             true /* randomAccess */ );
        sortstate->tuplesortstate = (void *) tuplesortstate;
 
+       pfree(sortOperators);
+       pfree(attNums);
+
        /*
         * Scan the subplan and feed all the tuples to tuplesort.
         */
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
     */
    sortstate = makeNode(SortState);
    sortstate->sort_Done = false;
-   sortstate->sort_Keys = NULL;
    sortstate->tuplesortstate = NULL;
 
    node->sortstate = sortstate;
@@ -258,11 +257,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
    outerPlan = outerPlan((Plan *) node);
    ExecInitNode(outerPlan, estate, (Plan *) node);
 
-   /*
-    * initialize sortstate information
-    */
-   sortstate->sort_Keys = FormSortKeys(node);
-
    /*
     * initialize tuple type.  no need to initialize projection info
     * because this node doesn't do projections.
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
        tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
    sortstate->tuplesortstate = NULL;
 
-   if (sortstate->sort_Keys != NULL)
-       pfree(sortstate->sort_Keys);
-
    pfree(sortstate);
    node->sortstate = NULL;
 


diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index ad50630931e357a1ca7bae5f806f8cc242062722..ee5a803b8025ac9817834537bb5b4ccd10708527 100644 (file)


--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
    newnode->left_pathkey = NIL;
    newnode->right_pathkey = NIL;
    newnode->hashjoinoperator = from->hashjoinoperator;
-   newnode->left_dispersion = from->left_dispersion;
-   newnode->right_dispersion = from->right_dispersion;
+   newnode->left_bucketsize = from->left_bucketsize;
+   newnode->right_bucketsize = from->right_bucketsize;
 
    return newnode;
 }
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
 {
    VacuumStmt *newnode = makeNode(VacuumStmt);
 
-   newnode->verbose = from->verbose;
+   newnode->vacuum = from->vacuum;
    newnode->analyze = from->analyze;
+   newnode->verbose = from->verbose;
    if (from->vacrel)
        newnode->vacrel = pstrdup(from->vacrel);
-   Node_Copy(from, newnode, va_spec);
+   Node_Copy(from, newnode, va_cols);
 
    return newnode;
 }


diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 06ee63bbacd05398c5445bd4ce4f8dfb169090da..284a534aa966f03a5f69da55e5faa89a96925b1e 100644 (file)


--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
        return false;
 
    /*
-    * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+    * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
     * since they may not be set yet, and should be derivable from the
     * clause anyway
     */
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
 static bool
 _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
 {
-   if (a->verbose != b->verbose)
+   if (a->vacuum != b->vacuum)
        return false;
    if (a->analyze != b->analyze)
        return false;
+   if (a->verbose != b->verbose)
+       return false;
    if (!equalstr(a->vacrel, b->vacrel))
        return false;
-   if (!equal(a->va_spec, b->va_spec))
+   if (!equal(a->va_cols, b->va_cols))
        return false;
 
    return true;


diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 9a071e7a250df88efe03c183927ffeadfa07a86c..4c0c1b03ef544c60b9161208ceb950a83862419c 100644 (file)


--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
  *
  * NOTES
  *   Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
 
    /* eval_cost is not part of saved representation; compute on first use */
    local_node->eval_cost = -1;
-   /* ditto for cached pathkeys and dispersion */
+   /* ditto for cached pathkeys and bucketsize */
    local_node->left_pathkey = NIL;
    local_node->right_pathkey = NIL;
-   local_node->left_dispersion = -1;
-   local_node->right_dispersion = -1;
+   local_node->left_bucketsize = -1;
+   local_node->right_bucketsize = -1;
 
    return local_node;
 }


diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index c52af72a16b824c1f37078bb4e185d8a34b22d2b..bdfbbb18186d9c7ef4201fa8eb294bbdb55e298c 100644 (file)


--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,11 +50,15 @@
 
 #include <math.h>
 
+#include "catalog/pg_statistic.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 
 /*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
  * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize statistic
  *             for the inner hash key.
  */
 void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
              Path *outer_path,
              Path *inner_path,
              List *restrictlist,
-             Selectivity innerdispersion)
+             Selectivity innerbucketsize)
 {
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
 
    /*
     * The number of tuple comparisons needed is the number of outer
-    * tuples times the typical hash bucket size.  nodeHash.c tries for
-    * average bucket loading of NTUP_PER_BUCKET, but that goal will be
-    * reached only if data values are uniformly distributed among the
-    * buckets.  To be conservative, we scale up the target bucket size by
-    * the number of inner rows times inner dispersion, giving an estimate
-    * of the typical number of duplicates of each value. We then charge
-    * one cpu_operator_cost per tuple comparison.
+    * tuples times the typical number of tuples in a hash bucket,
+    * which is the inner relation size times its bucketsize fraction.
+    * We charge one cpu_operator_cost per tuple comparison.
     */
    run_cost += cpu_operator_cost * outer_path->parent->rows *
-       NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+       ceil(inner_path->parent->rows * innerbucketsize);
 
    /*
     * Estimate the number of tuples that get through the hashing filter
     * as one per tuple in the two source relations.  This could be a
     * drastic underestimate if there are many equal-keyed tuples in
-    * either relation, but we have no good way of estimating that...
+    * either relation, but we have no simple way of estimating that;
+    * and since this is only a second-order parameter, it's probably
+    * not worth expending a lot of effort on the estimate.
     */
    ntuples = outer_path->parent->rows + inner_path->parent->rows;
 
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
    /*
     * Bias against putting larger relation on inside.  We don't want an
     * absolute prohibition, though, since larger relation might have
-    * better dispersion --- and we can't trust the size estimates
+    * better bucketsize --- and we can't trust the size estimates
     * unreservedly, anyway.  Instead, inflate the startup cost by the
     * square root of the size ratio.  (Why square root?  No real good
     * reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
    path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin.  We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation.  However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution.  Otherwise the
+ * buckets will be nonuniformly occupied.  If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often.  Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size.  We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1.  This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want.  We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+   Oid         relid;
+   RelOptInfo *rel;
+   HeapTuple   tuple;
+   Form_pg_statistic stats;
+   double      estfract,
+               ndistinct,
+               needdistinct,
+               mcvfreq,
+               avgfreq;
+   float4     *numbers;
+   int         nnumbers;
+
+   /*
+    * Lookup info about var's relation and attribute;
+    * if none available, return default estimate.
+    */
+   if (!IsA(var, Var))
+       return 0.1;
+
+   relid = getrelid(var->varno, root->rtable);
+   if (relid == InvalidOid)
+       return 0.1;
+
+   rel = get_base_rel(root, var->varno);
+
+   if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+       return 0.1;             /* ensure we can divide below */
+
+   tuple = SearchSysCache(STATRELATT,
+                          ObjectIdGetDatum(relid),
+                          Int16GetDatum(var->varattno),
+                          0, 0);
+   if (!HeapTupleIsValid(tuple))
+   {
+       /*
+        * Perhaps the Var is a system attribute; if so, it will have no
+        * entry in pg_statistic, but we may be able to guess something
+        * about its distribution anyway.
+        */
+       switch (var->varattno)
+       {
+           case ObjectIdAttributeNumber:
+           case SelfItemPointerAttributeNumber:
+               /* these are unique, so buckets should be well-distributed */
+               return (double) NTUP_PER_BUCKET / rel->rows;
+           case TableOidAttributeNumber:
+               /* hashing this is a terrible idea... */
+               return 1.0;
+       }
+       return 0.1;
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+   /*
+    * Obtain number of distinct data values in raw relation.
+    */
+   ndistinct = stats->stadistinct;
+   if (ndistinct < 0.0)
+       ndistinct = -ndistinct * rel->tuples;
+
+   /*
+    * Adjust ndistinct to account for restriction clauses.  Observe we are
+    * assuming that the data distribution is affected uniformly by the
+    * restriction clauses!
+    *
+    * XXX Possibly better way, but much more expensive: multiply by
+    * selectivity of rel's restriction clauses that mention the target Var.
+    */
+   ndistinct *= rel->rows / rel->tuples;
+
+   /*
+    * Discourage use of hash join if there seem not to be very many distinct
+    * data values.  The threshold here is somewhat arbitrary, as is the
+    * fraction used to "discourage" the choice.
+    */
+   if (ndistinct < 50.0)
+   {
+       ReleaseSysCache(tuple);
+       return 0.5;
+   }
+
+   /*
+    * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
+    * ie the number of rows after applying restriction clauses, because
+    * that's what the fraction will eventually be multiplied by in
+    * cost_hashjoin.
+    */
+   estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+   /*
+    * Adjust estimated bucketsize if too few distinct values to fill
+    * all the buckets.
+    */
+   needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+   if (ndistinct < needdistinct)
+       estfract *= needdistinct / ndistinct;
+
+   /*
+    * Look up the frequency of the most common value, if available.
+    */
+   mcvfreq = 0.0;
+
+   if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        NULL, NULL, &numbers, &nnumbers))
+   {
+       /*
+        * The first MCV stat is for the most common value.
+        */
+       if (nnumbers > 0)
+           mcvfreq = numbers[0];
+       free_attstatsslot(var->vartype, NULL, 0,
+                         numbers, nnumbers);
+   }
+
+   /*
+    * Adjust estimated bucketsize upward to account for skewed distribution.
+    */
+   avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+   if (avgfreq > 0.0 && mcvfreq > avgfreq)
+       estfract *= mcvfreq / avgfreq;
+
+   ReleaseSysCache(tuple);
+
+   return (Selectivity) estfract;
+}
+
 
 /*
  * cost_qual_eval


diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index d41336ddcee0f9c26ad9a2ab0b1410a1f0ae38c7..cd7cabd41deb7bf52b323b437d847eede311b8cc 100644 (file)


--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,15 +8,15 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 #include 
 
-#include "postgres.h"
-
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
                     List *restrictlist, JoinType jointype);
 static Path *best_innerjoin(List *join_paths, List *outer_relid,
               JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
                         RelOptInfo *outerrel,
                         RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
        Expr       *clause;
        Var        *left,
                   *right;
-       Selectivity innerdispersion;
+       Selectivity innerbucketsize;
        List       *hashclauses;
 
        if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
 
        /*
         * Check if clause is usable with these sub-rels, find inner side,
-        * estimate dispersion of inner var for costing purposes.
+        * estimate bucketsize of inner var for costing purposes.
         *
         * Since we tend to visit the same clauses over and over when
-        * planning a large query, we cache the dispersion estimates in
+        * planning a large query, we cache the bucketsize estimates in
         * the RestrictInfo node to avoid repeated lookups of statistics.
         */
        if (intMember(left->varno, outerrelids) &&
            intMember(right->varno, innerrelids))
        {
            /* righthand side is inner */
-           innerdispersion = restrictinfo->right_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->right_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, right);
-               restrictinfo->right_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, right);
+               restrictinfo->right_bucketsize = innerbucketsize;
            }
        }
        else if (intMember(left->varno, innerrelids) &&
                 intMember(right->varno, outerrelids))
        {
            /* lefthand side is inner */
-           innerdispersion = restrictinfo->left_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->left_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, left);
-               restrictinfo->left_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, left);
+               restrictinfo->left_bucketsize = innerbucketsize;
            }
        }
        else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
                                      innerrel->cheapest_total_path,
                                      restrictlist,
                                      hashclauses,
-                                     innerdispersion));
+                                     innerbucketsize));
        if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
            add_path(joinrel, (Path *)
                     create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
                                          innerrel->cheapest_total_path,
                                          restrictlist,
                                          hashclauses,
-                                         innerdispersion));
+                                         innerbucketsize));
    }
 }
 
@@ -866,31 +865,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
    return cheapest;
 }
 
-/*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large. We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
-   Oid         relid;
-
-   if (!IsA(var, Var))
-       return 0.1;
-
-   relid = getrelid(var->varno, root->rtable);
-
-   if (relid == InvalidOid)
-       return 0.1;
-
-   return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
 /*
  * select_mergejoin_clauses
  *   Select mergejoin clauses that are usable for a particular join.


diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8c3b00289d3e7d467aeb03dcc1b53eb02f5a3a2b..2d264c46881730ba4ace2ade745fe6942c9d49fb 100644 (file)


--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,14 +10,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "catalog/pg_index.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
         */
        if (resdom->reskey == 0)
        {
-           /* OK, mark it as a sort key and set the sort operator regproc */
+           /* OK, mark it as a sort key and set the sort operator */
            resdom->reskey = ++numsortkeys;
-           resdom->reskeyop = get_opcode(pathkey->sortop);
+           resdom->reskeyop = pathkey->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 7c3e15a8f88d81b206e4d3f618eae9658294ad6a..5d67e02dacb44bce678665c592ab184f588469a5 100644 (file)


--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 
-#include "postgres.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
    restrictinfo->left_pathkey = NIL;   /* not computable yet */
    restrictinfo->right_pathkey = NIL;
    restrictinfo->hashjoinoperator = InvalidOid;
-   restrictinfo->left_dispersion = -1; /* not computed until needed */
-   restrictinfo->right_dispersion = -1;
+   restrictinfo->left_bucketsize = -1; /* not computed until needed */
+   restrictinfo->right_bucketsize = -1;
 
    /*
     * Retrieve all relids and vars contained within the clause.


diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index b2ab4600209dd566fd281c5110f0e1f6ba5c1cb1..0aba4808c160f3bf5ba3a9cc3fd2c6cf26fa2fa3 100644 (file)


--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
            {
                /* OK, insert the ordering info needed by the executor. */
                resdom->reskey = ++keyno;
-               resdom->reskeyop = get_opcode(grpcl->sortop);
+               resdom->reskeyop = grpcl->sortop;
            }
        }
 
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
        {
            /* OK, insert the ordering info needed by the executor. */
            resdom->reskey = ++keyno;
-           resdom->reskeyop = get_opcode(sortcl->sortop);
+           resdom->reskeyop = sortcl->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 0b173466cf98061a3add13f850ba9e750dd9f4e0..ede4159d9707629729b5dffbc32f241f48629e72 100644 (file)


--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
        newinfo->eval_cost = -1;        /* reset this too */
        newinfo->left_pathkey = NIL;    /* and these */
        newinfo->right_pathkey = NIL;
-       newinfo->left_dispersion = -1;
-       newinfo->right_dispersion = -1;
+       newinfo->left_bucketsize = -1;
+       newinfo->right_bucketsize = -1;
 
        return (Node *) newinfo;
    }


diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index cfba3ee395f2e0216f74c1e2497a7a8f5897d74b..407c132b4f7a6388b093806fd3eb01286906e084 100644 (file)


--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,14 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'hashclauses' is a list of the hash join clause (always a 1-element list)
  *     (this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
  *
  */
 HashPath   *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion)
+                    Selectivity innerbucketsize)
 {
    HashPath   *pathnode = makeNode(HashPath);
 
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                  outer_path,
                  inner_path,
                  restrict_clauses,
-                 innerdispersion);
+                 innerbucketsize);
 
    return pathnode;
 }


diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index 4f711df203c846acf4402ed131def54dbbf94443..ee3523553e8693ac1b7762d01ebbabc3697a4d7a 100644 (file)


--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,11 +9,10 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include 


diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 4687a5599623d09b416357721488369cc8eaaa38..76cc095bc4edcdbf4cfecad9627a1e5a29d03256 100644 (file)


--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
        /* just the named tables */
        foreach(l, forUpdate)
        {
-           char       *relname = lfirst(l);
+           char       *relname = strVal(lfirst(l));
 
            i = 0;
            foreach(rt, qry->rtable)


diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index bed0ce239a42e2f75c48bdda8aff299cb2f02f9b..40c379aca51f280882945b9f5caf4aaeccc4475f 100644 (file)


--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
  *
  * HISTORY
  *   AUTHOR            DATE            MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
    char                *str;
    bool                boolean;
    JoinType            jtype;
-   InhOption           inhOpt;
    List                *list;
    Node                *node;
    Value               *value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
 
 %type    stmt,
        AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+       AnalyzeStmt,
        ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
        CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
        CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
 %type    select_no_parens, select_with_parens, select_clause,
                simple_select
 
-%type     alter_column_action
+%type     alter_column_default
 %type     drop_behavior
 
 %type    createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
        OptTableElementList, OptInherit, definition, opt_distinct,
        opt_with, func_args, func_args_list, func_as,
        oper_argtypes, RuleActionList, RuleActionMulti,
-       opt_column_list, columnList, opt_va_list, va_list,
+       opt_column_list, columnList, opt_name_list,
        sort_clause, sortby_list, index_params, index_list, name_list,
        from_clause, from_list, opt_array_bounds,
        expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
 %type    substr_from, substr_for
 
 %type     opt_binary, opt_using, opt_instead, opt_cursor
-%type     opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type  opt_inh_star, opt_only
+%type     opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
 
 %type    copy_dirn, direction, reindex_type, drop_type,
        opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
        NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
        OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
        REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
-       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+       STATISTICS, STDIN, STDOUT, SYSID,
        TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, 
        UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
 
@@ -470,6 +469,7 @@ stmt :  AlterSchemaStmt
        | CreatedbStmt
        | DropdbStmt
        | VacuumStmt
+       | AnalyzeStmt
        | VariableSetStmt
        | VariableShowStmt
        | VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-       ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+       ALTER TABLE relation_expr ADD opt_column columnDef
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'A';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $7;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $6;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
-       | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+       | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'T';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->def = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+       | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+               {
+                   AlterTableStmt *n = makeNode(AlterTableStmt);
+                   n->subtype = 'S';
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = (Node *) makeInteger($9);
+                   $$ = (Node *)n;
+               }
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'D';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-       | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+       | ALTER TABLE relation_expr ADD TableConstraint
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'C';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $6;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $5;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'X';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE <name> CREATE TOAST TABLE */
@@ -997,6 +1008,7 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'E';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE <name> OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'U';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    n->name = $6;
                    $$ = (Node *)n;
                }
        ;
 
-alter_column_action:
+alter_column_default:
        SET DEFAULT a_expr
            {
                /* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference:  NO ACTION                { $$ = FKCONSTR_ON_KEY_NOACTION; }
        | SET DEFAULT                   { $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
        ;
 
-opt_only: ONLY                                 { $$ = INH_NO; }
-        | /*EMPTY*/                                { $$ = INH_DEFAULT; } 
-       ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'   { $$ = $3; }
        | /*EMPTY*/                                 { $$ = NIL; }
        ;
@@ -2598,14 +2607,13 @@ opt_force:  FORCE                                   {  $$ = TRUE; }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
                    RenameStmt *n = makeNode(RenameStmt);
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->column = $7;
-                   n->newname = $9;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->column = $6;
+                   n->newname = $8;
                    $$ = (Node *)n;
                }
        ;
@@ -2994,49 +3002,71 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
                    n->vacrel = NULL;
-                   n->va_spec = NIL;
+                   n->va_cols = NIL;
                    $$ = (Node *)n;
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose relation_name
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
-                   n->vacrel = $4;
-                   n->va_spec = $5;
-                   if ( $5 != NIL && !$4 )
-                       elog(ERROR,"VACUUM syntax error at or near \"(\""
-                           "\n\tRelation name must be specified");
+                   n->vacrel = $3;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | VACUUM opt_verbose AnalyzeStmt
+               {
+                   VacuumStmt *n = (VacuumStmt *) $3;
+                   n->vacuum = true;
+                   n->verbose |= $2;
                    $$ = (Node *)n;
                }
        ;
 
-opt_verbose:  VERBOSE                          { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = NULL;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = $3;
+                   n->va_cols = $4;
+                   $$ = (Node *)n;
+               }
        ;
 
-opt_analyze:  ANALYZE                          { $$ = TRUE; }
+analyze_keyword:  ANALYZE                      { $$ = TRUE; }
        |     ANALYSE /* British */             { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-opt_va_list:  '(' va_list ')'                  { $$ = $2; }
-       | /*EMPTY*/                             { $$ = NIL; }
+opt_verbose:  VERBOSE                          { $$ = TRUE; }
+       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-va_list:  name
-               { $$ = makeList1($1); }
-       | va_list ',' name
-               { $$ = lappend($1, $3); }
+opt_name_list:  '(' name_list ')'              { $$ = $2; }
+       | /*EMPTY*/                             { $$ = NIL; }
        ;
 
 
@@ -3160,12 +3190,12 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only relation_name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
                    DeleteStmt *n = makeNode(DeleteStmt);
-                   n->inhOpt = $3;
-                   n->relname = $4;
-                   n->whereClause = $5;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->whereClause = $4;
                    $$ = (Node *)n;
                }
        ;
@@ -3202,17 +3232,17 @@ opt_lmode:  SHARE               { $$ = TRUE; }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
                    UpdateStmt *n = makeNode(UpdateStmt);
-                   n->inhOpt = $2;
-                   n->relname = $3;
-                   n->targetList = $5;
-                   n->fromClause = $6;
-                   n->whereClause = $7;
+                   n->relname = $2->relname;
+                   n->inhOpt = $2->inhOpt;
+                   n->targetList = $4;
+                   n->fromClause = $5;
+                   n->whereClause = $6;
                    $$ = (Node *)n;
                }
        ;
@@ -3545,10 +3575,6 @@ select_offset_value: Iconst
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                             { $$ = INH_YES; }
-       | /*EMPTY*/                             { $$ = INH_DEFAULT; }
-       ;
-
 relation_name_list:  name_list;
 
 name_list:  name
@@ -3576,7 +3602,7 @@ opt_for_update_clause:    for_update_clause       { $$ = $1; }
        | /* EMPTY */                           { $$ = NULL; }
        ;
 
-update_list:  OF va_list                       { $$ = $2; }
+update_list:  OF name_list                     { $$ = $2; }
        | /* EMPTY */                           { $$ = makeList1(NULL); }
        ;
 
@@ -5525,6 +5551,7 @@ TokenId:  ABSOLUTE                        { $$ = "absolute"; }
        | SHARE                         { $$ = "share"; }
        | START                         { $$ = "start"; }
        | STATEMENT                     { $$ = "statement"; }
+       | STATISTICS                    { $$ = "statistics"; }
        | STDIN                         { $$ = "stdin"; }
        | STDOUT                        { $$ = "stdout"; }
        | SYSID                         { $$ = "sysid"; }


diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c

index 402dbfd28ca561a2c9d9ba513e7986dda06ec7df..8ab19f86ae8582213730311845cdbdcae0977f18 100644 (file)


--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c

index f5324cb37355532ef4233a335bc40fb5e5eb635e..e1d49842fd2398a3338bf8fb8329c7ca0677a2fe 100644 (file)


--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ static struct
    }
 };
 
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
 
 
 /*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
 
                foreach(l, pstate->p_forUpdate)
                {
-                   char       *rname = lfirst(l);
+                   char       *rname = strVal(lfirst(l));
 
                    if (strcmp(relname, rname) == 0)
                        return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
 
 #endif
 
-#ifdef NOT_USED
-/*
- * This should only be used if the relation is already
- * heap_open()'ed.  Use the cache version
- * for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
-   return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
 /* given attribute id, return type of that attribute */
 /*
  * This should only be used if the relation is already


diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c

index ae6cd20a5db3838c76a6f006232a8f04e5d4a800..b616f7e68ef875a0774de3f270c4cf98aa3dcc94 100644 (file)


--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
                                        interpretInhOption(stmt->inhOpt),
                                            (ColumnDef *) stmt->def);
                        break;
-                   case 'T':   /* ALTER COLUMN */
-                       AlterTableAlterColumn(stmt->relname,
+                   case 'T':   /* ALTER COLUMN DEFAULT */
+                       AlterTableAlterColumnDefault(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
-                                             stmt->name,
-                                             stmt->def);
+                                                    stmt->name,
+                                                    stmt->def);
                        break;
-                   case 'D':   /* ALTER DROP */
+                   case 'S':   /* ALTER COLUMN STATISTICS */
+                       AlterTableAlterColumnStatistics(stmt->relname,
+                                       interpretInhOption(stmt->inhOpt),
+                                                       stmt->name,
+                                                       stmt->def);
+                       break;
+                   case 'D':   /* DROP COLUMN */
                        AlterTableDropColumn(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
                                             stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
            break;
 
        case T_VacuumStmt:
-           set_ps_display(commandTag = "VACUUM");
+           if (((VacuumStmt *) parsetree)->vacuum)
+               commandTag = "VACUUM";
+           else
+               commandTag = "ANALYZE";
+           set_ps_display(commandTag);
 
-           vacuum(((VacuumStmt *) parsetree)->vacrel,
-                  ((VacuumStmt *) parsetree)->verbose,
-                  ((VacuumStmt *) parsetree)->analyze,
-                  ((VacuumStmt *) parsetree)->va_spec);
+           vacuum((VacuumStmt *) parsetree);
            break;
 
        case T_ExplainStmt:


diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 1fe0afb0a35b44ad34e76fbb73439194a73690ad..41ba82db7b574d6ba6d095a25092376d04702250 100644 (file)


--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,9 +57,6 @@
 /* default selectivity estimate for pattern-match operators such as LIKE */
 #define DEFAULT_MATCH_SEL  0.01
 
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO  0.1
-
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                  Datum lobound, Datum hibound, Oid boundstypid,
                  double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
 static unsigned char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid,
-                int *typlen,
-                bool *typbyval,
-                int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
-                Oid typid, int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival);
+                            Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                                 Form_pg_statistic stats);
 static Selectivity prefix_selectivity(char *prefix,
                   Oid relid,
                   AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
-
-   if (NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_EQ_SEL;
-   else
+   Oid         typid;
+   int32       typmod;
+   HeapTuple   statsTuple;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      selec;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+   /* get info about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   /* get stats for the attribute, if available */
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (HeapTupleIsValid(statsTuple))
    {
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       double      nullfrac;
-       double      commonfrac;
-       Datum       commonval;
-       double      selec;
-
-       /* get info about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       /* get stats for the attribute, if available */
-       if (getattstatistics(relid, attno, typid, typmod,
-                            &nullfrac, &commonfrac, &commonval,
-                            NULL, NULL))
-       {
-           if (flag & SEL_CONSTANT)
-           {
+       Form_pg_statistic stats;
 
-               /*
-                * Is the constant "=" to the column's most common value?
-                * (Although the operator may not really be "=", we will
-                * assume that seeing whether it returns TRUE for the most
-                * common value is useful information. If you don't like
-                * it, maybe you shouldn't be using eqsel for your
-                * operator...)
-                */
-               RegProcedure eqproc = get_opcode(opid);
-               bool        mostcommon;
+       stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-               if (eqproc == (RegProcedure) NULL)
-                   elog(ERROR, "eqsel: no procedure for operator %u",
-                        opid);
+       if (flag & SEL_CONSTANT)
+       {
+           bool    match = false;
+           int     i;
 
-               /* be careful to apply operator right way 'round */
-               if (flag & SEL_RIGHT)
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              commonval,
-                                                              value));
-               else
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              value,
-                                                            commonval));
+           /*
+            * Is the constant "=" to any of the column's most common
+            * values?  (Although the given operator may not really be
+            * "=", we will assume that seeing whether it returns TRUE
+            * is an appropriate test.  If you don't like this, maybe you
+            * shouldn't be using eqsel for your operator...)
+            */
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                &values, &nvalues,
+                                &numbers, &nnumbers))
+           {
+               FmgrInfo    eqproc;
 
-               if (mostcommon)
-               {
+               fmgr_info(get_opcode(opid), &eqproc);
 
-                   /*
-                    * Constant is "=" to the most common value.  We know
-                    * selectivity exactly (or as exactly as VACUUM could
-                    * calculate it, anyway).
-                    */
-                   selec = commonfrac;
-               }
-               else
+               for (i = 0; i < nvalues; i++)
                {
-
-                   /*
-                    * Comparison is against a constant that is neither
-                    * the most common value nor null.  Its selectivity
-                    * cannot be more than this:
-                    */
-                   selec = 1.0 - commonfrac - nullfrac;
-                   if (selec > commonfrac)
-                       selec = commonfrac;
-
-                   /*
-                    * and in fact it's probably less, so we should apply
-                    * a fudge factor.  The only case where we don't is
-                    * for a boolean column, where indeed we have
-                    * estimated the less-common value's frequency
-                    * exactly!
-                    */
-                   if (typid != BOOLOID)
-                       selec *= NOT_MOST_COMMON_RATIO;
+                   /* be careful to apply operator right way 'round */
+                   if (flag & SEL_RIGHT)
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          values[i],
+                                                          value));
+                   else
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          value,
+                                                          values[i]));
+                   if (match)
+                       break;
                }
            }
            else
            {
+               /* no most-common-value info available */
+               values = NULL;
+               numbers = NULL;
+               i = nvalues = nnumbers = 0;
+           }
 
+           if (match)
+           {
+               /*
+                * Constant is "=" to this common value.  We know
+                * selectivity exactly (or as exactly as VACUUM
+                * could calculate it, anyway).
+                */
+               selec = numbers[i];
+           }
+           else
+           {
                /*
-                * Search is for a value that we do not know a priori, but
-                * we will assume it is not NULL.  Selectivity cannot be
-                * more than this:
+                * Comparison is against a constant that is neither
+                * NULL nor any of the common values.  Its selectivity
+                * cannot be more than this:
                 */
-               selec = 1.0 - nullfrac;
-               if (selec > commonfrac)
-                   selec = commonfrac;
+               double  sumcommon = 0.0;
+               double  otherdistinct;
 
+               for (i = 0; i < nnumbers; i++)
+                   sumcommon += numbers[i];
+               selec = 1.0 - sumcommon - stats->stanullfrac;
+               /*
+                * and in fact it's probably a good deal less.
+                * We approximate that all the not-common values
+                * share this remaining fraction equally, so we
+                * divide by the number of other distinct values.
+                */
+               otherdistinct = get_att_numdistinct(relid, attno,
+                                                   typid, stats)
+                   - nnumbers;
+               if (otherdistinct > 1)
+                   selec /= otherdistinct;
                /*
-                * and in fact it's probably less, so apply a fudge
-                * factor.
+                * Another cross-check: selectivity shouldn't be
+                * estimated as more than the least common
+                * "most common value".
                 */
-               selec *= NOT_MOST_COMMON_RATIO;
+               if (nnumbers > 0 && selec > numbers[nnumbers-1])
+                   selec = numbers[nnumbers-1];
            }
 
-           /* result should be in range, but make sure... */
-           if (selec < 0.0)
-               selec = 0.0;
-           else if (selec > 1.0)
-               selec = 1.0;
-
-           if (!typbyval)
-               pfree(DatumGetPointer(commonval));
+           free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
        }
        else
        {
+           double      ndistinct;
 
            /*
-            * No VACUUM ANALYZE stats available, so make a guess using
-            * the dispersion stat (if we have that, which is unlikely for
-            * a normal attribute; but for a system attribute we may be
-            * able to estimate it).
+            * Search is for a value that we do not know a priori, but
+            * we will assume it is not NULL.  Estimate the selectivity
+            * as non-null fraction divided by number of distinct values,
+            * so that we get a result averaged over all possible values
+            * whether common or uncommon.  (Essentially, we are assuming
+            * that the not-yet-known comparison value is equally likely
+            * to be any of the possible values, regardless of their
+            * frequency in the table.  Is that a good idea?)
+            */
+           selec = 1.0 - stats->stanullfrac;
+           ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+           if (ndistinct > 1)
+               selec /= ndistinct;
+           /*
+            * Cross-check: selectivity should never be
+            * estimated as more than the most common value's.
             */
-           selec = get_attdispersion(relid, attno, 0.01);
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                NULL, NULL,
+                                &numbers, &nnumbers))
+           {
+               if (nnumbers > 0 && selec > numbers[0])
+                   selec = numbers[0];
+               free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+           }
        }
 
-       result = (float8) selec;
+       ReleaseSysCache(statsTuple);
    }
-   PG_RETURN_FLOAT8(result);
+   else
+   {
+       /*
+        * No VACUUM ANALYZE stats available, so make a guess using
+        * estimated number of distinct values and assuming they are
+        * equally common.  (The guess is unlikely to be very good,
+        * but we do know a few special cases.)
+        */
+       selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+   }
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
+   bool        isgt;
+   HeapTuple   oprTuple;
+   HeapTuple   statsTuple;
+   Form_pg_statistic stats;
+   Oid         contype;
+   FmgrInfo    opproc;
+   Oid         typid;
+   int32       typmod;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      mcv_selec,
+               hist_selec,
+               sumcommon;
+   double      selec;
+   int         i;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+   /* Can't do anything useful if no constant to compare against, either */
+   if (!(flag & SEL_CONSTANT))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
-   if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_INEQ_SEL;
+   /*
+    * Force the constant to be on the right to simplify later logic.
+    * This means that we may be dealing with either "<" or ">" cases.
+    */
+   if (flag & SEL_RIGHT)
+   {
+       /* we have x < const */
+       isgt = false;
+   }
    else
    {
-       HeapTuple   oprtuple;
-       Oid         ltype,
-                   rtype,
-                   contype;
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       Datum       hival,
-                   loval;
-       double      val,
-                   high,
-                   low,
-                   numerator,
-                   denominator;
-
-       /*
-        * Get left and right datatypes of the operator so we know what
-        * type the constant is.
-        */
-       oprtuple = SearchSysCache(OPEROID,
-                                 ObjectIdGetDatum(opid),
-                                 0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
-           elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       contype = (flag & SEL_RIGHT) ? rtype : ltype;
-       ReleaseSysCache(oprtuple);
-
-       /* Now get info and stats about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       if (!getattstatistics(relid, attno, typid, typmod,
-                             NULL, NULL, NULL,
-                             &loval, &hival))
+       /* we have const < x, commute to make x > const */
+       opid = get_commutator(opid);
+       if (!opid)
        {
-           /* no stats available, so default result */
+           /* Use default selectivity (should we raise an error instead?) */
            PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
        }
+       isgt = true;
+   }
 
-       /* Convert the values to a uniform comparison scale. */
-       if (!convert_to_scalar(value, contype, &val,
-                              loval, hival, typid,
-                              &low, &high))
-       {
+   /*
+    * The constant might not be the same datatype as the column;
+    * look at the operator's input types to find out what it is.
+    * Also set up to be able to call the operator's execution proc.
+    */
+   oprTuple = SearchSysCache(OPEROID,
+                             ObjectIdGetDatum(opid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(oprTuple))
+       elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+   contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+   fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+   ReleaseSysCache(oprTuple);
+
+   /* Now get info and stats about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (!HeapTupleIsValid(statsTuple))
+   {
+       /* no stats available, so default result */
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-           /*
-            * Ideally we'd produce an error here, on the grounds that the
-            * given operator shouldn't have scalarltsel registered as its
-            * selectivity func unless we can deal with its operand types.
-            * But currently, all manner of stuff is invoking scalarltsel,
-            * so give a default estimate until that can be fixed.
-            */
-           if (!typbyval)
-           {
-               pfree(DatumGetPointer(hival));
-               pfree(DatumGetPointer(loval));
-           }
-           PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
-       }
+   /*
+    * If we have most-common-values info, add up the fractions of the
+    * MCV entries that satisfy MCV OP CONST.  These fractions contribute
+    * directly to the result selectivity.  Also add up the total fraction
+    * represented by MCV entries.
+    */
+   mcv_selec = 0.0;
+   sumcommon = 0.0;
 
-       /* release temp storage if needed */
-       if (!typbyval)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        &values, &nvalues,
+                        &numbers, &nnumbers))
+   {
+       for (i = 0; i < nvalues; i++)
        {
-           pfree(DatumGetPointer(hival));
-           pfree(DatumGetPointer(loval));
+           if (DatumGetBool(FunctionCall2(&opproc,
+                                          values[i],
+                                          value)))
+               mcv_selec += numbers[i];
+           sumcommon += numbers[i];
        }
+       free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+   }
+
+   /*
+    * If there is a histogram, determine which bin the constant falls in,
+    * and compute the resulting contribution to selectivity.
+    *
+    * Someday, VACUUM might store more than one histogram per rel/att,
+    * corresponding to more than one possible sort ordering defined for
+    * the column type.  However, to make that work we will need to figure
+    * out which staop to search for --- it's not necessarily the one we
+    * have at hand!  (For example, we might have a '<=' operator rather
+    * than the '<' operator that will appear in staop.)  For now, assume
+    * that whatever appears in pg_statistic is sorted the same way our
+    * operator sorts.
+    */
+   hist_selec = 0.0;
 
-       if (high <= low)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                        &values, &nvalues,
+                        NULL, NULL))
+   {
+       if (nvalues > 1)
        {
+           double  histfrac;
+           bool    ltcmp;
+
+           ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                              values[0],
+                                              value));
+           if (isgt)
+               ltcmp = !ltcmp;
+           if (!ltcmp)
+           {
+               /* Constant is below lower histogram boundary. */
+               histfrac = 0.0;
+           }
+           else
+           {
+               /*
+                * Scan to find proper location.  This could be made faster
+                * by using a binary-search method, but it's probably not
+                * worth the trouble for typical histogram sizes.
+                */
+               for (i = 1; i < nvalues; i++)
+               {
+                   ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                                      values[i],
+                                                      value));
+                   if (isgt)
+                       ltcmp = !ltcmp;
+                   if (!ltcmp)
+                       break;
+               }
+               if (i >= nvalues)
+               {
+                   /* Constant is above upper histogram boundary. */
+                   histfrac = 1.0;
+               }
+               else
+               {
+                   double      val,
+                               high,
+                               low;
+                   double      binfrac;
 
+                   /*
+                    * We have values[i-1] < constant < values[i].
+                    *
+                    * Convert the constant and the two nearest bin boundary
+                    * values to a uniform comparison scale, and do a linear
+                    * interpolation within this bin.
+                    */
+                   if (convert_to_scalar(value, contype, &val,
+                                         values[i-1], values[i], typid,
+                                         &low, &high))
+                   {
+                       if (high <= low)
+                       {
+                           /* cope if bin boundaries appear identical */
+                           binfrac = 0.5;
+                       }
+                       else if (val <= low)
+                           binfrac = 0.0;
+                       else if (val >= high)
+                           binfrac = 1.0;
+                       else
+                           binfrac = (val - low) / (high - low);
+                   }
+                   else
+                   {
+                       /*
+                        * Ideally we'd produce an error here, on the grounds
+                        * that the given operator shouldn't have scalarltsel
+                        * registered as its selectivity func unless we can
+                        * deal with its operand types.  But currently, all
+                        * manner of stuff is invoking scalarltsel, so give a
+                        * default estimate until that can be fixed.
+                        */
+                       binfrac = 0.5;
+                   }
+                   /*
+                    * Now, compute the overall selectivity across the values
+                    * represented by the histogram.  We have i-1 full bins
+                    * and binfrac partial bin below the constant.
+                    */
+                   histfrac = (double) (i-1) + binfrac;
+                   histfrac /= (double) (nvalues - 1);
+               }
+           }
            /*
-            * If we trusted the stats fully, we could return a small or
-            * large selec depending on which side of the single data
-            * point the constant is on.  But it seems better to assume
-            * that the stats are wrong and return a default...
+            * Now histfrac = fraction of histogram entries below the constant.
+            *
+            * Account for "<" vs ">"
             */
-           result = DEFAULT_INEQ_SEL;
-       }
-       else if (val < low || val > high)
-       {
-
+           hist_selec = isgt ? (1.0 - histfrac) : histfrac;
            /*
-            * If given value is outside the statistical range, return a
-            * small or large value; but not 0.0/1.0 since there is a
-            * chance the stats are out of date.
+            * The histogram boundaries are only approximate to begin
+            * with, and may well be out of date anyway.  Therefore,
+            * don't believe extremely small or large selectivity
+            * estimates.
             */
-           if (flag & SEL_RIGHT)
-               result = (val < low) ? 0.001 : 0.999;
-           else
-               result = (val < low) ? 0.999 : 0.001;
-       }
-       else
-       {
-           denominator = high - low;
-           if (flag & SEL_RIGHT)
-               numerator = val - low;
-           else
-               numerator = high - val;
-           result = numerator / denominator;
+           if (hist_selec < 0.001)
+               hist_selec = 0.001;
+           else if (hist_selec > 0.999)
+               hist_selec = 0.999;
        }
+
+       free_attstatsslot(typid, values, nvalues, NULL, 0);
    }
-   PG_RETURN_FLOAT8(result);
+
+   /*
+    * Now merge the results from the MCV and histogram calculations,
+    * realizing that the histogram covers only the non-null values that
+    * are not listed in MCV.
+    */
+   selec = 1.0 - stats->stanullfrac - sumcommon;
+
+   if (hist_selec > 0.0)
+       selec *= hist_selec;
+   else
+   {
+       /*
+        * If no histogram but there are values not accounted for by MCV,
+        * arbitrarily assume half of them will match.
+        */
+       selec *= 0.5;
+   }
+
+   selec += mcv_selec;
+
+   ReleaseSysCache(statsTuple);
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
    Oid         ltopid;
-   float8      result;
 
    /*
-    * Compute selectivity of "<", then invert --- but only if we were
-    * able to produce a non-default estimate.  Note that we get the
-    * negator which strictly speaking means we are looking at "<=" for
-    * ">" or "<" for ">=".  We assume this won't matter.
+    * Commute so that we have a "<" or "<=" operator, then apply
+    * scalarltsel.
     */
-   ltopid = get_negator(opid);
-   if (ltopid)
-   {
-       result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
-                                               ObjectIdGetDatum(ltopid),
-                                                ObjectIdGetDatum(relid),
-                                                   Int16GetDatum(attno),
-                                                   value,
-                                                   Int32GetDatum(flag)));
-   }
-   else
+   ltopid = get_commutator(opid);
+   if (!ltopid)
    {
        /* Use default selectivity (should we raise an error instead?) */
-       result = DEFAULT_INEQ_SEL;
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
    }
 
-   if (result != DEFAULT_INEQ_SEL)
-       result = 1.0 - result;
-
-   PG_RETURN_FLOAT8(result);
+   flag ^= SEL_RIGHT;
+   return DirectFunctionCall5(scalarltsel,
+                              ObjectIdGetDatum(ltopid),
+                              ObjectIdGetDatum(relid),
+                              Int16GetDatum(attno),
+                              value,
+                              Int32GetDatum(flag));
 }
 
 /*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
        result = DEFAULT_MATCH_SEL;
    else
    {
-       HeapTuple   oprtuple;
+       HeapTuple   oprTuple;
        Oid         ltype,
                    rtype;
        char       *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
         * Get left and right datatypes of the operator so we know what
         * type the attribute is.
         */
-       oprtuple = SearchSysCache(OPEROID,
+       oprTuple = SearchSysCache(OPEROID,
                                  ObjectIdGetDatum(opid),
                                  0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
+       if (!HeapTupleIsValid(oprTuple))
            elog(ERROR, "patternsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       ReleaseSysCache(oprtuple);
+       ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+       rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+       ReleaseSysCache(oprTuple);
 
        /* the right-hand const is type text for all supported operators */
        Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
    AttrNumber  attno1 = PG_GETARG_INT16(2);
    Oid         relid2 = PG_GETARG_OID(3);
    AttrNumber  attno2 = PG_GETARG_INT16(4);
-   float8      result;
-   float8      num1,
-               num2,
-               min;
    bool        unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
    bool        unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+   double      selec;
 
    if (unknown1 && unknown2)
-       result = DEFAULT_EQ_SEL;
+       selec = DEFAULT_EQ_SEL;
    else
    {
-       num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
-       num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+       Oid         typid1;
+       Oid         typid2;
+       int32       typmod1;
+       int32       typmod2;
+       HeapTuple   statsTuple1 = NULL;
+       HeapTuple   statsTuple2 = NULL;
+       Form_pg_statistic stats1 = NULL;
+       Form_pg_statistic stats2 = NULL;
+       double      nd1,
+                   nd2;
+
+       if (unknown1)
+       {
+           nd1 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid1, attno1, &typid1, &typmod1);
+
+           /* get stats for the attribute, if available */
+           statsTuple1 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid1),
+                                        Int16GetDatum(attno1),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple1))
+               stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+           nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+       }
+
+       if (unknown2)
+       {
+           nd2 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid2, attno2, &typid2, &typmod2);
+
+           /* get stats for the attribute, if available */
+           statsTuple2 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid2),
+                                        Int16GetDatum(attno2),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple2))
+               stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+           nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+       }
 
        /*
-        * The join selectivity cannot be more than num2, since each tuple
-        * in table 1 could match no more than num2 fraction of tuples in
-        * table 2 (and that's only if the table-1 tuple matches the most
-        * common value in table 2, so probably it's less).  By the same
-        * reasoning it is not more than num1. The min is therefore an
-        * upper bound.
+        * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+        * (can we produce any theory for this?).
         *
-        * If we know the dispersion of only one side, use it; the reasoning
-        * above still works.
+        * XXX possibility to do better: if both attributes have histograms
+        * then we could determine the exact join selectivity between the
+        * MCV sets, and only have to assume the join behavior of the non-MCV
+        * values.  This could be a big win when the MCVs cover a large part
+        * of the population.
         *
-        * XXX can we make a better estimate here?  Using the nullfrac
-        * statistic might be helpful, for example.  Assuming the operator
-        * is strict (does not succeed for null inputs) then the
-        * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
-        * which might be usefully small if there are many nulls.  How
-        * about applying the operator to the most common values?
+        * XXX what about nulls?
         */
-       min = (num1 < num2) ? num1 : num2;
-       result = min;
+       selec = 1.0 / sqrt(nd1 * nd2);
+       if (selec > 1.0)
+           selec = 1.0;
+
+       if (HeapTupleIsValid(statsTuple1))
+           ReleaseSysCache(statsTuple1);
+       if (HeapTupleIsValid(statsTuple2))
+           ReleaseSysCache(statsTuple2);
+
    }
-   PG_RETURN_FLOAT8(result);
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  *   Returns "true" if successful.
  *
  * All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values.  XXX what about NUMERIC values that are outside
+ * the range of "double"?
  *
  * String datatypes are converted by convert_string_to_scalar(),
  * which is explained below.  The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
 {
    switch (typid)
    {
-           case BOOLOID:
+       case BOOLOID:
            return (double) DatumGetBool(value);
        case INT2OID:
            return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  * three strings before computing the scaled values.  This allows us to
  * "zoom in" when we encounter a narrow data range.  An example is a phone
  * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
  */
 static void
 convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
 /*
  * getattproperties
  *   Retrieve pg_attribute properties for an attribute,
- *   including type OID, type len, type byval flag, typmod.
+ *   including type OID and typmod.
  */
 static void
 getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+                Oid *typid, int32 *typmod)
 {
    HeapTuple   atp;
    Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
    att_tup = (Form_pg_attribute) GETSTRUCT(atp);
 
    *typid = att_tup->atttypid;
-   *typlen = att_tup->attlen;
-   *typbyval = att_tup->attbyval;
    *typmod = att_tup->atttypmod;
 
    ReleaseSysCache(atp);
 }
 
 /*
- * getattstatistics
- *   Retrieve the pg_statistic data for an attribute.
- *   Returns 'false' if no stats are available.
+ * get_att_numdistinct
  *
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ *   Estimate the number of distinct values of an attribute.
  *
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
  *
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values.  (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute.  If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
  */
-static bool
-getattstatistics(Oid relid,
-                AttrNumber attnum,
-                Oid typid,
-                int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                   Form_pg_statistic stats)
 {
-   HeapTuple   tuple;
-   HeapTuple   typeTuple;
-   FmgrInfo    inputproc;
-   Oid         typelem;
-   bool        isnull;
+   HeapTuple   reltup;
+   double      ntuples;
 
    /*
-    * We assume that there will only be one entry in pg_statistic for the
-    * given rel/att, so we search WITHOUT considering the staop column.
-    * Someday, VACUUM might store more than one entry per rel/att,
-    * corresponding to more than one possible sort ordering defined for
-    * the column type.  However, to make that work we will need to figure
-    * out which staop to search for --- it's not necessarily the one we
-    * have at hand!  (For example, we might have a '>' operator rather
-    * than the '<' operator that will appear in staop.)
+    * Special-case boolean columns: presumably, two distinct values.
+    *
+    * Are there any other cases we should wire in special estimates for?
     */
-   tuple = SearchSysCache(STATRELID,
-                          ObjectIdGetDatum(relid),
-                          Int16GetDatum((int16) attnum),
-                          0, 0);
-   if (!HeapTupleIsValid(tuple))
-   {
-       /* no such stats entry */
-       return false;
-   }
+   if (typid == BOOLOID)
+       return 2.0;
 
-   if (nullfrac)
-       *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
-   if (commonfrac)
-       *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
-   /* Get the type input proc for the column datatype */
-   typeTuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(typid),
-                              0, 0, 0);
-   if (!HeapTupleIsValid(typeTuple))
-       elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
-            typid);
-   fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
-   typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
-   ReleaseSysCache(typeTuple);
+   /*
+    * If VACUUM ANALYZE determined a fixed estimate, use it.
+    */
+   if (stats && stats->stadistinct > 0.0)
+       return stats->stadistinct;
 
    /*
-    * Values are variable-length fields, so cannot access as struct
-    * fields. Must do it the hard way with SysCacheGetAttr.
+    * Otherwise we need to get the relation size.
     */
-   if (commonval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stacommonval,
-                                         &isnull);
+   reltup = SearchSysCache(RELOID,
+                           ObjectIdGetDatum(relid),
+                           0, 0, 0);
+   if (!HeapTupleIsValid(reltup))
+       elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stacommonval is null");
-           *commonval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *commonval = FunctionCall3(&inputproc,
-                                      CStringGetDatum(strval),
-                                      ObjectIdGetDatum(typelem),
-                                      Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
 
-   if (loval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_staloval,
-                                         &isnull);
+   ReleaseSysCache(reltup);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: staloval is null");
-           *loval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *loval = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   if (ntuples <= 0.0)
+       return 100.0;           /* no data available; return a default */
 
-   if (hival)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stahival,
-                                         &isnull);
+   /*
+    * If VACUUM ANALYZE determined a scaled estimate, use it.
+    */
+   if (stats && stats->stadistinct < 0.0)
+       return - stats->stadistinct * ntuples;
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stahival is null");
-           *hival = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *hival = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
+   /*
+    * VACUUM ANALYZE does not compute stats for system attributes,
+    * but some of them can reasonably be assumed unique anyway.
+    */
+   switch (attnum)
+   {
+       case ObjectIdAttributeNumber:
+       case SelfItemPointerAttributeNumber:
+           return ntuples;
+       case TableOidAttributeNumber:
+           return 1.0;
    }
 
-   ReleaseSysCache(tuple);
+   /*
+    * Estimate ndistinct = ntuples if the table is small, else 100.
+    */
+   if (ntuples < 100.0)
+       return ntuples;
 
-   return true;
+   return 100.0;
 }
 
 /*-------------------------------------------------------------------------


diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 82d55866215aac34724aa44deb029feea9d94a76..3995de5d7a1325085c901b0d2427cbbd775170ee 100644 (file)


--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   Eventually, the index information should go through here, too.
@@ -18,7 +18,10 @@
 #include "access/tupmacs.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
        return -1;
 }
 
-/*
- * get_attdispersion
- *
- *   Retrieve the dispersion statistic for an attribute,
- *   or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value.  This value may vary
- * depending on context.  (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
-   HeapTuple   atp;
-   Form_pg_attribute att_tup;
-   double      dispersion;
-   Oid         atttypid;
-   int32       ntuples;
-
-   atp = SearchSysCache(ATTNUM,
-                        ObjectIdGetDatum(relid),
-                        Int16GetDatum(attnum),
-                        0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
-            relid, attnum);
-       return min_estimate;
-   }
-
-   att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
-   dispersion = att_tup->attdispersion;
-   atttypid = att_tup->atttypid;
-
-   ReleaseSysCache(atp);
-
-   if (dispersion > 0.0)
-       return dispersion;      /* we have a specific estimate from VACUUM */
-
-   /*
-    * Special-case boolean columns: the dispersion of a boolean is highly
-    * unlikely to be anywhere near 1/numtuples, instead it's probably
-    * more like 0.5.
-    *
-    * Are there any other cases we should wire in special estimates for?
-    */
-   if (atttypid == BOOLOID)
-       return 0.5;
-
-   /*
-    * Dispersion is either 0 (no data available) or -1 (dispersion is
-    * 1/numtuples).  Either way, we need the relation size.
-    */
-
-   atp = SearchSysCache(RELOID,
-                        ObjectIdGetDatum(relid),
-                        0, 0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
-       return min_estimate;
-   }
-
-   ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-   ReleaseSysCache(atp);
-
-   if (ntuples == 0)
-       return min_estimate;    /* no data available */
-
-   if (dispersion < 0.0)       /* VACUUM thinks there are no duplicates */
-       return 1.0 / (double) ntuples;
-
-   /*
-    * VACUUM ANALYZE does not compute dispersion for system attributes,
-    * but some of them can reasonably be assumed unique anyway.
-    */
-   if (attnum == ObjectIdAttributeNumber ||
-       attnum == SelfItemPointerAttributeNumber)
-       return 1.0 / (double) ntuples;
-   if (attnum == TableOidAttributeNumber)
-       return 1.0;
-
-   /*
-    * VACUUM ANALYZE has not been run for this table. Produce an estimate
-    * of 1/numtuples.  This may produce unreasonably small estimates for
-    * large tables, so limit the estimate to no less than min_estimate.
-    */
-   dispersion = 1.0 / (double) ntuples;
-   if (dispersion < min_estimate)
-       dispersion = min_estimate;
-
-   return dispersion;
-}
-
 /*             ---------- INDEX CACHE ----------                        */
 
 /*     watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
 }
 
 #endif
+
+/*             ---------- STATISTICS CACHE ----------                   */
+
+/*
+ * get_attstatsslot
+ *
+ *     Extract the contents of a "slot" of a pg_statistic tuple.
+ *     Returns TRUE if requested slot type was found, else FALSE.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache.  We do this since
+ * most callers will want to extract more than one value from the cache
+ * entry, and we don't want to repeat the cache lookup unnecessarily.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype: type OID of attribute.
+ * atttypmod: typmod of attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if not NULL, the slot's stavalues are extracted.
+ * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
+ *
+ * If assigned, values and numbers are set to point to palloc'd arrays.
+ * If the attribute type is pass-by-reference, the values referenced by
+ * the values array are themselves palloc'd.  The palloc'd stuff can be
+ * freed by calling free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+                Oid atttype, int32 atttypmod,
+                int reqkind, Oid reqop,
+                Datum **values, int *nvalues,
+                float4 **numbers, int *nnumbers)
+{
+   Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+   int         i,
+               j;
+   Datum       val;
+   bool        isnull;
+   ArrayType  *statarray;
+   int         narrayelem;
+   HeapTuple   typeTuple;
+   FmgrInfo    inputproc;
+   Oid         typelem;
+
+   for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+   {
+       if ((&stats->stakind1)[i] == reqkind &&
+           (reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+           break;
+   }
+   if (i >= STATISTIC_NUM_SLOTS)
+       return false;           /* not there */
+
+   if (values)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stavalues1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stavalues is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * Do initial examination of the array.  This produces a list
+        * of text Datums --- ie, pointers into the text array value.
+        */
+       deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+       narrayelem = *nvalues;
+       /*
+        * We now need to replace each text Datum by its internal equivalent.
+        *
+        * Get the type input proc and typelem for the column datatype.
+        */
+       typeTuple = SearchSysCache(TYPEOID,
+                                  ObjectIdGetDatum(atttype),
+                                  0, 0, 0);
+       if (!HeapTupleIsValid(typeTuple))
+           elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+                atttype);
+       fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+       typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+       ReleaseSysCache(typeTuple);
+       /*
+        * Do the conversions.  The palloc'd array of Datums is reused
+        * in place.
+        */
+       for (j = 0; j < narrayelem; j++)
+       {
+           char       *strval;
+
+           strval = DatumGetCString(DirectFunctionCall1(textout,
+                                                        (*values)[j]));
+           (*values)[j] = FunctionCall3(&inputproc,
+                                        CStringGetDatum(strval),
+                                        ObjectIdGetDatum(typelem),
+                                        Int32GetDatum(atttypmod));
+           pfree(strval);
+       }
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   if (numbers)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stanumbers1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stanumbers is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * We expect the array to be a 1-D float4 array; verify that.
+        * We don't need to use deconstruct_array() since the array
+        * data is just going to look like a C array of float4 values.
+        */
+       narrayelem = ARR_DIMS(statarray)[0];
+       if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+           ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+           elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+       *numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+       memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+       *nnumbers = narrayelem;
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   return true;
+}
+
+void
+free_attstatsslot(Oid atttype,
+                 Datum *values, int nvalues,
+                 float4 *numbers, int nnumbers)
+{
+   if (values)
+   {
+       if (! get_typbyval(atttype))
+       {
+           int     i;
+
+           for (i = 0; i < nvalues; i++)
+               pfree(DatumGetPointer(values[i]));
+       }
+       pfree(values);
+   }
+   if (numbers)
+       pfree(numbers);
+}


diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index 75ef3179202695a3fb7a5336b7bc4f3e24d3f3f5..4e35b3fb35ba67aa78d337e6bdb39149c6256f8c 100644 (file)


--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
            0,
            0
    }},
-   {StatisticRelationName,     /* STATRELID */
+   {StatisticRelationName,     /* STATRELATT */
        StatisticRelidAttnumIndex,
        2,
        {


diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index d27bfb29668711e985f1ba29bd1285ab77201bf2..5a77c47c20085f0d24ae5b8edb6ef2ca70acdc27 100644 (file)


--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -78,7 +78,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,7 +87,11 @@
 
 #include "access/heapam.h"
 #include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
 #include "miscadmin.h"
+#include "utils/fmgroids.h"
 #include "utils/logtape.h"
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
    TupleDesc   tupDesc;
    int         nKeys;
    ScanKey     scanKeys;
+   SortFunctionKind *sortFnKinds;
 
    /*
     * These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
    Oid         datumType;
    Oid         sortOperator;
    FmgrInfo    sortOpFn;       /* cached lookup data for sortOperator */
+   SortFunctionKind sortFnKind;
    /* we need typelen and byval in order to know how to copy the Datums. */
    int         datumTypeLen;
    bool        datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
 
 Tuplesortstate *
 tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
+                    int nkeys,
+                    Oid *sortOperators, AttrNumber *attNums,
                     bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   int         i;
 
-   AssertArg(nkeys >= 1);
-   AssertArg(keys[0].sk_attno != 0);
-   AssertArg(keys[0].sk_procedure != 0);
+   AssertArg(nkeys > 0);
 
    state->comparetup = comparetup_heap;
    state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 
    state->tupDesc = tupDesc;
    state->nKeys = nkeys;
-   state->scanKeys = keys;
+   state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+   MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+   state->sortFnKinds = (SortFunctionKind *)
+       palloc(nkeys * sizeof(SortFunctionKind));
+   MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+   for (i = 0; i < nkeys; i++)
+   {
+       RegProcedure sortFunction;
+
+       AssertArg(sortOperators[i] != 0);
+       AssertArg(attNums[i] != 0);
+
+       /* select a function that implements the sort operator */
+       SelectSortFunction(sortOperators[i], &sortFunction,
+                          &state->sortFnKinds[i]);
+
+       ScanKeyEntryInitialize(&state->scanKeys[i],
+                              0x0,
+                              attNums[i],
+                              sortFunction,
+                              (Datum) 0);
+   }
 
    return state;
 }
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
                      bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   RegProcedure sortFunction;
    int16       typlen;
    bool        typbyval;
 
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
 
    state->datumType = datumType;
    state->sortOperator = sortOperator;
-   /* lookup the function that implements the sort operator */
-   fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+   /* select a function that implements the sort operator */
+   SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+   /* and look up the function */
+   fmgr_info(sortFunction, &state->sortOpFn);
+
    /* lookup necessary attributes of the datum type */
    get_typlenbyval(datumType, &typlen, &typbyval);
    state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
    }
    if (state->memtupindex)
        pfree(state->memtupindex);
+
+   /* this stuff might better belong in a variant-specific shutdown routine */
+   if (state->scanKeys)
+       pfree(state->scanKeys);
+   if (state->sortFnKinds)
+       pfree(state->sortFnKinds);
+
    pfree(state);
 }
 
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
    for (nkey = 0; nkey < state->nKeys; nkey++)
    {
        ScanKey     scanKey = state->scanKeys + nkey;
+       SortFunctionKind fnKind = state->sortFnKinds[nkey];
        AttrNumber  attno = scanKey->sk_attno;
        Datum       lattr,
                    rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
        }
        else if (isnull2)
            return -1;
-       else if (scanKey->sk_flags & SK_COMMUTE)
-       {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return -1;      /* a < b after commute */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return 1;       /* a > b after commute */
-       }
        else
        {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return -1;      /* a < b */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return 1;       /* a > b */
+           int32       compare;
+
+           if (fnKind == SORTFUNC_LT)
+           {
+               if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                              lattr, rattr)))
+                   compare = -1;   /* a < b */
+               else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                                   rattr, lattr)))
+                   compare = 1;    /* a > b */
+               else
+                   compare = 0;
+           }
+           else
+           {
+               /* sort function is CMP or REVCMP */
+               compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+                                                     lattr, rattr));
+               if (fnKind == SORTFUNC_REVCMP)
+                   compare = -compare;
+           }
+
+           if (compare != 0)
+           {
+               if (scanKey->sk_flags & SK_COMMUTE)
+                   compare = -compare;
+               return compare;
+           }
        }
    }
 
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
        }
        else
        {
+           /* the comparison function is always of CMP type */
            compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
-                                               attrDatum1, attrDatum2));
+                                                 attrDatum1,
+                                                 attrDatum2));
        }
 
        if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
    }
    else if (rtup->isNull)
        return -1;
-   else
+   else if (state->sortFnKind == SORTFUNC_LT)
    {
        if (DatumGetBool(FunctionCall2(&state->sortOpFn,
                                       ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
            return 1;           /* a > b */
        return 0;
    }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
+
+       compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+                                             ltup->val, rtup->val));
+       if (state->sortFnKind == SORTFUNC_REVCMP)
+           compare = -compare;
+       return compare;
+   }
 }
 
 static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
        return (unsigned int) tuplelen;
    }
 }
+
+
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.  The straightforward
+ * method is to use the operator's implementation proc --- ie, "<"
+ * comparison.  However, that way often requires two calls of the function
+ * per comparison.  If we can find a btree three-way comparator function
+ * associated with the operator, we can use it to do the comparisons
+ * more efficiently.  We also support the possibility that the operator
+ * is ">" (descending sort), in which case we have to reverse the output
+ * of the btree comparator.
+ *
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+                  RegProcedure *sortFunction,
+                  SortFunctionKind *kind)
+{
+   Relation    relation;
+   HeapScanDesc scan;
+   ScanKeyData skey[3];
+   HeapTuple   tuple;
+   Oid         opclass = InvalidOid;
+
+   /*
+    * Scan pg_amop to see if the target operator is registered as the
+    * "<" or ">" operator of any btree opclass.  It's possible that it
+    * might be registered both ways (eg, if someone were to build a
+    * "reverse sort" opclass for some reason); prefer the "<" case if so.
+    * If the operator is registered the same way in multiple opclasses,
+    * assume we can use the associated comparator function from any one.
+    */
+   relation = heap_openr(AccessMethodOperatorRelationName,
+                         AccessShareLock);
+
+   ScanKeyEntryInitialize(&skey[0], 0,
+                          Anum_pg_amop_amopid,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(BTREE_AM_OID));
+
+   ScanKeyEntryInitialize(&skey[1], 0,
+                          Anum_pg_amop_amopopr,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(sortOperator));
+
+   scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+       if (aform->amopstrategy == BTLessStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_CMP;
+           break;              /* done looking */
+       }
+       else if (aform->amopstrategy == BTGreaterStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_REVCMP;
+           /* keep scanning in hopes of finding a BTLess entry */
+       }
+   }
+
+   heap_endscan(scan);
+   heap_close(relation, AccessShareLock);
+
+   if (OidIsValid(opclass))
+   {
+       /* Found a suitable opclass, get its comparator support function */
+       relation = heap_openr(AccessMethodProcedureRelationName,
+                             AccessShareLock);
+
+       ScanKeyEntryInitialize(&skey[0], 0,
+                              Anum_pg_amproc_amid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(BTREE_AM_OID));
+
+       ScanKeyEntryInitialize(&skey[1], 0,
+                              Anum_pg_amproc_amopclaid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(opclass));
+
+       ScanKeyEntryInitialize(&skey[2], 0,
+                              Anum_pg_amproc_amprocnum,
+                              F_INT2EQ,
+                              Int16GetDatum(BTORDER_PROC));
+
+       scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+       *sortFunction = InvalidOid;
+
+       if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+       {
+           Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+           *sortFunction = aform->amproc;
+       }
+
+       heap_endscan(scan);
+       heap_close(relation, AccessShareLock);
+
+       if (RegProcedureIsValid(*sortFunction))
+           return;
+   }
+
+   /* Can't find a comparator, so use the operator as-is */
+
+   *kind = SORTFUNC_LT;
+   *sortFunction = get_opcode(sortOperator);
+   if (!RegProcedureIsValid(*sortFunction))
+       elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+            sortOperator);
+}


diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h

index 759ab3d39e2494de4fa021c8070ac7e5da62d283..6e38529204dabaab44c078c0af05a6687fd0d966 100644 (file)


--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,15 +6,13 @@
  *
  * Copyright (c) 2000, PostgreSQL Development Team
  *
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef TUPTOASTER_H
 #define TUPTOASTER_H
 
-#ifdef TUPLE_TOASTER_ACTIVE
-
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
  */
 extern Datum toast_compress_datum(Datum value);
 
-#endif  /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
 
 
 #endif  /* TUPTOASTER_H */


diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 963b11c1d386ed6df175ad0e2e92cfe2929af774..832f91fb09f172d5ffc3d31aba10fccd5431c783 100644 (file)


--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 200101061
+#define CATALOG_VERSION_NO 200105051
 
 #endif


diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h

index a7248f6c6dc4fb438d27b29fe250c446534ad228..7ab04b05fb25b1dd765830e90ec1b717c6e2814e 100644 (file)


--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -7,13 +7,14 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef HEAP_H
 #define HEAP_H
 
+#include "catalog/pg_attribute.h"
 #include "utils/rel.h"
 
 typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
                          List *rawColDefaults,
                          List *rawConstraints);
 
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
 #endif  /* HEAP_H */


diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h

index 1dac0bb1c31596e1b31e5fc2d82f20835ed7879b..07aaad61c798bc295723dfe80cded8dbc848d6c9 100644 (file)


--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
               Datum *datum,
               char *nullv);
 
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
 extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
 extern void setRelhasindex(Oid relid, bool hasindex);
 


diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 41a580a37779abae1d46c2b8422b8ece0fbebc2b..cc155cf1bbb314f4cb54a41c23a3a2ed5e1fd5d8 100644 (file)


--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
 xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
 xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
 */
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
 DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
 DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
 DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));


diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h

index 58724e94dc966cef18b5345521cafa985a4dbf1e..6e11aa6d530707371c7b5b0f5af4e4174c4919f5 100644 (file)


--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,15 +36,14 @@
  *     typedef struct FormData_pg_attribute
  *
  *     If you change the following, make sure you change the structs for
- *     system attributes in heap.c and index.c also.
+ *     system attributes in catalog/heap.c also.
  * ----------------
  */
 CATALOG(pg_attribute) BOOTSTRAP
 {
    Oid         attrelid;       /* OID of relation containing this
                                 * attribute */
-   NameData    attname;
-   Oid         atttypid;
+   NameData    attname;        /* name of attribute */
 
    /*
     * atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
     * attalign attributes of this instance, so they had better match or
     * Postgres will fail.
     */
-
-   float4      attdispersion;
+   Oid         atttypid;
 
    /*
-    * attdispersion is the dispersion statistic of the column (0.0 to
-    * 1.0), or zero if the statistic has not been calculated, or -1.0 if
-    * VACUUM found that the column contains no duplicate entries (in
-    * which case the dispersion should be taken as 1.0/numberOfRows for
-    * the current table size).  The -1.0 hack is useful because the
-    * number of rows may be updated more often than attdispersion is. We
-    * assume that the column will retain its no-duplicate-entry property.
-    * (Perhaps this should be driven off the existence of a UNIQUE index
-    * for the column, instead of being a statistical guess?)
+    * attstattarget is the target number of statistics datapoints to collect
+    * during ANALYZE (or VACUUM ANALYZE) of this column.  A zero here
+    * indicates that we do not wish to collect any stats about this column.
     */
-
-   int2        attlen;
+   int4        attstattarget;
 
    /*
     * attlen is a copy of the typlen field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   int2        attnum;
+   int2        attlen;
 
    /*
     * attnum is the "attribute number" for the attribute:  A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     *
     * Note that (attnum - 1) is often used as the index to an array.
     */
+   int2        attnum;
 
-   int4        attnelems;      /* number of dimensions, if an array type */
-
-   int4        attcacheoff;
+   /*
+    * attndims is the declared number of dimensions, if an array type,
+    * otherwise zero.
+    */
+   int4        attndims;
 
    /*
     * fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
     * tuple descriptor, we may then update attcacheoff in the copies.
     * This speeds up the attribute walking process.
     */
-
-   int4        atttypmod;
+   int4        attcacheoff;
 
    /*
     * atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     * argument. The value will generally be -1 for types that do not need
     * typmod.
     */
-
-   bool        attbyval;
+   int4        atttypmod;
 
    /*
     * attbyval is a copy of the typbyval field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   char        attstorage;
+   bool        attbyval;
 
    /*----------
     * attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
     * but only as a last resort ('e' and 'x' fields are moved first).
     *----------
     */
+   char        attstorage;
 
+   /* This flag indicates that the attribute is really a set */
    bool        attisset;
-   char        attalign;
 
    /*
     * attalign is a copy of the typalign field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   bool        attnotnull;
+   char        attalign;
 
    /* This flag represents the "NOT NULL" constraint */
-   bool        atthasdef;
+   bool        attnotnull;
 
    /* Has DEFAULT value or not */
+   bool        atthasdef;
 } FormData_pg_attribute;
 
 /*
  * someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
  */
 #define ATTRIBUTE_TUPLE_SIZE \
-   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
 
 /* ----------------
  *     Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
 #define Anum_pg_attribute_attrelid     1
 #define Anum_pg_attribute_attname      2
 #define Anum_pg_attribute_atttypid     3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
 #define Anum_pg_attribute_attlen       5
 #define Anum_pg_attribute_attnum       6
-#define Anum_pg_attribute_attnelems        7
+#define Anum_pg_attribute_attndims     7
 #define Anum_pg_attribute_attcacheoff  8
 #define Anum_pg_attribute_atttypmod        9
 #define Anum_pg_attribute_attbyval     10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
    (attribute)->attnotnull = false; \
    (attribute)->atthasdef = false;
 #endif  /* _DROP_COLUMN_HACK__ */
+
 /* ----------------
  *     SCHEMA_ macros for declaring hardcoded tuple descriptors.
  *     these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
  * ----------------
  */
 #define Schema_pg_type \
-{ 1247, {"typname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, '\0' , 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, false    , 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1247 typowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1247 typlen          21 0  2   3 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1247 typprtlen       21 0  2   4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_proc \
-{ 1255, {"proname"},           19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"},           19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1255 proowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 prolang         26 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 proisinh            16 0  1   4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_shadow
  * ----------------
  */
-DATA(insert OID = 0 ( 1260 usename         19  0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid            23  0   4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename         19  DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid            23  DEFAULT_ATTSTATTARGET   4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1260 usecreatedb     16  0   1   3 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usetrace            16  0   1   4 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usesuper            16  0   1   5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_group
  * ----------------
  */
-DATA(insert OID = 0 ( 1261 groname         19 0 NAMEDATALEN  1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid            23 0  4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid            23 DEFAULT_ATTSTATTARGET  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1261 grolist       1007 0 -1   3 0 -1 -1 f x f i f f));
 DATA(insert OID = 0 ( 1261 ctid                27 0  6  -1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1261 oid             26 0  4  -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_attribute \
-{ 1249, {"attrelid"},    26, 0,    4,  1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"},     19, 0, NAMEDATALEN,   2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0, 4,  4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"},   23, 0,    4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid            26 0  4   1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname         19 0 NAMEDATALEN  2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"},    26, DEFAULT_ATTSTATTARGET,    4,  1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"},     19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,   2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0,  4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"},    23, 0,    4,  7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid            26 DEFAULT_ATTSTATTARGET  4   1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  2 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1249 atttypid            26 0  4   3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion   700 0  4   4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget   23 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attlen          21 0  2   5 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1249 attnum          21 0  2   6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims            23 0  4   7 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attcacheoff     23 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 atttypmod       23 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attbyval            16 0  1  10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_class \
-{ 1259, {"relname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"},    23, 0,   4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"},    700, 0,  4,  7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltype         26 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relowner            23 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relam           26 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relfilenode     26 0  4   5 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relpages            23 0  4   6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples      700 0  4   7 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastrelid   26 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastidxid   26 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relhasindex     16 0  1  10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_variable \
-{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1264 varfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_log \
-{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1269 logfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 376 xactlockfoo      26 0  4   1 0 -1 -1 t p f i f f));
 


diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h

index 81e75e14b6a7dc7372f9dcd6808f824944f5f028..86de88cc9b662fe5c65f43301e2a28a247bf69ee 100644 (file)


--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
    Oid         relam;
    Oid         relfilenode;
    int4        relpages;
-   int4        reltuples;
+   float4      reltuples;
    Oid         reltoastrelid;
    Oid         reltoastidxid;
    bool        relhasindex;


diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h

index 2f39bea3245e1772984b1b3b4fca0dbb36f41c1d..8d6a6b37c16ac513468f052508aadf91a034ff85 100644 (file)


--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
    /* These fields form the unique key for the entry: */
    Oid         starelid;       /* relation containing attribute */
    int2        staattnum;      /* attribute (column) stats are for */
-   Oid         staop;          /* '<' comparison op used for lo/hi vals */
+
+   /* the fraction of the column's entries that are NULL: */
+   float4      stanullfrac;
 
    /*
-    * Note: the current VACUUM code will never produce more than one
-    * entry per column, but in theory there could be multiple entries if
-    * a datatype has more than one useful ordering operator.  Also, the
-    * current code will not write an entry unless it found at least one
-    * non-NULL value in the column; so the remaining fields will never be
-    * NULL.
+    * stawidth is the average width in bytes of non-null entries.  For
+    * fixed-width datatypes this is of course the same as the typlen, but
+    * for varlena types it is more useful.  Note that this is the average
+    * width of the data as actually stored, post-TOASTing (eg, for a
+    * moved-out-of-line value, only the size of the pointer object is
+    * counted).  This is the appropriate definition for the primary use of
+    * the statistic, which is to estimate sizes of in-memory hash tables of
+    * tuples.
+    */
+   int4        stawidth;
+
+   /* ----------------
+    * stadistinct indicates the (approximate) number of distinct non-null
+    * data values in the column.  The interpretation is:
+    *      0       unknown or not computed
+    *      > 0     actual number of distinct values
+    *      < 0     negative of multiplier for number of rows
+    * The special negative case allows us to cope with columns that are
+    * unique (stadistinct = -1) or nearly so (for example, a column in
+    * which values appear about twice on the average could be represented
+    * by stadistinct = -0.5).  Because the number-of-rows statistic in
+    * pg_class may be updated more frequently than pg_statistic is, it's
+    * important to be able to describe such situations as a multiple of
+    * the number of rows, rather than a fixed number of distinct values.
+    * But in other cases a fixed number is correct (eg, a boolean column).
+    * ----------------
+    */
+   float4      stadistinct;
+
+   /* ----------------
+    * To allow keeping statistics on different kinds of datatypes,
+    * we do not hard-wire any particular meaning for the remaining
+    * statistical fields.  Instead, we provide several "slots" in which
+    * statistical data can be placed.  Each slot includes:
+    *      kind            integer code identifying kind of data
+    *      op              OID of associated operator, if needed
+    *      numbers         float4 array (for statistical values)
+    *      values          text array (for representations of data values)
+    * The kind and operator fields are never NULL; they are zeroes in an
+    * unused slot.  The numbers and values fields are NULL in an unused
+    * slot, and might also be NULL in a used slot if the slot kind has
+    * no need for one or the other.
+    * ----------------
     */
 
+   int2        stakind1;
+   int2        stakind2;
+   int2        stakind3;
+   int2        stakind4;
+
+   Oid         staop1;
+   Oid         staop2;
+   Oid         staop3;
+   Oid         staop4;
+
    /*
-    * These fields contain the stats about the column indicated by the
-    * key
+    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+    * (NULL). They cannot be accessed as C struct entries; you have to use
+    * the full field access machinery (heap_getattr) for them.  We declare
+    * them here for the catalog machinery.
     */
-   float4      stanullfrac;    /* the fraction of the entries that are
-                                * NULL */
-   float4      stacommonfrac;  /* the fraction that are the most common
-                                * val */
+
+   float4      stanumbers1[1];
+   float4      stanumbers2[1];
+   float4      stanumbers3[1];
+   float4      stanumbers4[1];
 
    /*
-    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
-    * accessed as C struct entries; you have to use the full field access
-    * machinery (heap_getattr) for them.
-    *
-    * All three of these are text representations of data values of the
-    * column's data type.  To re-create the actual Datum, do
-    * datatypein(textout(givenvalue)).
+    * Values in these text arrays are external representations of values
+    * of the column's data type.  To re-create the actual Datum, do
+    * datatypein(textout(arrayelement)).
     */
-   text        stacommonval;   /* most common non-null value in column */
-   text        staloval;       /* smallest non-null value in column */
-   text        stahival;       /* largest non-null value in column */
+   text        stavalues1[1];
+   text        stavalues2[1];
+   text        stavalues3[1];
+   text        stavalues4[1];
 } FormData_pg_statistic;
 
+#define STATISTIC_NUM_SLOTS  4
+
 /* ----------------
  *     Form_pg_statistic corresponds to a pointer to a tuple with
  *     the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
  *     compiler constants for pg_statistic
  * ----------------
  */
-#define Natts_pg_statistic             8
+#define Natts_pg_statistic             21
 #define Anum_pg_statistic_starelid     1
 #define Anum_pg_statistic_staattnum        2
-#define Anum_pg_statistic_staop            3
-#define Anum_pg_statistic_stanullfrac  4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval 6
-#define Anum_pg_statistic_staloval     7
-#define Anum_pg_statistic_stahival     8
+#define Anum_pg_statistic_stanullfrac  3
+#define Anum_pg_statistic_stawidth     4
+#define Anum_pg_statistic_stadistinct  5
+#define Anum_pg_statistic_stakind1     6
+#define Anum_pg_statistic_stakind2     7
+#define Anum_pg_statistic_stakind3     8
+#define Anum_pg_statistic_stakind4     9
+#define Anum_pg_statistic_staop1       10
+#define Anum_pg_statistic_staop2       11
+#define Anum_pg_statistic_staop3       12
+#define Anum_pg_statistic_staop4       13
+#define Anum_pg_statistic_stanumbers1  14
+#define Anum_pg_statistic_stanumbers2  15
+#define Anum_pg_statistic_stanumbers3  16
+#define Anum_pg_statistic_stanumbers4  17
+#define Anum_pg_statistic_stavalues1   18
+#define Anum_pg_statistic_stavalues2   19
+#define Anum_pg_statistic_stavalues3   20
+#define Anum_pg_statistic_stavalues4   21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation.  Additional "kinds" will probably appear in
+ * future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot.  Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not.  stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count).  The values
+ * shall be ordered in decreasing frequency.  Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector.  Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV  1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data.  staop is
+ * the OID of the "<" operator that describes the sort ordering.  (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.)  stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population.  The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance).  This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values.  In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM  2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop.  (As with the histogram, more
+ * than one entry could theoretically appear.)  stavalues is not used and
+ * should be NULL.  stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions.  The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION  3
 
 #endif  /* PG_STATISTIC_H */


diff --git a/src/include/commands/command.h b/src/include/commands/command.h

index 8b108451d2accff7969f55e6972ad389551829a1..7eb1a4fab846aeff33b3f5cca4f60b9c4c3b5fb5 100644 (file)


--- a/src/include/commands/command.h
+++ b/src/include/commands/command.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
 extern void AlterTableAddColumn(const char *relationName,
                    bool inh, ColumnDef *colDef);
 
-extern void AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+                                        bool inh, const char *colName,
+                                        Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+                                           bool inh, const char *colName,
+                                           Node *statsTarget);
 
 extern void AlterTableDropColumn(const char *relationName,
                     bool inh, const char *colName,


diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index d82d22fcdfcbe3482ed5dbf1b66bf52b607767c3..87bb0007aa067dcbfbe15d31cccfbe00f61df460 100644 (file)


--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -1,129 +1,27 @@
 /*-------------------------------------------------------------------------
  *
  * vacuum.h
- *   header file for postgres vacuum cleaner
+ *   header file for postgres vacuum cleaner and statistics analyzer
  *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef VACUUM_H
 #define VACUUM_H
 
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
 
 
-typedef struct VAttListData
-{
-   int         val_dummy;
-   struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
-   BlockNumber blkno;          /* BlockNumber of this Page */
-   Size        free;           /* FreeSpace on this Page */
-   uint16      offsets_used;   /* Number of OffNums used by vacuum */
-   uint16      offsets_free;   /* Number of OffNums free or to be free */
-   OffsetNumber offsets[1];    /* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
-   int         empty_end_pages;/* Number of "empty" end-pages */
-   int         num_pages;      /* Number of pages in pagedesc */
-   int         num_allocated_pages;    /* Number of allocated pages in
-                                        * pagedesc */
-   VacPage    *pagedesc;       /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
-   Form_pg_attribute attr;
-   Datum       best,
-               guess1,
-               guess2,
-               max,
-               min;
-   int         best_len,
-               guess1_len,
-               guess2_len,
-               max_len,
-               min_len;
-   long        best_cnt,
-               guess1_cnt,
-               guess1_hits,
-               guess2_hits,
-               null_cnt,
-               nonnull_cnt,
-               max_cnt,
-               min_cnt;
-   FmgrInfo    f_cmpeq,
-               f_cmplt,
-               f_cmpgt;
-   Oid         op_cmplt;
-   regproc     outfunc;
-   Oid         typelem;
-   bool        initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
-   Oid         vrl_relid;
-   struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
-   ItemPointerData new_tid;
-   ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
-   ItemPointerData tid;        /* tuple ID */
-   VacPage     vacpage;        /* where to move */
-   bool        cleanVpd;       /* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
-   Oid         relid;
-   int         num_tuples;
-   int         num_pages;
-   Size        min_tlen;
-   Size        max_tlen;
-   bool        hasindex;
-   int         num_vtlinks;
-   VTupleLink  vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE 1000000000      /* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                               bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
 
 #endif  /* VACUUM_H */


diff --git a/src/include/config.h.in b/src/include/config.h.in

index 0d989dbbb3155bfaa218fce2d6181c45921191de..01593a4ce963a05484b025e5206f27d8b2bd952b 100644 (file)


--- a/src/include/config.h.in
+++ b/src/include/config.h.in
@@ -8,7 +8,7 @@
  * or in config.h afterwards.  Of course, if you edit config.h, then your
  * changes will be overwritten the next time you run configure.
  *
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
  */
 
 #ifndef CONFIG_H
@@ -156,6 +156,11 @@
 #define INDEX_MAX_KEYS     16
 #define FUNC_MAX_ARGS      INDEX_MAX_KEYS
 
+/*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET  10
+
 /*
  * Define this to make libpgtcl's "pg_result -assign" command process C-style
  * backslash sequences in returned tuple data and convert Postgres array


diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 2cf9378cf116426106be2cba0bb29d970e561c09..0967bef24ba9437360c5142ffc6f770107c9aa5a 100644 (file)


--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -628,7 +628,6 @@ typedef struct GroupState
  *  SortState information
  *
  *     sort_Done       indicates whether sort has been performed yet
- *     sort_Keys       scan key structures describing the sort keys
  *     tuplesortstate  private state of tuplesort.c
  * ----------------
  */
@@ -636,7 +635,6 @@ typedef struct SortState
 {
    CommonScanState csstate;    /* its first field is NodeTag */
    bool        sort_Done;
-   ScanKey     sort_Keys;
    void       *tuplesortstate;
 } SortState;
 


diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 1614d787bcb8d0ceac119c020b51ae18ffebd013..63b1b1046a8e71675ed81102c38134886a45f0bc 100644 (file)


--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
    NodeTag     type;
    char        subtype;        /*------------
                                 *  A = add column
-                                *  T = alter column
+                                *  T = alter column default
+                                *  S = alter column statistics
                                 *  D = drop column
                                 *  C = add constraint
                                 *  X = drop constraint
-                                *  E = add toast table,
+                                *  E = create toast table
                                 *  U = change owner
                                 *------------
                                 */
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
 } ClusterStmt;
 
 /* ----------------------
- *     Vacuum Statement
+ *     Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
  * ----------------------
  */
 typedef struct VacuumStmt
 {
    NodeTag     type;
-   bool        verbose;        /* print status info */
-   bool        analyze;        /* analyze data */
-   char       *vacrel;         /* table to vacuum */
-   List       *va_spec;        /* columns to analyse */
+   bool        vacuum;         /* do VACUUM step */
+   bool        analyze;        /* do ANALYZE step */
+   bool        verbose;        /* print progress info */
+   char       *vacrel;         /* name of single table to process, or NULL */
+   List       *va_cols;        /* list of column names, or NIL for all */
 } VacuumStmt;
 
 /* ----------------------


diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 3ae8e09f57a30468fdece0f7fe9098a3ca05653f..9e69ed60992a7b7307fcc79150eccd7a6f62f963 100644 (file)


--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
  * reskey and reskeyop are the execution-time representation of sorting.
  * reskey must be zero in any non-sort-key item.  The reskey of sort key
  * targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID.  reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
  *
  * Both reskey and reskeyop are typically zero during parse/plan stages.
  * The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
    Index       ressortgroupref;
    /* nonzero if referenced by a sort/group clause */
    Index       reskey;         /* order of key in a sort (for those > 0) */
-   Oid         reskeyop;       /* sort operator's regproc Oid */
+   Oid         reskeyop;       /* sort operator's Oid */
    bool        resjunk;        /* set to true to eliminate the attribute
                                 * from final target list */
 } Resdom;


diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index f643ef879689640186250b344d4734f80aa6dc49..c76d9b4af7136f23fdc022f53127925129760519 100644 (file)


--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
    Oid         hashjoinoperator;       /* copy of clause operator */
 
    /* cache space for hashclause processing; -1 if not yet set */
-   Selectivity left_dispersion;/* dispersion of left side */
-   Selectivity right_dispersion;       /* dispersion of right side */
+   Selectivity left_bucketsize;        /* avg bucketsize of left side */
+   Selectivity right_bucketsize;       /* avg bucketsize of right side */
 } RestrictInfo;
 
 /*


diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 5caa576f0c0be002c3e1bc88a7ff75746f5c45b4..cbf6df063a3cc4ae782cab805acaaf80b9d2025f 100644 (file)


--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
               List *restrictlist,
               List *outersortkeys, List *innersortkeys);
 extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-             List *restrictlist, Selectivity innerdispersion);
+             List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
 extern Cost cost_qual_eval(List *quals);
 extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,


diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 5b71eded86fcac8f21a5732ef81d8906fd9263a3..0839feb4b2fe5c0d137a7705469acb3814779181 100644 (file)


--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion);
+                    Selectivity innerbucketsize);
 
 /*
  * prototypes for relnode.c


diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index f1c4aff1c804172da17b24a438551c0b631c98c0..6b35deed2867649e350da0c081a983eb0dec5821 100644 (file)


--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
 extern Oid get_atttype(Oid relid, AttrNumber attnum);
 extern bool get_attisset(Oid relid, char *attname);
 extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
-                 double min_estimate);
 extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
 extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
 extern char get_typstorage(Oid typid);
 extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+                            Oid atttype, int32 atttypmod,
+                            int reqkind, Oid reqop,
+                            Datum **values, int *nvalues,
+                            float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+                             Datum *values, int nvalues,
+                             float4 *numbers, int nnumbers);
 
 #define TypeIsToastable(typid) (get_typstorage(typid) != 'p')
 


diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index 8d4e2ae62c419658f44ec3f1adb9853a658ea2c6..342f7bf8a566b73e4f8393553ccb332ed067ed06 100644 (file)


--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,7 +53,7 @@
 #define RULEOID            22
 #define SHADOWNAME     23
 #define SHADOWSYSID        24
-#define STATRELID      25
+#define STATRELATT     25
 #define TYPENAME       26
 #define TYPEOID            27
 


diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 7f273776c36a26cc1e6b688b4a530f74a7c108f2..001761796e2492781d98aec7c8b311b4538e251a 100644 (file)


--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
  */
 
 extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
-                    bool randomAccess);
+                     int nkeys,
+                     Oid *sortOperators, AttrNumber *attNums,
+                     bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                      bool enforceUnique,
                      bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
 extern void tuplesort_markpos(Tuplesortstate *state);
 extern void tuplesort_restorepos(Tuplesortstate *state);
 
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+   SORTFUNC_LT,                /* raw "<" operator */
+   SORTFUNC_CMP,               /* -1 / 0 / 1 three-way comparator */
+   SORTFUNC_REVCMP             /* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+                              RegProcedure *sortFunction,
+                              SortFunctionKind *kind);
+
 #endif  /* TUPLESORT_H */


diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c

index 5614a34b0fef7390ba8ec0a4184fea10da5e7d69..c03880f497d0d62526a94157175fede654376f28 100644 (file)


--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y

index 345efb6576e2ddd8ff944993799a816b12bd34c8..91708bd91fae24f446576cacaea6ccbc1028163d 100644 (file)


--- a/src/interfaces/ecpg/preproc/preproc.y
+++ b/src/interfaces/ecpg/preproc/preproc.y
@@ -134,7 +134,7 @@ make_name(void)
 
 %union {
    double                  dval;
-        int                     ival;
+   int                     ival;
    char *                  str;
    struct when             action;
    struct index        index;
@@ -224,7 +224,7 @@ make_name(void)
        NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
        OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
        RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
-       SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+       SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
        TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
        VALID, VERBOSE, VERSION
 
@@ -285,7 +285,7 @@ make_name(void)
 %type      file_name AexprConst ParamNo c_expr ConstTypename
 %type     in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
 %type     opt_indirection expr_list extract_list extract_arg
-%type     position_list substr_list substr_from alter_column_action
+%type     position_list substr_list substr_from alter_column_default
 %type     trim_list in_expr substr_for attr attrs drop_behavior
 %type     Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
 %type     opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
 %type     row_expr row_descriptor row_list ConstDatetime opt_chain
 %type     SelectStmt into_clause OptTemp ConstraintAttributeSpec
 %type     opt_table opt_all sort_clause sortby_list ConstraintAttr 
-%type     sortby OptUseOp opt_inh_star relation_name_list name_list
+%type     sortby OptUseOp relation_name_list name_list
 %type     group_clause having_clause from_clause opt_distinct
 %type     join_outer where_clause relation_expr sub_type opt_arg
 %type     opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
 %type      NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
 %type      copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
 %type      opt_with_copy FetchStmt direction fetch_how_many from_in
-%type      ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type      opt_analyze opt_va_list va_list ExplainStmt index_params
+%type      ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type      analyze_keyword opt_name_list ExplainStmt index_params
 %type      index_list func_index index_elem opt_class access_method_clause
 %type      index_opt_unique IndexStmt func_return ConstInterval
 %type      func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
 %type     opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
 %type     case_expr when_clause_list case_default case_arg when_clause
 %type      select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type      select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type      select_offset_value ReindexStmt join_type opt_boolean
 %type     join_qual update_list AlterSchemaStmt joined_table
 %type     opt_level opt_lock lock_type users_in_new_group_clause
 %type      OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt:  AlterSchemaStmt          { output_statement($1, 0, NULL, connection); }
        | CreatedbStmt      { output_statement($1, 0, NULL, connection); }
        | DropdbStmt        { output_statement($1, 0, NULL, connection); }
        | VacuumStmt        { output_statement($1, 0, NULL, connection); }
+       | AnalyzeStmt       { output_statement($1, 0, NULL, connection); }
        | VariableSetStmt   { output_statement($1, 0, NULL, connection); }
        | VariableShowStmt  { output_statement($1, 0, NULL, connection); }
        | VariableResetStmt { output_statement($1, 0, NULL, connection); }
@@ -908,40 +909,41 @@ CheckPointStmt: CHECKPOINT     { $$= make_str("checkpoint"); }
 
 
 /*****************************************************************************
- *
- *     QUERY :
  *
  * ALTER TABLE variations
  *
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE  ADD [COLUMN]  */
-        ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+        ALTER TABLE relation_expr ADD opt_column columnDef
+       {
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+       }
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+   | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
        }
-/* ALTER TABLE  ALTER [COLUMN]  {SET DEFAULT |DROP
-DEFAULT} */
-   | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
-       alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+   | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+           $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
        }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-   | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
+   | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
        }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+   | ALTER TABLE relation_expr ADD TableConstraint
        {
-           $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+           $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
        }
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+   | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
        }
 /* ALTER TABLE  OWNER TO UserId */     
    | ALTER TABLE relation_name OWNER TO UserId   
@@ -950,7 +952,7 @@ DEFAULT} */
        }
        ;
 
-alter_column_action:
+alter_column_default:
         SET DEFAULT a_expr { $$ = cat2_str(make_str("set default"), $3); }
         | DROP DEFAULT          { $$ = make_str("drop default"); }
         ;
@@ -1234,10 +1236,6 @@ key_reference:  NO ACTION    { $$ = make_str("no action"); }
        | SET NULL_P    { $$ = make_str("set null"); }
        ;
 
-opt_only: ONLY     { $$ = make_str("only"); }
-   | /*EMPTY*/ { $$ = EMPTY; }
-   ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'                { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
                 | /*EMPTY*/                    { $$ = EMPTY; }
                 ;      
@@ -2013,10 +2011,9 @@ opt_force:      FORCE        { $$ = make_str("force"); }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
-                   $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+                   $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
                }
        ;
 
@@ -2250,38 +2247,44 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
+               {
+                   $$ = cat_str(2, make_str("vacuum"), $2);
+               }
+       | VACUUM opt_verbose relation_name
                {
                    $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose AnalyzeStmt
                {
-                   if ( strlen($5) > 0 && strlen($4) == 0 )
-                       mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
-                   $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+                   $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
        ;
 
-opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   $$ = cat_str(2, $1, $2);
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   $$ = cat_str(4, $1, $2, $3, $4);
+               }
        ;
 
-opt_analyze:  ANALYZE                  { $$ = make_str("analyze"); }
-       | ANALYSE               { $$ = make_str("analyse"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+analyze_keyword:  ANALYZE                  { $$ = make_str("analyze"); }
+       | ANALYSE                           { $$ = make_str("analyse"); }
        ;
 
-opt_va_list:  '(' va_list ')'              { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
        | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
-va_list:  name
-               { $$=$1; }
-       | va_list ',' name
-               { $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list:  '(' name_list ')'      { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+       | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
 
@@ -2383,9 +2386,9 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
-                   $$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+                   $$ = cat_str(3, make_str("delete from"), $3, $4);
                }
        ;
 
@@ -2416,12 +2419,12 @@ opt_lmode:      SHARE                           { $$ = make_str("share"); }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
-                   $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+                   $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
                }
        ;
 
@@ -2667,10 +2670,6 @@ select_offset_value:     PosIntConst {
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                 { $$ = make_str("*"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
-       ;
-
 relation_name_list:  name_list { $$ = $1; };
 
 name_list:  name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause                { $$ = $1; }
        | /* EMPTY */               { $$ = EMPTY; }
                 ;
 
-update_list:  OF va_list
+update_list:  OF name_list
               {
            $$ = cat2_str(make_str("of"), $2);
          }
@@ -5028,6 +5027,7 @@ TokenId:  ABSOLUTE            { $$ = make_str("absolute"); }
    | SHARE             { $$ = make_str("share"); }
    | START             { $$ = make_str("start"); }
    | STATEMENT         { $$ = make_str("statement"); }
+   | STATISTICS        { $$ = make_str("statistics"); }
    | STDIN                         { $$ = make_str("stdin"); }
    | STDOUT                        { $$ = make_str("stdout"); }
    | SYSID                         { $$ = make_str("sysid"); }


diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out

index a2b0ad9e3e722827117e707ae7033a210771e9b4..46bc60f6955d60e4a52170d4b7281048b146d97d 100644 (file)


--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -353,12 +353,28 @@ WHERE pg_statistic.starelid != 0 AND
 -----+----------
 (0 rows)
 
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop 
------+-------
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3 
+-----+--------
 (0 rows)
 
 SELECT oid, pg_trigger.tgrelid 


diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 9d4ff1b39856b3d468938ef709578649fe4d84ce..1b094a6e3bfe2f58a8e7b108c0088131a36feb35 100644 (file)


--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
           (p2.pronargs = 1 AND p1.aggbasetype = 0)));
   oid  | aggname | oid |   proname   
 -------+---------+-----+-------------
- 16997 | max     | 768 | int4larger
- 17011 | min     | 769 | int4smaller
+ 17010 | max     | 768 | int4larger
+ 17024 | min     | 769 | int4smaller
 (2 rows)
 
 -- Cross-check finalfn (if present) against its entry in pg_proc.


diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql

index b7ea1f63eaa8268d3583a670e9f3985619be0453..88727a6c76ec6922fc12f4456fba2dc650570f0a 100644 (file)


--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -177,10 +177,18 @@ SELECT    oid, pg_statistic.starelid
 FROM   pg_statistic 
 WHERE  pg_statistic.starelid != 0 AND 
    NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
 SELECT oid, pg_trigger.tgrelid 
 FROM   pg_trigger 
 WHERE  pg_trigger.tgrelid != 0 AND 
-   endterm="SQL-CREATETABLE-title">. The ALTER COLUMN form
-   allows you to set or remove the default for the column. Note that defaults
-   only apply to newly inserted rows.
+   endterm="SQL-CREATETABLE-title">.
+   The ALTER COLUMN SET/DROP DEFAULT forms
+   allow you to set or remove the default for the column. Note that defaults
+   only apply to subsequent INSERT commands; they do not
+   cause rows already in the table to change.
+   The ALTER COLUMN SET STATISTICS form allows you to
+   set the statistics-gathering target for subsequent
+   ANALYZE operations.
     The RENAME clause causes the name of a table or column
     to change without changing any of the data contained in
     the affected table. Thus, the table or column will
@@ -170,7 +177,7 @@ ALTER TABLE table
     The ADD table constraint definition clause 
     adds a new constraint to the table using the same syntax as 
    linkend="SQL-CREATETABLE" endterm="SQL-CREATETABLE-title">. 
-   The OWNER clause chnages the owner of the table to the user 
+   The OWNER clause changes the owner of the table to the user 
    new user.
   
 
@@ -190,10 +197,11 @@ ALTER TABLE table
    
 
    
-    In the current implementation, default and constraint clauses for the
+    In the current implementation of ADD COLUMN,
+    default and constraint clauses for the
     new column will be ignored. You can use the SET DEFAULT
     form of ALTER TABLE to set the default later.
-    (You will also have to update the already existing rows to the
+    (You may also want to update the already existing rows to the
     new default value, using 
     endterm="sql-update-title">.)
    
@@ -210,7 +218,7 @@ ALTER TABLE table
 
    
     You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
     catalog is not permitted.
     The PostgreSQL User's Guide has further
     information on inheritance.


diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml

new file mode 100644 (file)

index 0000000..57d3213


--- /dev/null
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -0,0 +1,219 @@
+
+
+
+ 
+  
+   ANALYZE
+  
+  SQL - Language Statements
+ 
+ 
+  
+   ANALYZE
+  
+  
+   Collect statistics about a Postgres database
+  
+ 
+ 
+  
+   2001-05-04
+  
+  
+ANALYZE [ VERBOSE ] [ table [ (column [, ...] ) ] ]
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Inputs
+   
+
+   
+    
+     
+      VERBOSE
+      
+       
+   Enables display of progress messages.
+       
+      
+     
+     
+      table
+      
+       
+   The name of a specific table to analyze. Defaults to all tables.
+       
+      
+     
+     
+      column
+      
+       
+   The name of a specific column to analyze. Defaults to all columns.
+       
+      
+     
+    
+   
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Outputs
+   
+   
+
+    
+     
+      
+ANALYZE
+       
+      
+       
+   The command is complete.
+       
+      
+     
+
+    
+   
+  
+ 
+
+ 
+  
+   2001-05-04
+  
+  
+   Description
+  
+  
+   ANALYZE collects statistics about the contents of
+   Postgres tables, and stores the results in
+   the system table pg_statistic.  Subsequently,
+   the query planner uses the statistics to help determine the most efficient
+   execution plans for queries.
+  
+
+  
+   With no parameter, ANALYZE examines every table in the
+   current database.  With a parameter, ANALYZE examines
+   only that table.  It is further possible to give a list of column names,
+   in which case only the statistics for those columns are updated.
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Notes
+   
+
+  
+   It is a good idea to run ANALYZE periodically, or
+   just after making major changes in the contents of a table.  Accurate
+   statistics will help the planner to choose the most appropriate query
+   plan, and thereby improve the speed of query processing.  A common
+   strategy is to run VACUUM and ANALYZE
+   once a day during a low-usage time of day.
+  
+
+  
+   Unlike VACUUM,
+   ANALYZE requires
+   only a read lock on the target table, so it can run in parallel with
+   other activity on the table.
+  
+
+  
+   For large tables, ANALYZE takes a random sample of the
+   table contents, rather than examining every row.  This allows even very
+   large tables to be analyzed in a small amount of time.  Note however
+   that the statistics are only approximate, and will change slightly each
+   time ANALYZE is run, even if the actual table contents
+   did not change.  This may result in small changes in the planner's
+   estimated costs shown by EXPLAIN.
+  
+
+  
+   The collected statistics usually include a list of some of the most common
+   values in each column and a histogram showing the approximate data
+   distribution in each column.  One or both of these may be omitted if
+   ANALYZE deems them uninteresting (for example, in
+   a unique-key column, there are no common values) or if the column
+   datatype does not support the appropriate operators.
+  
+
+  
+   The extent of analysis can be controlled by adjusting the per-column
+   statistics target with ALTER TABLE ALTER COLUMN SET
+   STATISTICS (see
+   ALTER TABLE).  The
+   target value sets the maximum number of entries in the most-common-value
+   list and the maximum number of bins in the histogram.  The default
+   target value is 10, but this can be adjusted up or down to trade off
+   accuracy of planner estimates against the time taken for
+   ANALYZE and the
+   amount of space occupied in pg_statistic.
+   In particular, setting the statistics target to zero disables collection of
+   statistics for that column.  It may be useful to do that for columns that
+   are never used as part of the WHERE, GROUP BY, or ORDER BY clauses of
+   queries, since the planner will have no use for statistics on such columns.
+  
+
+  
+   The largest statistics target among the columns being analyzed determines
+   the number of table rows sampled to prepare the statistics.  Increasing
+   the target causes a proportional increase in the time and space needed
+   to do ANALYZE.
+  
+
+  
+ 
+
+ 
+  
+   Compatibility
+  
+
+  
+   
+    2001-05-04
+   
+   
+    SQL92
+   
+   
+    There is no ANALYZE statement in SQL92.
+   
+  
+ 
+
+
+


diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml

index 51cb8a9ffdacfe41115a94d41b11e97fa1e6b6f9..cbb182466ea44d231b4271f54f2c14da9534307b 100644 (file)


--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -1,5 +1,5 @@
 
 
@@ -15,15 +15,15 @@ Postgres documentation
    VACUUM
   
   
-   Clean and analyze a Postgres database
+   Clean and optionally analyze a Postgres database
   
  
  
   
-   1999-07-20
+   2001-05-04
   
   
-VACUUM [ VERBOSE ] [ ANALYZE ] [ table ]
+VACUUM [ VERBOSE ] [ table ]
 VACUUM [ VERBOSE ] ANALYZE [ table [ (column [, ...] ) ] ]
   
 
@@ -49,7 +49,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
       ANALYZE
       
        
-   Updates column statistics used by the optimizer to
+   Updates statistics used by the optimizer to
    determine the most efficient way to execute a query.
        
       
@@ -90,7 +90,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
        
       
        
-   The command has been accepted and the database is being cleaned.
+   The command is complete.
        
       
      
@@ -144,28 +144,26 @@ NOTICE:  Index index: Pages 28;
    Description
   
   
-   VACUUM serves two purposes in 
-   Postgres as both a means to reclaim storage and
-   also a means to collect information for the optimizer.
+   VACUUM reclaims storage occupied by deleted tuples.
+   In normal Postgres operation, tuples that
+   are DELETEd or obsoleted by UPDATE are not physically removed from
+   their table; they remain present until a VACUUM is
+   done.  Therefore it's necessary to do VACUUM
+   periodically, especially on frequently-updated tables.
   
 
   
-   VACUUM opens every table in the database,
-   cleans out records from rolled back transactions, and updates statistics in the
-   system catalogs.  The statistics maintained include the number of
-   tuples and number of pages stored in all tables.
-  
-
-
-  
-   VACUUM ANALYZE collects statistics representing the
-   dispersion of the data in each column.
-   This information is valuable when several query execution paths are possible.
+   With no parameter, VACUUM processes every table in the
+   current database.  With a parameter, VACUUM processes
+   only that table.
   
 
   
-   Running VACUUM
-   periodically will increase the speed of the database in processing user queries.
+   VACUUM ANALYZE performs a VACUUM
+   and then an ANALYZE for each selected table.  This
+   is a handy combination form for routine maintenance scripts.  See
+   ANALYZE
+   for more details about its processing.
   
 
   
@@ -175,16 +173,15 @@ NOTICE:  Index index: Pages 28;
    
     Notes
    
-   
-    The open database is the target for VACUUM.
-   
+
    
     We recommend that active production databases be
     VACUUM-ed nightly, in order to remove
     expired rows. After copying a large table into
     Postgres or after deleting a large number
     of records, it may be a good idea to issue a VACUUM
-    ANALYZE query. This will update the system catalogs with
+    ANALYZE command for the affected table. This will update the
+    system catalogs with
     the results of all recent changes, and allow the
     Postgres query optimizer to make better
     choices in planning user queries.


diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml

index b92ee0868d029cf48443f4240fab5224bc958862..9a977a6515c97db601f13f5f43413bc3e81a46c8 100644 (file)


--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
 
@@ -26,6 +26,7 @@ PostgreSQL Reference Manual
    &alterGroup;
    &alterTable;
    &alterUser;
+   &analyze;
    &begin;
    &checkpoint;
    &close;


diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml

index d38e78a4e1af22651531a65d320f427ea71b175b..57d8bb79c28d69da43ce1897f0dacb4f3dd1a56b 100644 (file)


--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -1,5 +1,5 @@
 
 
  
@@ -244,7 +244,7 @@ SELECT (a + b) AS c FROM test_complex;
     only a small fraction.  '<' will accept a fraction that depends on
     where the given constant falls in the range of values for that table
     column (which, it just so happens, is information collected by
-    VACUUM ANALYZE and made available to the selectivity estimator).
+    ANALYZE and made available to the selectivity estimator).
     '<=' will accept a slightly larger fraction than '<' for the same
     comparison constant, but they're close enough to not be worth
     distinguishing, especially since we're not likely to do better than a


diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c

index 769f754b6690919606bdaaf8a016260382abdef8..86d704e8d08779e32b38e3d4d4f938072adeccf7 100644 (file)


--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
        Form_pg_attribute attr2 = tupdesc2->attrs[i];
 
        /*
-        * We do not need to check every single field here, and in fact
-        * some fields such as attdispersion probably shouldn't be
-        * compared.  We can also disregard attnum (it was used to place
-        * the row in the attrs array) and everything derived from the
-        * column datatype.
+        * We do not need to check every single field here: we can disregard
+        * attrelid, attnum (it was used to place the row in the attrs array)
+        * and everything derived from the column datatype.
         */
        if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
            return false;
        if (attr1->atttypid != attr2->atttypid)
            return false;
+       if (attr1->attstattarget != attr2->attstattarget)
+           return false;
        if (attr1->atttypmod != attr2->atttypmod)
            return false;
        if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
    else
        MemSet(NameStr(att->attname), 0, NAMEDATALEN);
 
-   att->attdispersion = 0;     /* dummy value */
+   att->attstattarget = 0;
    att->attcacheoff = -1;
    att->atttypmod = typmod;
 
    att->attnum = attributeNumber;
-   att->attnelems = attdim;
+   att->attndims = attdim;
    att->attisset = attisset;
 
    att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
    att->attbyval = true;
    att->attalign = 'i';
    att->attstorage = 'p';
-   att->attnelems = 0;
+   att->attndims = 0;
 }
 
 /* ----------------------------------------------------------------


diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c

index 1c5577b88a066a1ecebfd6ce317147efc28d489c..06010896821e5caa9627c17f6328239ec3c277b6 100644 (file)


--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
 #endif
 
 /*
-** routine to build an index.  Basically calls insert over and over
-*/
+ * routine to build an index.  Basically calls insert over and over
+ */
 Datum
 gistbuild(PG_FUNCTION_ARGS)
 {
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
 
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

index aa76ba232a05c21da94012fbefbc287924aa154f..9617fcc33a6a0bb5bf4556944cc433be26ad0331 100644 (file)


--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *   This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    HashItem    hitem;
    Node       *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c

index fb509ab66de99d90fcdab322dd36af40551316d1..2a9df577b10c56de723c68ae329e47847849fb71 100644 (file)


--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -166,6 +166,43 @@ heap_tuple_untoast_attr(varattrib *attr)
 }
 
 
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+   varattrib  *attr = (varattrib *) DatumGetPointer(value);
+   Size        result;
+
+   if (VARATT_IS_COMPRESSED(attr))
+   {
+       /*
+        * va_rawsize shows the original data size, whether the datum
+        * is external or not.
+        */
+       result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+   }
+   else if (VARATT_IS_EXTERNAL(attr))
+   {
+       /*
+        * an uncompressed external attribute has rawsize including the
+        * header (not too consistent!)
+        */
+       result = attr->va_content.va_external.va_rawsize;
+   }
+   else
+   {
+       /* plain untoasted datum */
+       result = VARSIZE(attr);
+   }
+   return result;
+}
+
+
 /* ----------
  * toast_delete -
  *


diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

index 97d99da4fde7bbbfe009c7c7baf04dc557390cd9..f456e0c9306f4f3c191d75172463bf852e905041 100644 (file)


--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    if (usefast)
    {
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
 
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c

index 3752a59e99a3259dcef8feb7660927baf8308a4a..a8c6a13ea3c14626245bad59e372b66b0d5c25e2 100644 (file)


--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
                itupdesc;
    Datum       attdata[INDEX_MAX_KEYS];
    char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                nitups;
    Node       *pred = indexInfo->ii_Predicate;
 
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
    /* count the tuples as we insert them */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
 
    /* start a heap scan */
    hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       nhtups++;
+       nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
            slot->val = htup;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               nitups++;
+               nitups += 1.0;
                continue;
            }
        }
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       nitups++;
+       nitups += 1.0;
 
        /*
         * For the current heap tuple, extract all the attributes we use


diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh

index c2993fa8fc6b474bc13badd0c4369ca56fdbb9d4..cac53f3e0853262c213239e698170311a6ee8e1c 100644 (file)


--- a/src/backend/catalog/genbki.sh
+++ b/src/backend/catalog/genbki.sh
@@ -10,7 +10,7 @@
 #
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
 #
 # NOTES
 #    non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
     fi
 done
 
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
 for dir in $INCLUDE_DIRS; do
     if [ -f "$dir/config.h" ]; then
         INDEXMAXKEYS=`grep '#define[   ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+        DEFAULTATTSTATTARGET=`grep '#define[   ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
         break
     fi
 done
@@ -168,6 +170,7 @@ sed -e "s/;[    ]*$//g" \
     -e "s/(NameData/(name/g" \
     -e "s/(Oid/(oid/g" \
     -e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+    -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
     -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
     -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
     -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \


diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

index 54867d51a4b631241e649453750b03ee0c1aeef4..03f16e11c3f3710b2589d8e7330bfd0a2bb386b8 100644 (file)


--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
 
 /*
  * Note:
- *     Should the executor special case these attributes in the future?
- *     Advantage:  consume 1/2 the space in the ATTRIBUTE relation.
- *     Disadvantage:  having rules to compute values in these tuples may
- *             be more difficult if not impossible.
+ *     Should the system special case these attributes in the future?
+ *     Advantage:  consume much less space in the ATTRIBUTE relation.
+ *     Disadvantage:  special cases will be all over the place.
  */
 
 static FormData_pg_attribute a1 = {
-   0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
-   SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+   0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+   SelfItemPointerAttributeNumber, 0, -1, -1,
+   false, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a2 = {
-   0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
-   ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"oid"}, OIDOID, 0, sizeof(Oid),
+   ObjectIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a3 = {
-   0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
-   MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+   MinTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a4 = {
-   0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
-   MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+   MinCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a5 = {
-   0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
-   MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+   MaxTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a6 = {
-   0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
-   MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+   MaxCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
 /*
-   We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
 static FormData_pg_attribute a7 = {
-   0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
-   TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+   TableOidAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
 };
 
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * This function returns a Form_pg_attribute pointer for a system attribute.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+   if (attno >= 0 || attno < - (int) lengthof(SysAtt))
+       elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+            attno);
+   return SysAtt[-attno - 1];
+}
 
 /* ----------------------------------------------------------------
  *             XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
  *     8) the relations are closed and the new relation's oid
  *        is returned.
  *
- * old comments:
- *     A new relation is inserted into the RELATION relation
- *     with the specified attribute(s) (newly inserted into
- *     the ATTRIBUTE relation).  How does concurrency control
- *     work?  Is it automatic now?  Expects the caller to have
- *     attname, atttypid, atttyparg, attproc, and attlen domains filled.
- *     Create fills the attnum domains sequentually from zero,
- *     fills the attdispersion domains with zeros, and fills the
- *     attrelid fields with the relid.
- *
- *     scan relation catalog for name conflict
- *     scan type catalog for typids (if not arg)
- *     create and insert attribute(s) into attribute catalog
- *     create new relation
- *     insert new relation into attribute catalog
- *
- *     Should coordinate with heap_create_with_catalog(). Either
- *     it should not be called or there should be a way to prevent
- *     the relation from being removed at the end of the
- *     transaction if it is successful ('u'/'r' may be enough).
- *     Also, if the transaction does not commit, then the
- *     relation should be removed.
- *
- *     XXX amcreate ignores "off" when inserting (for now).
- *     XXX amcreate (like the other utilities) needs to understand indexes.
- *
  * ----------------------------------------------------------------
  */
 
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
     */
    for (i = 0; i < natts; i++)
    {
-       for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+       for (j = 0; j < (int) lengthof(SysAtt); j++)
        {
-           if (strcmp(NameStr(HeapAtt[j]->attname),
+           if (strcmp(NameStr(SysAtt[j]->attname),
                       NameStr(tupdesc->attrs[i]->attname)) == 0)
            {
                elog(ERROR, "Attribute '%s' has a name conflict"
                     "\n\tName matches an existing system attribute",
-                    NameStr(HeapAtt[j]->attname));
+                    NameStr(SysAtt[j]->attname));
            }
        }
        if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
        /* Fill in the correct relation OID */
        (*dpp)->attrelid = new_rel_oid;
        /* Make sure these are OK, too */
-       (*dpp)->attdispersion = 0;
+       (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
        (*dpp)->attcacheoff = -1;
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
    /*
     * next we add the system attributes..
     */
-   dpp = HeapAtt;
+   dpp = SysAtt;
    for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
    {
        /* Fill in the correct relation OID */
        /* HACK: we are writing on static data here */
        (*dpp)->attrelid = new_rel_oid;
        /* Unneeded since they should be OK in the constant data anyway */
-       /* (*dpp)->attdispersion = 0; */
+       /* (*dpp)->attstattarget = 0; */
        /* (*dpp)->attcacheoff = -1; */
 
        tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
     * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
     * finds the relation has 0 rows and pages. See index.c.)
     */
-   new_rel_reltup->relpages = 10;      /* bogus estimates */
-   new_rel_reltup->reltuples = 1000;
+   switch (relkind)
+   {
+       case RELKIND_RELATION:
+       case RELKIND_INDEX:
+       case RELKIND_TOASTVALUE:
+           new_rel_reltup->relpages = 10;  /* bogus estimates */
+           new_rel_reltup->reltuples = 1000;
+           break;
+       case RELKIND_SEQUENCE:
+           new_rel_reltup->relpages = 1;
+           new_rel_reltup->reltuples = 1;
+           break;
+       default:                /* views, etc */
+           new_rel_reltup->relpages = 0;
+           new_rel_reltup->reltuples = 0;
+           break;
+   }
 
    new_rel_reltup->relowner = GetUserId();
    new_rel_reltup->reltype = new_type_oid;


diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index 2adb30e1ed8ecf91d12c0028495b8911ece7068d..5eefab114891fdc1b2bbcc7b407d6c96ac3c75ca 100644 (file)


--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -55,7 +55,7 @@
  */
 #define AVG_ATTR_SIZE 8
 #define NTUPLES_PER_PAGE(natts) \
-   ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+   ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
    ((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
 
 /* non-export function prototypes */
@@ -98,39 +98,6 @@ IsReindexProcessing(void)
    return reindexing;
 }
 
-/* ----------------------------------------------------------------
- *   sysatts is a structure containing attribute tuple forms
- *   for system attributes (numbered -1, -2, ...).  This really
- *   should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- *     Oid             attrelid;
- *     NameData        attname;
- *     Oid             atttypid;
- *     uint32          attnvals;
- *     int16           attlen;
- *     AttrNumber      attnum;
- *     uint32          attnelems;
- *     int32           attcacheoff;
- *     int32           atttypmod;
- *     bool            attbyval;
- *     bool            attisset;
- *     char            attalign;
- *     bool            attnotnull;
- *     bool            atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
-   {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
 /* ----------------------------------------------------------------
  *     GetHeapRelationOid
  * ----------------------------------------------------------------
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
    for (i = 0; i < numatts; i++)
    {
        AttrNumber  atnum;      /* attributeNumber[attributeOffset] */
-       AttrNumber  atind;
        Form_pg_attribute from;
        Form_pg_attribute to;
 
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
        {
 
            /*
-            * here we are indexing on a system attribute (-1...-n) so we
-            * convert atnum into a usable index 0...n-1 so we can use it
-            * to dereference the array sysatts[] which stores tuple
-            * descriptor information for system attributes.
+            * here we are indexing on a system attribute (-1...-n)
             */
-           if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
-               elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
-           atind = (-atnum) - 1;
-
-           from = &sysatts[atind];
+           from = SystemAttributeDefinition(atnum);
        }
        else
        {
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
            if (atnum > natts)
                elog(ERROR, "Cannot create index: attribute %d does not exist",
                     atnum);
-           atind = AttrNumberGetAttrOffset(atnum);
 
-           from = heapTupDesc->attrs[atind];
+           from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
        }
 
        /*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
         */
        to->attnum = i + 1;
 
-       to->attdispersion = 0.0;
+       to->attstattarget = 0;
+       to->attcacheoff = -1;
        to->attnotnull = false;
        to->atthasdef = false;
-       to->attcacheoff = -1;
 
        /*
         * We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
 
 /* ----------------
  *     UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index).  Note that this is not used
+ * in the context of VACUUM.
  * ----------------
  */
 void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
 {
    Relation    whichRel;
    Relation    pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
     * with zero size statistics until a VACUUM is done.  The optimizer
     * will generate very bad plans if the stats claim the table is empty
     * when it is actually sizable.  See also CREATE TABLE in heap.c.
+    *
+    * Note: this path is also taken during bootstrap, because bootstrap.c
+    * passes reltuples = 0 after loading a table.  We have to estimate some
+    * number for reltuples based on the actual number of pages.
     */
    relpages = RelationGetNumberOfBlocks(whichRel);
 
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
 
        for (i = 0; i < Natts_pg_class; i++)
        {
-           nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+           nulls[i] = ' ';
            replace[i] = ' ';
            values[i] = (Datum) NULL;
        }
 
        replace[Anum_pg_class_relpages - 1] = 'r';
-       values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+       values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
        replace[Anum_pg_class_reltuples - 1] = 'r';
-       values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+       values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
        newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
        simple_heap_update(pg_class, &tuple->t_self, newtup);
        if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
    TupleDesc   heapDescriptor;
    Datum       datum[INDEX_MAX_KEYS];
    char        nullv[INDEX_MAX_KEYS];
-   long        reltuples,
+   double      reltuples,
                indtuples;
    Node       *predicate = indexInfo->ii_Predicate;
 
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
                          0,    /* number of keys */
                          (ScanKey) NULL);      /* scan key */
 
-   reltuples = indtuples = 0;
+   reltuples = indtuples = 0.0;
 
    /*
     * for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
    {
        MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-       reltuples++;
+       reltuples += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
            slot->val = heapTuple;
            if (ExecQual((List *) oldPred, econtext, false))
            {
-               indtuples++;
+               indtuples += 1.0;
                continue;
            }
        }
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
        }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-       indtuples++;
+       indtuples += 1.0;
 
        /*
         * FormIndexDatum fills in its datum and null parameters with


diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 88e56869da58eee31d6c7b0a764b93c6c73476a7..24cc7a8b254dc9a10dea74b263e52cf30f477964 100644 (file)


--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,19 +8,16 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include 
-#include 
-#include 
-#include 
-#include 
+#include <math.h>
 
 #include "access/heapam.h"
+#include "access/tuptoaster.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_operator.h"
@@ -29,43 +26,139 @@
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/fmgroids.h"
-#include "utils/inval.h"
 #include "utils/syscache.h"
+#include "utils/tuplesort.h"
 
-#define swapLong(a,b)  {long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b)   {int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
-                                  stats->f_cmpgt.fn_addr != NULL && \
-                                  RegProcedureIsValid(stats->outfunc) )
 
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+   ALG_MINIMAL = 1,            /* Compute only most-common-values */
+   ALG_SCALAR                  /* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value.  For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD  256
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed.  The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ */
+typedef struct
+{
+   /* These fields are set up by examine_attribute */
+   int         attnum;         /* attribute number */
+   AlgCode     algcode;        /* Which algorithm to use for this column */
+   int         minrows;        /* Minimum # of rows needed for stats */
+   Form_pg_attribute attr;     /* copy of pg_attribute row for column */
+   Form_pg_type attrtype;      /* copy of pg_type row for column */
+   Oid         eqopr;          /* '=' operator for datatype, if any */
+   Oid         eqfunc;         /* and associated function */
+   Oid         ltopr;          /* '<' operator for datatype, if any */
+
+   /* These fields are filled in by the actual statistics-gathering routine */
+   bool        stats_valid;
+   float4      stanullfrac;    /* fraction of entries that are NULL */
+   int4        stawidth;       /* average width */
+   float4      stadistinct;    /* # distinct values */
+   int2        stakind[STATISTIC_NUM_SLOTS];
+   Oid         staop[STATISTIC_NUM_SLOTS];
+   int         numnumbers[STATISTIC_NUM_SLOTS];
+   float4     *stanumbers[STATISTIC_NUM_SLOTS];
+   int         numvalues[STATISTIC_NUM_SLOTS];
+   Datum      *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+   Datum       value;          /* a data value */
+   int         tupno;          /* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+   int         count;          /* # of duplicates */
+   int         first;          /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+
+#define swapInt(a,b)   {int _tmp; _tmp=a; a=b; b=_tmp;}
+#define swapDatum(a,b) {Datum _tmp; _tmp=a; a=b; b=_tmp;}
 
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+                              int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+                                       BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+                                 TupleDesc tupDesc, long totalrows,
+                                 HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+                                TupleDesc tupDesc, long totalrows,
+                                HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
 
 
 /*
- * analyze_rel() -- analyze relation
+ * analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
 {
-   HeapTuple   tuple;
    Relation    onerel;
-   int32       i;
-   int         attr_cnt,
-              *attnums = NULL;
    Form_pg_attribute *attr;
-   VacAttrStats *vacattrstats;
-   HeapScanDesc scan;
+   int         attr_cnt,
+               tcnt,
+               i;
+   VacAttrStats **vacattrstats;
+   int         targrows,
+               numrows;
+   long        totalrows;
+   HeapTuple  *rows;
+   HeapTuple   tuple;
+
+   if (vacstmt->verbose)
+       MESSAGE_LEVEL = NOTICE;
+   else
+       MESSAGE_LEVEL = DEBUG;
 
+   /*
+    * Begin a transaction for analyzing this relation.
+    *
+    * Note: All memory allocated during ANALYZE will live in
+    * TransactionCommandContext or a subcontext thereof, so it will
+    * all be released by transaction commit at the end of this routine.
+    */
    StartTransactionCommand();
 
    /*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 
    /*
     * Race condition -- if the pg_class tuple has gone away since the
-    * last time we saw it, we don't need to vacuum it.
+    * last time we saw it, we don't need to process it.
     */
    tuple = SearchSysCache(RELOID,
                           ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
 
    /*
-    * We can VACUUM ANALYZE any table except pg_statistic. see
-    * update_relstats
+    * We can ANALYZE any table except pg_statistic. See update_attstats
     */
    if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
               StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
    }
    ReleaseSysCache(tuple);
 
+   /*
+    * Open the class, getting only a read lock on it, and check permissions
+    */
    onerel = heap_open(relid, AccessShareLock);
 
    if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
                       RELNAME))
    {
-
-       /*
-        * we already did an elog during vacuum elog(NOTICE, "Skipping
-        * \"%s\" --- only table owner can VACUUM it",
-        * RelationGetRelationName(onerel));
-        */
+       /* No need for a notice if we already complained during VACUUM */
+       if (!vacstmt->vacuum)
+           elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+                RelationGetRelationName(onerel));
        heap_close(onerel, NoLock);
        CommitTransactionCommand();
        return;
    }
 
-   elog(MESSAGE_LEVEL, "Analyzing...");
+   elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
 
-   attr_cnt = onerel->rd_att->natts;
+   /*
+    * Determine which columns to analyze
+    *
+    * Note that system attributes are never analyzed.
+    */
    attr = onerel->rd_att->attrs;
+   attr_cnt = onerel->rd_att->natts;
 
-   if (anal_cols2 != NIL)
+   if (vacstmt->va_cols != NIL)
    {
-       int         tcnt = 0;
        List       *le;
 
-       if (length(anal_cols2) > attr_cnt)
-           elog(ERROR, "vacuum: too many attributes specified for relation %s",
-                RelationGetRelationName(onerel));
-       attnums = (int *) palloc(attr_cnt * sizeof(int));
-       foreach(le, anal_cols2)
+       vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       foreach(le, vacstmt->va_cols)
        {
-           char       *col = (char *) lfirst(le);
+           char       *col = strVal(lfirst(le));
 
            for (i = 0; i < attr_cnt; i++)
            {
                if (namestrcmp(&(attr[i]->attname), col) == 0)
                    break;
            }
-           if (i < attr_cnt)   /* found */
-               attnums[tcnt++] = i;
-           else
-           {
-               elog(ERROR, "vacuum: there is no attribute %s in %s",
+           if (i >= attr_cnt)
+               elog(ERROR, "ANALYZE: there is no attribute %s in %s",
                     col, RelationGetRelationName(onerel));
-           }
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
+       }
+       attr_cnt = tcnt;
+   }
+   else
+   {
+       vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       for (i = 0; i < attr_cnt; i++)
+       {
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
        }
        attr_cnt = tcnt;
    }
 
-   vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+   /*
+    * Quit if no analyzable columns
+    */
+   if (attr_cnt <= 0)
+   {
+       heap_close(onerel, NoLock);
+       CommitTransactionCommand();
+       return;
+   }
 
+   /*
+    * Determine how many rows we need to sample, using the worst case
+    * from all analyzable columns.  We use a lower bound of 100 rows
+    * to avoid possible overflow in Vitter's algorithm.
+    */
+   targrows = 100;
    for (i = 0; i < attr_cnt; i++)
    {
-       Operator    func_operator;
-       VacAttrStats *stats;
-
-       stats = &vacattrstats[i];
-       stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
-       memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
-              ATTRIBUTE_TUPLE_SIZE);
-       stats->best = stats->guess1 = stats->guess2 = 0;
-       stats->max = stats->min = 0;
-       stats->best_len = stats->guess1_len = stats->guess2_len = 0;
-       stats->max_len = stats->min_len = 0;
-       stats->initialized = false;
-       stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
-       stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
-       func_operator = compatible_oper("=",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
-       {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
-           ReleaseSysCache(func_operator);
-       }
-       else
-           stats->f_cmpeq.fn_addr = NULL;
+       if (targrows < vacattrstats[i]->minrows)
+           targrows = vacattrstats[i]->minrows;
+   }
+
+   /*
+    * Acquire the sample rows
+    */
+   rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+   numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
 
-       func_operator = compatible_oper("<",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we are running a standalone ANALYZE, update pages/tuples stats
+    * in pg_class.  We have the accurate page count from heap_beginscan,
+    * but only an approximate number of tuples; therefore, if we are
+    * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+    * inserted by VACUUM.
+    */
+   if (!vacstmt->vacuum)
+       vac_update_relstats(RelationGetRelid(onerel),
+                           onerel->rd_nblocks,
+                           (double) totalrows,
+                           RelationGetForm(onerel)->relhasindex);
+
+   /*
+    * Compute the statistics.  Temporary results during the calculations
+    * for each column are stored in a child context.  The calc routines
+    * are responsible to make sure that whatever they store into the
+    * VacAttrStats structure is allocated in TransactionCommandContext.
+    */
+   if (numrows > 0)
+   {
+       MemoryContext col_context,
+                   old_context;
+
+       col_context = AllocSetContextCreate(CurrentMemoryContext,
+                                           "Analyze Column",
+                                           ALLOCSET_DEFAULT_MINSIZE,
+                                           ALLOCSET_DEFAULT_INITSIZE,
+                                           ALLOCSET_DEFAULT_MAXSIZE);
+       old_context = MemoryContextSwitchTo(col_context);
+       for (i = 0; i < attr_cnt; i++)
        {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
-           stats->op_cmplt = oprid(func_operator);
-           ReleaseSysCache(func_operator);
+           switch (vacattrstats[i]->algcode)
+           {
+               case ALG_MINIMAL:
+                   compute_minimal_stats(vacattrstats[i],
+                                         onerel->rd_att, totalrows,
+                                         rows, numrows);
+                   break;
+               case ALG_SCALAR:
+                   compute_scalar_stats(vacattrstats[i],
+                                        onerel->rd_att, totalrows,
+                                        rows, numrows);
+                   break;
+           }
+           MemoryContextResetAndDeleteChildren(col_context);
        }
-       else
+       MemoryContextSwitchTo(old_context);
+       MemoryContextDelete(col_context);
+
+       /*
+        * Emit the completed stats rows into pg_statistic, replacing any
+        * previous statistics for the target columns.  (If there are stats
+        * in pg_statistic for columns we didn't process, we leave them alone.)
+        */
+       update_attstats(relid, attr_cnt, vacattrstats);
+   }
+
+   /*
+    * Close source relation now, but keep lock so that no one deletes it
+    * before we commit.  (If someone did, they'd fail to clean up the
+    * entries we made in pg_statistic.)
+    */
+   heap_close(onerel, NoLock);
+
+   /* Commit and release working memory */
+   CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ *
+ * onerel is the relation being analyzed; attnum is the 1-based attribute
+ * number of the column (it is used as onerel->rd_att->attrs[attnum-1]).
+ *
+ * A column is skipped (NULL result) when its attstattarget is <= 0, or
+ * when no "=" operator whose restriction estimator is F_EQSEL can be
+ * found for its datatype.  Otherwise the result is a palloc'd struct
+ * holding private copies of the column's pg_attribute and pg_type structs,
+ * the "=" operator and function OIDs, the "<" operator OID (InvalidOid
+ * unless a "<" operator with restriction estimator F_SCALARLTSEL exists),
+ * the algorithm code (ALG_SCALAR when "<" is usable, else ALG_MINIMAL),
+ * and minrows, the number of sample rows wanted (300 * attstattarget).
+ *
+ * Raises ERROR if the pg_type entry for the column's datatype is missing.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+   Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+   Operator    func_operator;
+   Oid         oprrest;
+   HeapTuple   typtuple;
+   Oid         eqopr = InvalidOid;
+   Oid         eqfunc = InvalidOid;
+   Oid         ltopr = InvalidOid;
+   VacAttrStats *stats;
+
+   /* Don't analyze column if user has specified not to */
+   if (attr->attstattarget <= 0)
+       return NULL;
+
+   /*
+    * If column has no "=" operator, we can't do much of anything.
+    * The operator is accepted only if its restriction estimator is
+    * F_EQSEL; otherwise eqfunc stays InvalidOid and we give up below.
+    */
+   func_operator = compatible_oper("=",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_EQSEL)
         {
-           stats->f_cmplt.fn_addr = NULL;
-           stats->op_cmplt = InvalidOid;
+           eqopr = oprid(func_operator);
+           eqfunc = oprfuncid(func_operator);
         }
+       ReleaseSysCache(func_operator);
+   }
+   if (!OidIsValid(eqfunc))
+       return NULL;
 
-       func_operator = compatible_oper(">",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we have "=" then we're at least able to do the minimal algorithm,
+    * so start filling in a VacAttrStats struct.  The struct is zeroed
+    * first, and the pg_attribute and pg_type data are copied into
+    * separately palloc'd space; the pg_type copy is made before the
+    * syscache entry it came from is released.
+    */
+   stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+   MemSet(stats, 0, sizeof(VacAttrStats));
+   stats->attnum = attnum;
+   stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+   memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+   typtuple = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(attr->atttypid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(typtuple))
+       elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+   stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+   memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+   ReleaseSysCache(typtuple);
+   stats->eqopr = eqopr;
+   stats->eqfunc = eqfunc;
+
+   /*
+    * Is there a "<" operator with suitable semantics?  "Suitable" here
+    * means its restriction estimator is F_SCALARLTSEL; if not, ltopr
+    * remains InvalidOid.
+    */
+   func_operator = compatible_oper("<",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_SCALARLTSEL)
         {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
-           ReleaseSysCache(func_operator);
+           ltopr = oprid(func_operator);
         }
-       else
-           stats->f_cmpgt.fn_addr = NULL;
+       ReleaseSysCache(func_operator);
+   }
+   stats->ltopr = ltopr;
+
+   /*
+    * Determine the algorithm to use (this will get more complicated later)
+    */
+   if (OidIsValid(ltopr))
+   {
+       /* Seems to be a scalar datatype */
+       stats->algcode = ALG_SCALAR;
+       /*--------------------
+        * The following choice of minrows is based on the paper
+        * "Random sampling for histogram construction: how much is enough?"
+        * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+        * Proceedings of ACM SIGMOD International Conference on Management
+        * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
+        * says that for table size n, histogram size k, maximum relative
+        * error in bin size f, and error probability gamma, the minimum
+        * random sample size is
+        *      r = 4 * k * ln(2*n/gamma) / f^2
+        * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+        *      r = 305.82 * k
+        * Note that because of the log function, the dependence on n is
+        * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+        * bin size error with probability 0.99.  So there's no real need to
+        * scale for n, which is a good thing because we don't necessarily
+        * know it at this point.
+        *--------------------
+        */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+   else
+   {
+       /* Can't do much but the minimal stuff */
+       stats->algcode = ALG_MINIMAL;
+       /* Might as well use the same minrows as above */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+
+   return stats;
+}
 
-       tuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(stats->attr->atttypid),
-                              0, 0, 0);
-       if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are collected (if there are fewer than that many
+ * rows in the table, all rows are collected).  When the table is larger
+ * than targrows, a truly random sample is collected: every row has an
+ * equal chance of ending up in the final sample.
+ *
+ * We also estimate the total number of rows in the table, and return that
+ * into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ *
+ * Parameters:
+ *   onerel    - the relation to sample
+ *   rows      - caller-supplied array with room for targrows entries; it is
+ *               filled with palloc'd copies of the sampled tuples
+ *   targrows  - maximum number of rows to collect (must be > 1)
+ *   totalrows - output: the exact row count if the whole table was read
+ *               during the initial scan, otherwise an estimate derived
+ *               from the observed tuples-per-page density
+ *
+ * Returns the number of rows actually stored into rows[].
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+                   long *totalrows)
+{
+   int         numrows = 0;
+   HeapScanDesc scan;
+   HeapTuple   tuple;
+   ItemPointer lasttuple;
+   BlockNumber lastblock,
+               estblock;
+   OffsetNumber lastoffset;
+   int         numest;
+   double      tuplesperpage;
+   long        t;
+   double      rstate;
+
+   Assert(targrows > 1);
+   /*
+    * Do a simple linear scan until we reach the target number of rows.
+    */
+   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       rows[numrows++] = heap_copytuple(tuple);
+       if (numrows >= targrows)
+           break;
+   }
+   heap_endscan(scan);
+   /*
+    * If we ran out of tuples then we're done, no matter how few we
+    * collected.  No sort is needed, since they're already in order.
+    */
+   if (!HeapTupleIsValid(tuple))
+   {
+       *totalrows = numrows;
+       return numrows;
+   }
+   /*
+    * Otherwise, start replacing tuples in the sample until we reach the
+    * end of the relation.  This algorithm is from Jeff Vitter's paper
+    * (see full citation below).  It works by repeatedly computing the number
+    * of the next tuple we want to fetch, which will replace a randomly
+    * chosen element of the reservoir (current set of tuples).  At all times
+    * the reservoir is a true random sample of the tuples we've passed over
+    * so far, so when we fall off the end of the relation we're done.
+    *
+    * A slight difficulty is that since we don't want to fetch tuples or even
+    * pages that we skip over, it's not possible to fetch *exactly* the N'th
+    * tuple at each step --- we don't know how many valid tuples are on
+    * the skipped pages.  We handle this by assuming that the average number
+    * of valid tuples/page on the pages already scanned over holds good for
+    * the rest of the relation as well; this lets us estimate which page
+    * the next tuple should be on and its position in the page.  Then we
+    * fetch the first valid tuple at or after that position, being careful
+    * not to use the same tuple twice.  This approach should still give a
+    * good random sample, although it's not perfect.
+    */
+   lasttuple = &(rows[numrows-1]->t_self);
+   lastblock = ItemPointerGetBlockNumber(lasttuple);
+   lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+   /*
+    * If possible, estimate tuples/page using only completely-scanned pages.
+    */
+   for (numest = numrows; numest > 0; numest--)
+   {
+       if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+           break;
+   }
+   if (numest == 0)
+   {
+       numest = numrows;       /* don't have a full page? */
+       estblock = lastblock + 1;
+   }
+   else
+   {
+       estblock = lastblock;
+   }
+   tuplesperpage = (double) numest / (double) estblock;
+
+   t = numrows;                /* t is the # of records processed so far */
+   rstate = init_selection_state(targrows);
+   for (;;)
+   {
+       double          targpos;
+       BlockNumber     targblock;
+       OffsetNumber    targoffset,
+                       maxoffset;
+
+       t = select_next_random_record(t, targrows, &rstate);
+       /* Try to read the t'th record in the table */
+       targpos = (double) t / tuplesperpage;
+       targblock = (BlockNumber) targpos;
+       /*
+        * Convert the fractional part of targpos into an offset within the
+        * page.  The multiplication must be done before truncating to int:
+        * the fraction alone is always < 1 and would truncate to zero.
+        */
+       targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
+           FirstOffsetNumber;
+       /* Make sure we are past the last selected record */
+       if (targblock <= lastblock)
         {
-           stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
-           stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
-           ReleaseSysCache(tuple);
+           targblock = lastblock;
+           if (targoffset <= lastoffset)
+               targoffset = lastoffset + 1;
         }
-       else
+       /* Loop to find first valid record at or after given position */
+   pageloop:;
+       /*
+        * Have we fallen off the end of the relation?  (We rely on
+        * heap_beginscan to have updated rd_nblocks.)
+        */
+       if (targblock >= onerel->rd_nblocks)
+           break;
+       maxoffset = get_page_max_offset(onerel, targblock);
+       for (;;)
         {
-           stats->outfunc = InvalidOid;
-           stats->typelem = InvalidOid;
+           HeapTupleData targtuple;
+           Buffer      targbuffer;
+
+           if (targoffset > maxoffset)
+           {
+               /* Fell off end of this page, try next */
+               targblock++;
+               targoffset = FirstOffsetNumber;
+               goto pageloop;
+           }
+           ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+           heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+           if (targtuple.t_data != NULL)
+           {
+               /*
+                * Found a suitable tuple, so save it, replacing one old
+                * tuple at random
+                */
+               int     k = (int) (targrows * random_fract());
+
+               Assert(k >= 0 && k < targrows);
+               heap_freetuple(rows[k]);
+               rows[k] = heap_copytuple(&targtuple);
+               ReleaseBuffer(targbuffer);
+               lastblock = targblock;
+               lastoffset = targoffset;
+               break;
+           }
+           /* this tuple is dead, so advance to next one on same page */
+           targoffset++;
         }
     }
-   /* delete existing pg_statistic rows for relation */
-   del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
-   /* scan relation to gather statistics */
-   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
-       attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+   /*
+    * Now we need to sort the collected tuples by position (itempointer).
+    */
+   qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
 
-   heap_endscan(scan);
+   /*
+    * Estimate total number of valid rows in relation.
+    */
+   *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
 
-   /* close rel, but keep lock so it doesn't go away before commit */
-   heap_close(onerel, NoLock);
+   return numrows;
+}
 
-   /* update statistics in pg_class */
-   update_attstats(relid, attr_cnt, vacattrstats);
+/*
+ * random_fract -- select a random value R uniformly distributed in 0 < R < 1
+ *
+ * Both endpoints are excluded: a draw of 0 or of MAX_RANDOM_VALUE (or
+ * anything outside that range) is rejected and random() is called again,
+ * so the quotient returned is strictly between 0 and 1.  Callers in this
+ * file take log() of the result, which is why 0 must never be returned.
+ */
+static double
+random_fract(void)
+{
+   long    z;
 
-   CommitTransactionCommand();
+   /* random() can produce endpoint values, try again if so */
+   do
+   {
+       z = random();
+   } while (! (z > 0 && z < MAX_RANDOM_VALUE));
+   return (double) z / (double) MAX_RANDOM_VALUE;
 }
 
 /*
- * attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed and next record to process.  The only extra
+ * state needed between calls is W, a random state variable.
  *
- * We compute the column min, max, null and non-null counts.
- * Plus we attempt to find the count of the value that occurs most
- * frequently in each column.  These figures are used to compute
- * the selectivity of the column.
+ * init_selection_state computes the initial W value.
  *
- * We use a three-bucket cache to get the most frequent item.
- * The 'guess' buckets count hits.  A cache miss causes guess1
- * to get the most hit 'guess' item in the most recent cycle, and
- * the new item goes into guess2.  Whenever the total count of hits
- * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+   double      u = random_fract();     /* uniform deviate, 0 < u < 1 */
+
+   /*
+    * W = exp(-ln(u)/n): the starting value of the random state variable,
+    * for use the first time Algorithm Z is applied.
+    */
+   return exp(-log(u) / n);
+}
+
+/*
+ * select_next_random_record -- pick the next record to put in the reservoir
+ *
+ * t is the number of records processed so far, n is the reservoir size,
+ * and *stateptr holds the random state variable W.  The return value is
+ * the (1-based) number of the next record to process; it is always
+ * greater than the t passed in.
+ *
+ * While t <= 22 * n the skip is found with Algorithm X and *stateptr is
+ * left untouched; beyond that Algorithm Z is used, which reads W from
+ * *stateptr and stores the updated W back before returning.
+ */
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+   /* The magic constant here is T from Vitter's paper */
+   if (t <= (22 * n))
+   {
+       /* Process records using Algorithm X until t is large enough */
+       double  V,
+               quot;
+
+       V = random_fract();     /* Generate V */
+       t++;
+       quot = (double) (t - n) / (double) t;
+       /*
+        * Find min S satisfying (4.1).  Each pass skips one more record:
+        * t advances by one and quot accumulates another (t - n)/t factor,
+        * until the running product drops to V or below.
+        */
+       while (quot > V)
+       {
+           t++;
+           quot *= (double) (t - n) / (double) t;
+       }
+   }
+   else
+   {
+       /* Now apply Algorithm Z */
+       double  W = *stateptr;
+       long    term = t - n + 1;
+       int     S;
+
+       /*
+        * Rejection loop: propose a skip count S from the current W and
+        * accept it via the cheap test first, else via the exact test.
+        */
+       for (;;)
+       {
+           long    numer,
+                   numer_lim,
+                   denom;
+           double  U,
+                   X,
+                   lhs,
+                   rhs,
+                   y,
+                   tmp;
+
+           /* Generate U and X */
+           U = random_fract();
+           X = t * (W - 1.0);
+           S = X;              /* S is tentatively set to floor(X) */
+           /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+           tmp = (double) (t + 1) / (double) term;
+           lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+           rhs = (((t + X)/(term + S)) * term)/t;
+           if (lhs <= rhs)
+           {
+               /* Accepted; the next W is derived from this test's values */
+               W = rhs/lhs;
+               break;
+           }
+           /* Test if U <= f(S)/cg(X) */
+           y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+           if (n < S)
+           {
+               denom = t;
+               numer_lim = term + S;
+           }
+           else
+           {
+               denom = t - n + S;
+               numer_lim = t + 1;
+           }
+           for (numer = t + S; numer >= numer_lim; numer--)
+           {
+               y *= (double) numer / (double) denom;
+               denom--;
+           }
+           W = exp(- log(random_fract())/n); /* Generate W in advance */
+           if (exp(log(y)/n) <= (t + X)/t)
+               break;
+       }
+       /* Skip S records; the one after them is the next to process */
+       t += S + 1;
+       *stateptr = W;
+   }
+   return t;
+}
+
+/*
+ * qsort comparator for sorting rows[] array
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+   HeapTuple   left = *(HeapTuple *) a;
+   HeapTuple   right = *(HeapTuple *) b;
+   BlockNumber leftblk = ItemPointerGetBlockNumber(&left->t_self);
+   BlockNumber rightblk = ItemPointerGetBlockNumber(&right->t_self);
+
+   /* Primary sort key is the block number ... */
+   if (leftblk != rightblk)
+       return (leftblk < rightblk) ? -1 : 1;
+   else
+   {
+       /* ... and within one block, the offset number breaks the tie */
+       OffsetNumber leftoff = ItemPointerGetOffsetNumber(&left->t_self);
+       OffsetNumber rightoff = ItemPointerGetOffsetNumber(&right->t_self);
+
+       if (leftoff != rightoff)
+           return (leftoff < rightoff) ? -1 : 1;
+   }
+
+   /* Same block and offset */
+   return 0;
+}
+
+/*
+ * Discover the largest valid tuple offset number on the given page
+ *
+ * This code probably ought to live in some other module.
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+   OffsetNumber maxoff;
+   Buffer      buf = ReadBuffer(relation, blocknumber);
+
+   if (!BufferIsValid(buf))
+       elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+            RelationGetRelationName(relation), (long) blocknumber);
+
+   /* Hold a share lock on the buffer only while we look at the page */
+   LockBuffer(buf, BUFFER_LOCK_SHARE);
+   maxoff = PageGetMaxOffsetNumber(BufferGetPage(buf));
+   LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+   /* Drop our reference to the buffer before returning the answer */
+   ReleaseBuffer(buf);
+
+   return maxoff;
+}
+
+
+/*
+ * compute_minimal_stats() -- compute minimal column statistics
  *
- * This method works perfectly for columns with unique values, and columns
- * with only two unique values, plus nulls.
+ * We use this when we can find only an "=" operator for the datatype.
  *
- * It becomes less perfect as the number of unique values increases and
- * their distribution in the table becomes more random.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, and the (estimated) number of distinct values.
  *
+ * The most common values are determined by brute force: we keep a list
+ * of previously seen values, ordered by number of times seen, as we scan
+ * the samples.  A newly seen value is inserted just after the last
+ * multiply-seen value, causing the bottommost (oldest) singly-seen value
+ * to drop off the list.  The accuracy of this method, and also its cost,
+ * depend mainly on the length of the list we are willing to keep.
  */
 static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+                     TupleDesc tupDesc, long totalrows,
+                     HeapTuple *rows, int numrows)
 {
    int         i;
-   TupleDesc   tupDesc = onerel->rd_att;
-
-   for (i = 0; i < attr_cnt; i++)
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   FmgrInfo    f_cmpeq;
+   typedef struct
+   {
+       Datum   value;
+       int     count;
+   } TrackItem;
+   TrackItem  *track;
+   int         track_cnt,
+               track_max;
+   int         num_mcv = stats->attr->attstattarget;
+
+   /* We track up to 2*n values for an n-element MCV list; but at least 10 */
+   track_max = 2 * num_mcv;
+   if (track_max < 10)
+       track_max = 10;
+   track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+   track_cnt = 0;
+
+   fmgr_info(stats->eqfunc, &f_cmpeq);
+
+   for (i = 0; i < numrows; i++)
    {
-       VacAttrStats *stats = &vacattrstats[i];
-       Datum       origvalue;
+       HeapTuple   tuple = rows[i];
        Datum       value;
        bool        isnull;
-       bool        value_hit;
-
-       if (!VacAttrStatsEqValid(stats))
-           continue;
-
-#ifdef _DROP_COLUMN_HACK__
-       if (COLUMN_IS_DROPPED(stats->attr))
-           continue;
-#endif  /* _DROP_COLUMN_HACK__ */
+       bool        match;
+       int         firstcount1,
+                   j;
 
-       origvalue = heap_getattr(tuple, stats->attr->attnum,
-                                tupDesc, &isnull);
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
+       /* Check for null/nonnull */
        if (isnull)
        {
-           stats->null_cnt++;
+           null_cnt++;
            continue;
        }
-       stats->nonnull_cnt++;
+       nonnull_cnt++;
 
        /*
-        * If the value is toasted, detoast it to avoid repeated
-        * detoastings and resultant memory leakage inside the comparison
-        * routines.
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
         */
-       if (!stats->attr->attbyval && stats->attr->attlen == -1)
-           value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
-       else
-           value = origvalue;
-
-       if (!stats->initialized)
+       if (is_varlena)
        {
-           bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
-           /* best_cnt gets incremented below */
-           bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
-           stats->guess1_cnt = stats->guess1_hits = 1;
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess2_hits = 1;
-           if (VacAttrStatsLtGtValid(stats))
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               /* min_cnt, max_cnt get incremented below */
+               toowide_cnt++;
+               continue;
            }
-           stats->initialized = true;
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
        }
 
-       if (VacAttrStatsLtGtValid(stats))
+       /*
+        * See if the value matches anything we're already tracking.
+        */
+       match = false;
+       firstcount1 = track_cnt;
+       for (j = 0; j < track_cnt; j++)
        {
-           if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
-                                          value, stats->min)))
+           if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
            {
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               stats->min_cnt = 1;
+               match = true;
+               break;
            }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->min)))
-               stats->min_cnt++;
+           if (j < firstcount1 && track[j].count == 1)
+               firstcount1 = j;
+       }
 
-           if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
-                                          value, stats->max)))
+       if (match)
+       {
+           /* Found a match */
+           track[j].count++;
+           /* This value may now need to "bubble up" in the track list */
+           while (j > 0 && track[j].count > track[j-1].count)
            {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               stats->max_cnt = 1;
+               swapDatum(track[j].value, track[j-1].value);
+               swapInt(track[j].count, track[j-1].count);
+               j--;
            }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->max)))
-               stats->max_cnt++;
        }
-
-       value_hit = true;
-       if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                      value, stats->best)))
-           stats->best_cnt++;
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess1)))
+       else
        {
-           stats->guess1_cnt++;
-           stats->guess1_hits++;
+           /* No match.  Insert at head of count-1 list */
+           if (track_cnt < track_max)
+               track_cnt++;
+           for (j = track_cnt-1; j > firstcount1; j--)
+           {
+               track[j].value = track[j-1].value;
+               track[j].count = track[j-1].count;
+           }
+           if (firstcount1 < track_cnt)
+           {
+               track[firstcount1].value = value;
+               track[firstcount1].count = 1;
+           }
        }
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess2)))
-           stats->guess2_hits++;
+   }
+
+   /* We can only compute valid stats if we found some non-null values. */
+   if (nonnull_cnt > 0)
+   {
+       int     nmultiple,
+               summultiple;
+
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
        else
-           value_hit = false;
+           stats->stawidth = stats->attrtype->typlen;
 
-       if (stats->guess2_hits > stats->guess1_hits)
+       /* Count the number of values we found multiple times */
+       summultiple = 0;
+       for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
        {
-           swapDatum(stats->guess1, stats->guess2);
-           swapInt(stats->guess1_len, stats->guess2_len);
-           swapLong(stats->guess1_hits, stats->guess2_hits);
-           stats->guess1_cnt = stats->guess1_hits;
+           if (track[nmultiple].count == 1)
+               break;
+           summultiple += track[nmultiple].count;
        }
-       if (stats->guess1_cnt > stats->best_cnt)
+
+       if (nmultiple == 0)
        {
-           swapDatum(stats->best, stats->guess1);
-           swapInt(stats->best_len, stats->guess1_len);
-           swapLong(stats->best_cnt, stats->guess1_cnt);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
        }
-       if (!value_hit)
+       else if (track_cnt < track_max && toowide_cnt == 0 &&
+                nmultiple == track_cnt)
        {
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /*
+            * Our track list includes every value in the sample, and every
+            * value appeared more than once.  Assume the column has just
+            * these values.
+            */
+           stats->stadistinct = track_cnt;
        }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * We assume (not very reliably!) that all the multiply-occurring
+            * values are reflected in the final track[] list, and the other
+            * nonnull values all appeared but once.
+            *----------
+            */
+           int     f1 = nonnull_cnt - summultiple;
+           double  term1;
 
-       /* Clean up detoasted copy, if any */
-       if (value != origvalue)
-           pfree(DatumGetPointer(value));
-   }
-}
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
 
-/*
- * bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
-   if (attr->attbyval)
-       *bucket = value;
-   else
-   {
-       int         len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
 
-       /* Avoid unnecessary palloc() traffic... */
-       if (len > *bucket_len)
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       if (num_mcv > 0)
        {
-           if (*bucket_len != 0)
-               pfree(DatumGetPointer(*bucket));
-           *bucket = PointerGetDatum(palloc(len));
-           *bucket_len = len;
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(track[i].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
+           }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[0] = STATISTIC_KIND_MCV;
+           stats->staop[0] = stats->eqopr;
+           stats->stanumbers[0] = mcv_freqs;
+           stats->numnumbers[0] = num_mcv;
+           stats->stavalues[0] = mcv_values;
+           stats->numvalues[0] = num_mcv;
        }
-       memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
    }
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 
 /*
- * update_attstats() -- update attribute statistics for one relation
+ * compute_scalar_stats() -- compute column statistics
  *
- *     Statistics are stored in several places: the pg_class row for the
- *     relation has stats about the whole relation, the pg_attribute rows
- *     for each attribute store "dispersion", and there is a pg_statistic
- *     row for each (non-system) attribute.  (Dispersion probably ought to
- *     be moved to pg_statistic, but it's not worth doing unless there's
- *     another reason to have to change pg_attribute.)  The pg_class values
- *     are updated by VACUUM, not here.
- *
- *     We violate no-overwrite semantics here by storing new values for
- *     the dispersion column directly into the pg_attribute tuple that's
- *     already on the page.  The reason for this is that if we updated
- *     these tuples in the usual way, vacuuming pg_attribute itself
- *     wouldn't work very well --- by the time we got done with a vacuum
- *     cycle, most of the tuples in pg_attribute would've been obsoleted.
- *     Updating pg_attribute's own statistics would be especially tricky.
- *     Of course, this only works for fixed-size never-null columns, but
- *     dispersion is.
+ * We use this when we can find "=" and "<" operators for the datatype.
  *
- *     pg_statistic rows are just added normally.  This means that
- *     pg_statistic will probably contain some deleted rows at the
- *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ * We determine the fraction of null rows, the average width, the
+ * most common values, the (estimated) number of distinct values, the
+ * distribution histogram, and the correlation of physical to logical order.
  *
- *     To keep things simple, we punt for pg_statistic, and don't try
- *     to compute or store rows for pg_statistic itself in pg_statistic.
- *     This could possibly be made to work, but it's not worth the trouble.
+ * The desired stats can be determined fairly easily after sorting the
+ * data values into order.
  */
 static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+                    TupleDesc tupDesc, long totalrows,
+                    HeapTuple *rows, int numrows)
 {
-   Relation    ad,
-               sd;
-   HeapScanDesc scan;
-   HeapTuple   atup,
-               stup;
-   ScanKeyData askey;
-   Form_pg_attribute attp;
-
-   ad = heap_openr(AttributeRelationName, RowExclusiveLock);
-   sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
-   /* Find pg_attribute rows for this relation */
-   ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
-                          F_INT4EQ, relid);
-
-   scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
-   while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+   int         i;
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   double      corr_xysum;
+   RegProcedure cmpFn;
+   SortFunctionKind cmpFnKind;
+   FmgrInfo    f_cmpfn;
+   ScalarItem *values;
+   int         values_cnt = 0;
+   int        *tupnoLink;
+   ScalarMCVItem *track;
+   int         track_cnt = 0;
+   int         num_mcv = stats->attr->attstattarget;
+
+   values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+   tupnoLink = (int *) palloc(numrows * sizeof(int));
+   track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+   SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+   fmgr_info(cmpFn, &f_cmpfn);
+
+   /* Initial scan to find sortable values */
+   for (i = 0; i < numrows; i++)
    {
-       int         i;
-       VacAttrStats *stats;
+       HeapTuple   tuple = rows[i];
+       Datum       value;
+       bool        isnull;
 
-       attp = (Form_pg_attribute) GETSTRUCT(atup);
-       if (attp->attnum <= 0)  /* skip system attributes for now */
-           continue;
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
-       for (i = 0; i < natts; i++)
+       /* Check for null/nonnull */
+       if (isnull)
        {
-           if (attp->attnum == vacattrstats[i].attr->attnum)
-               break;
+           null_cnt++;
+           continue;
        }
-       if (i >= natts)
-           continue;           /* skip attr if no stats collected */
-       stats = &(vacattrstats[i]);
+       nonnull_cnt++;
 
-       if (VacAttrStatsEqValid(stats))
+       /*
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
+        */
+       if (is_varlena)
        {
-           float4      selratio;       /* average ratio of rows selected
-                                        * for a random constant */
-
-           /* Compute dispersion */
-           if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
-
-               /*
-                * empty relation, so put a dummy value in attdispersion
-                */
-               selratio = 0;
+               toowide_cnt++;
+               continue;
            }
-           else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
-           {
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
+       }
 
-               /*
-                * looks like we have a unique-key attribute --- flag this
-                * with special -1.0 flag value.
-                *
-                * The correct dispersion is 1.0/numberOfRows, but since the
-                * relation row count can get updated without recomputing
-                * dispersion, we want to store a "symbolic" value and
-                * figure 1.0/numberOfRows on the fly.
-                */
-               selratio = -1;
-           }
-           else
+       /* Add it to the list to be sorted */
+       values[values_cnt].value = value;
+       values[values_cnt].tupno = values_cnt;
+       tupnoLink[values_cnt] = values_cnt;
+       values_cnt++;
+   }
+
+   /* We can only compute valid stats if we found some sortable values. */
+   if (values_cnt > 0)
+   {
+       int     ndistinct,      /* # distinct values in sample */
+               nmultiple,      /* # that appear multiple times */
+               num_hist,
+               dups_cnt;
+       int     slot_idx = 0;
+
+       /* Sort the collected values */
+       datumCmpFn = &f_cmpfn;
+       datumCmpFnKind = cmpFnKind;
+       datumCmpTupnoLink = tupnoLink;
+       qsort((void *) values, values_cnt,
+             sizeof(ScalarItem), compare_scalars);
+
+       /*
+        * Now scan the values in order, find the most common ones,
+        * and also accumulate ordering-correlation statistics.
+        *
+        * To determine which are most common, we first have to count the
+        * number of duplicates of each value.  The duplicates are adjacent
+        * in the sorted list, so a brute-force approach is to compare
+        * successive datum values until we find two that are not equal.
+        * However, that requires N-1 invocations of the datum comparison
+        * routine, which are completely redundant with work that was done
+        * during the sort.  (The sort algorithm must at some point have
+        * compared each pair of items that are adjacent in the sorted order;
+        * otherwise it could not know that it's ordered the pair correctly.)
+        * We exploit this by having compare_scalars remember the highest
+        * tupno index that each ScalarItem has been found equal to.  At the
+        * end of the sort, a ScalarItem's tupnoLink will still point to
+        * itself if and only if it is the last item of its group of
+        * duplicates (since the group will be ordered by tupno).
+        */
+       corr_xysum = 0;
+       ndistinct = 0;
+       nmultiple = 0;
+       dups_cnt = 0;
+       for (i = 0; i < values_cnt; i++)
+       {
+           int         tupno = values[i].tupno;
+
+           corr_xysum += (double) i * (double) tupno;
+           dups_cnt++;
+           if (tupnoLink[tupno] == tupno)
            {
-               if (VacAttrStatsLtGtValid(stats) &&
-                   stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+               /* Reached end of duplicates of this value */
+               ndistinct++;
+               if (dups_cnt > 1)
                {
+                   nmultiple++;
+                   if (track_cnt < num_mcv ||
+                       dups_cnt > track[track_cnt-1].count)
+                   {
+                       /*
+                        * Found a new item for the mcv list; find its
+                        * position, bubbling down old items if needed.
+                        * Loop invariant is that j points at an empty/
+                        * replaceable slot.
+                        */
+                       int     j;
+
+                       if (track_cnt < num_mcv)
+                           track_cnt++;
+                       for (j = track_cnt-1; j > 0; j--)
+                       {
+                           if (dups_cnt <= track[j-1].count)
+                               break;
+                           track[j].count = track[j-1].count;
+                           track[j].first = track[j-1].first;
+                       }
+                       track[j].count = dups_cnt;
+                       track[j].first = i + 1 - dups_cnt;
+                   }
+               }
+               dups_cnt = 0;
+           }
+       }
 
-                   /*
-                    * exact result when there are just 1 or 2 values...
-                    */
-                   double      min_cnt_d = stats->min_cnt,
-                               max_cnt_d = stats->max_cnt,
-                               null_cnt_d = stats->null_cnt;
-                   double      total = ((double) stats->nonnull_cnt) + null_cnt_d;
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
+       else
+           stats->stawidth = stats->attrtype->typlen;
 
-                   selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
-               }
-               else
-               {
-                   double      most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-                   double      total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+       if (nmultiple == 0)
+       {
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
+       }
+       else if (toowide_cnt == 0 && nmultiple == ndistinct)
+       {
+           /*
+            * Every value in the sample appeared more than once.  Assume the
+            * column has just these values.
+            */
+           stats->stadistinct = ndistinct;
+       }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * Overwidth values are assumed to have been distinct.
+            *----------
+            */
+           int     f1 = ndistinct - nmultiple + toowide_cnt;
+           double  term1;
 
-                   /*
-                    * we assume count of other values are 20% of best
-                    * count in table
-                    */
-                   selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
-               }
-               /* Make sure calculated values are in-range */
-               if (selratio < 0.0)
-                   selratio = 0.0;
-               else if (selratio > 1.0)
-                   selratio = 1.0;
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
+
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
+
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       Assert(track_cnt >= num_mcv);
+       if (num_mcv > 0)
+       {
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(values[track[i].first].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+           stats->staop[slot_idx] = stats->eqopr;
+           stats->stanumbers[slot_idx] = mcv_freqs;
+           stats->numnumbers[slot_idx] = num_mcv;
+           stats->stavalues[slot_idx] = mcv_values;
+           stats->numvalues[slot_idx] = num_mcv;
+           slot_idx++;
+       }
 
-           /* overwrite the existing statistics in the tuple */
-           attp->attdispersion = selratio;
+       /*
+        * Generate a histogram slot entry if there are at least two
+        * distinct values not accounted for in the MCV list.  (This
+        * ensures the histogram won't collapse to empty or a singleton.)
+        */
+       num_hist = ndistinct - num_mcv;
+       if (num_hist > stats->attr->attstattarget)
+           num_hist = stats->attr->attstattarget + 1;
+       if (num_hist >= 2)
+       {
+           MemoryContext old_context;
+           Datum  *hist_values;
+           int     nvals;
 
-           /* invalidate the tuple in the cache and write the buffer */
-           RelationInvalidateHeapTuple(ad, atup);
-           WriteNoReleaseBuffer(scan->rs_cbuf);
+           /* Sort the MCV items into position order to speed next loop */
+           qsort((void *) track, num_mcv,
+                 sizeof(ScalarMCVItem), compare_mcvs);
 
            /*
-            * Create pg_statistic tuples for the relation, if we have
-            * gathered the right data.  del_stats() previously deleted
-            * all the pg_statistic tuples for the rel, so we just have to
-            * insert new ones here.
+            * Collapse out the MCV items from the values[] array.
             *
-            * Note analyze_rel() has seen to it that we won't come here when
-            * vacuuming pg_statistic itself.
+            * Note we destroy the values[] array here... but we don't need
+            * it for anything more.  We do, however, still need values_cnt.
             */
-           if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+           if (num_mcv > 0)
            {
-               float4      nullratio;
-               float4      bestratio;
-               FmgrInfo    out_function;
-               char       *out_string;
-               double      best_cnt_d = stats->best_cnt,
-                           null_cnt_d = stats->null_cnt,
-                           nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
-               Datum       values[Natts_pg_statistic];
-               char        nulls[Natts_pg_statistic];
-               Relation    irelations[Num_pg_statistic_indices];
+               int     src,
+                       dest;
+               int     j;
 
-               nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
-               bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
-               fmgr_info(stats->outfunc, &out_function);
+               src = dest = 0;
+               j = 0;          /* index of next interesting MCV item */
+               while (src < values_cnt)
+               {
+                   int     ncopy;
+
+                   if (j < num_mcv)
+                   {
+                       int     first = track[j].first;
+
+                       if (src >= first)
+                       {
+                           /* advance past this MCV item */
+                           src = first + track[j].count;
+                           j++;
+                           continue;
+                       }
+                       ncopy = first - src;
+                   }
+                   else
+                   {
+                       ncopy = values_cnt - src;
+                   }
+                   memmove(&values[dest], &values[src],
+                           ncopy * sizeof(ScalarItem));
+                   src += ncopy;
+                   dest += ncopy;
+               }
+               nvals = dest;
+           }
+           else
+               nvals = values_cnt;
+           Assert(nvals >= num_hist);
 
-               for (i = 0; i < Natts_pg_statistic; ++i)
-                   nulls[i] = ' ';
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+           for (i = 0; i < num_hist; i++)
+           {
+               int     pos;
 
-               /*
-                * initialize values[]
-                */
-               i = 0;
-               values[i++] = ObjectIdGetDatum(relid);  /* starelid */
-               values[i++] = Int16GetDatum(attp->attnum);      /* staattnum */
-               values[i++] = ObjectIdGetDatum(stats->op_cmplt);        /* staop */
-               values[i++] = Float4GetDatum(nullratio);        /* stanullfrac */
-               values[i++] = Float4GetDatum(bestratio);        /* stacommonfrac */
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->best,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stacommonval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->min,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* staloval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->max,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stahival */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-
-               stup = heap_formtuple(sd->rd_att, values, nulls);
-
-               /* store tuple and update indexes too */
-               heap_insert(sd, stup);
-
-               CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
-               CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
-               CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
-               /* release allocated space */
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
-               heap_freetuple(stup);
+               pos = (i * (nvals - 1)) / (num_hist - 1);
+               hist_values[i] = datumCopy(values[pos].value,
+                                          stats->attr->attbyval,
+                                          stats->attr->attlen);
            }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stavalues[slot_idx] = hist_values;
+           stats->numvalues[slot_idx] = num_hist;
+           slot_idx++;
+       }
+
+       /* Generate a correlation entry if there are multiple values */
+       if (values_cnt > 1)
+       {
+           MemoryContext old_context;
+           float4 *corrs;
+           double  corr_xsum,
+                   corr_x2sum;
+
+           /* Must allocate the result array in TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           corrs = (float4 *) palloc(sizeof(float4));
+           MemoryContextSwitchTo(old_context);
+
+           /*----------
+            * Since we know the x and y value sets are both
+            *      0, 1, ..., values_cnt-1
+            * we have sum(x) = sum(y) =
+            *      (values_cnt-1)*values_cnt / 2
+            * and sum(x^2) = sum(y^2) =
+            *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+            *----------
+            */
+           corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+           corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+               (double) (2*values_cnt-1) / 6.0;
+           /* And the correlation coefficient reduces to */
+           corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+               (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stanumbers[slot_idx] = corrs;
+           stats->numnumbers[slot_idx] = 1;
+           slot_idx++;
        }
    }
-   heap_endscan(scan);
-   /* close rels, but hold locks till upcoming commit */
-   heap_close(ad, NoLock);
-   heap_close(sd, NoLock);
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 /*
- * del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
  *
- * If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums.  The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest
+ * tupno whose datum has been found equal to that item's datum.  This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
  */
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
 {
-   Relation    pgstatistic;
-   HeapScanDesc scan;
-   HeapTuple   tuple;
-   ScanKeyData key;
+   Datum       da = ((ScalarItem *) a)->value;
+   int         ta = ((ScalarItem *) a)->tupno;
+   Datum       db = ((ScalarItem *) b)->value;
+   int         tb = ((ScalarItem *) b)->tupno;
 
-   pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+   if (datumCmpFnKind == SORTFUNC_LT)
+   {
+       if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+           return -1;          /* a < b */
+       if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+           return 1;           /* b < a, i.e. a > b */
+   }
+   else
+   {
+       /* sort function is CMP or REVCMP: one call gives the ordering's sign */
+       int32   compare;
 
-   ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
-                          F_OIDEQ, ObjectIdGetDatum(relid));
-   scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+       compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+       if (compare != 0)
+       {
+           if (datumCmpFnKind == SORTFUNC_REVCMP)
+               compare = -compare;
+           return compare;
+       }
+   }
 
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   /*
+    * Datums are equal: raise (never lower) each item's link to the other's tupno.
+    */
+   if (datumCmpTupnoLink[ta] < tb)
+       datumCmpTupnoLink[ta] = tb;
+   if (datumCmpTupnoLink[tb] < ta)
+       datumCmpTupnoLink[tb] = ta;
+
+   /*
+    * Break the tie by tupno, so equal datums sort in ascending tupno order
+    */
+   return ta - tb;
+}
+
+/*
+ * qsort comparator: orders ScalarMCVItems by ascending "first" position
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+   const ScalarMCVItem *lhs = (const ScalarMCVItem *) a;
+   const ScalarMCVItem *rhs = (const ScalarMCVItem *) b;
+
+   return (int) lhs->first - (int) rhs->first;
+}
+
+
+/*
+ * update_attstats() -- update attribute statistics for one relation
+ *
+ *     relid is the relation; vacattrstats[] holds natts per-attribute results.
+ *
+ *     Statistics are stored in several places: the pg_class row for the
+ *     relation has stats about the whole relation, and there is a
+ *     pg_statistic row for each (non-system) attribute that has ever been
+ *     analyzed.  The pg_class values are updated by VACUUM, not here.
+ *     pg_statistic rows are just added or updated normally, so pg_statistic
+ *     will probably contain some deleted rows at the completion of a vacuum
+ *     cycle, unless it happens to get vacuumed last.
+ *
+ *     To keep things simple, we punt for pg_statistic, and don't try
+ *     to compute or store rows for pg_statistic itself in pg_statistic.
+ *     This could possibly be made to work, but it's not worth the trouble.
+ *     Note analyze_rel() has seen to it that we won't come here when
+ *     vacuuming pg_statistic itself.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+   Relation    sd;
+   int         attno;
+
+   /*
+    * We use an ExclusiveLock on pg_statistic to ensure that only one
+    * backend is writing it at a time --- without that, we might have to
+    * deal with concurrent updates here, and it's not worth the trouble.
+    */
+   sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+   for (attno = 0; attno < natts; attno++)
    {
-       if (attcnt > 0)
+       VacAttrStats *stats = vacattrstats[attno];
+       FmgrInfo    out_function;
+       HeapTuple   stup,
+                   oldtup;
+       int         i, k, n;
+       Datum       values[Natts_pg_statistic];
+       char        nulls[Natts_pg_statistic];
+       char        replaces[Natts_pg_statistic];
+       Relation    irelations[Num_pg_statistic_indices];
+
+       /* Ignore attr if we weren't able to collect stats */
+       if (!stats->stats_valid)
+           continue;
+
+       fmgr_info(stats->attrtype->typoutput, &out_function);
+
+       /*
+        * Construct a new pg_statistic tuple; i walks values[]/nulls[] in order
+        */
+       for (i = 0; i < Natts_pg_statistic; ++i)
        {
-           Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
-           int         i;
+           nulls[i] = ' ';     /* not null by default; empty slots get 'n' below */
+           replaces[i] = 'r';  /* presumably: replace every column on update -- confirm */
+       }
 
-           for (i = 0; i < attcnt; i++)
+       i = 0;
+       values[i++] = ObjectIdGetDatum(relid); /* starelid */
+       values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+       values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+       values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+       values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     nnum = stats->numnumbers[k];
+
+           if (nnum > 0)
            {
-               if (pgs->staattnum == attnums[i] + 1)
-                   break;
+               Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+               ArrayType  *arry;
+
+               for (n = 0; n < nnum; n++)
+                   numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+               /* XXX knows more than it should about type float4: */
+               arry = construct_array(numdatums, nnum,
+                                      false, sizeof(float4), 'i');
+               values[i++] = PointerGetDatum(arry); /* stanumbersN */
+           }
+           else
+           {
+               nulls[i] = 'n';             /* slot has no numbers */
+               values[i++] = (Datum) 0;
            }
-           if (i >= attcnt)
-               continue;       /* don't delete it */
        }
-       simple_heap_delete(pgstatistic, &tuple->t_self);
-   }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     ntxt = stats->numvalues[k];
 
-   heap_endscan(scan);
+           if (ntxt > 0)
+           {
+               Datum      *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+               ArrayType  *arry;
 
-   /*
-    * Close rel, but *keep* lock; we will need to reacquire it later, so
-    * there's a possibility of deadlock against another VACUUM process if
-    * we let go now.  Keeping the lock shouldn't delay any common
-    * operation other than an attempted VACUUM of pg_statistic itself.
-    */
-   heap_close(pgstatistic, NoLock);
+               for (n = 0; n < ntxt; n++)
+               {
+                   /*
+                    * Convert the value to text: the type's output function
+                    * yields a string, textin wraps it, then we free the string.
+                    */
+                   Datum   stringdatum;
+
+                   stringdatum =
+                       FunctionCall3(&out_function,
+                                     stats->stavalues[k][n],
+                                     ObjectIdGetDatum(stats->attrtype->typelem),
+                                     Int32GetDatum(stats->attr->atttypmod));
+                   txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+                   pfree(DatumGetPointer(stringdatum));
+               }
+               /* XXX knows more than it should about type text: */
+               arry = construct_array(txtdatums, ntxt,
+                                      false, -1, 'i');
+               values[i++] = PointerGetDatum(arry); /* stavaluesN */
+           }
+           else
+           {
+               nulls[i] = 'n';             /* slot has no values */
+               values[i++] = (Datum) 0;
+           }
+       }
+
+       /* Is there already a pg_statistic tuple for this attribute? */
+       oldtup = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(stats->attnum),
+                               0, 0);
+
+       if (HeapTupleIsValid(oldtup))
+       {
+           /* Yes, replace it */
+           stup = heap_modifytuple(oldtup,
+                                   sd,
+                                   values,
+                                   nulls,
+                                   replaces);
+           ReleaseSysCache(oldtup);
+           simple_heap_update(sd, &stup->t_self, stup);
+       }
+       else
+       {
+           /* No, insert new tuple */
+           stup = heap_formtuple(sd->rd_att, values, nulls);
+           heap_insert(sd, stup);
+       }
+
+       /* update indices too (done after either the update or the insert) */
+       CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+                          irelations);
+       CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+       CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+       heap_freetuple(stup);
+   }
+
+   /* close rel, but hold lock till upcoming commit */
+   heap_close(sd, NoLock);
 }


diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c

index 96d493688e328aaa1fc4bf56bc12e18865f2ee33..13a78f1177390f0108702c94a7cc005e0a28b183 100644 (file)


--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
  *
  * NOTES
  *   The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
 #include "access/genam.h"
 
 
+static void drop_default(Oid relid, int16 attnum);
 static bool needs_toast_table(Relation rel);
 static bool is_relation(char *name);
 
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
        HeapTuple   typeTuple;
        Form_pg_type tform;
        char       *typename;
-       int         attnelems;
+       int         attndims;
 
        if (SearchSysCacheExists(ATTNAME,
                                 ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
 
        if (colDef->typename->arrayBounds)
        {
-           attnelems = length(colDef->typename->arrayBounds);
+           attndims = length(colDef->typename->arrayBounds);
            typename = makeArrayTypeName(colDef->typename->name);
        }
        else
-           attnelems = 0;
+           attndims = 0;
 
        typeTuple = SearchSysCache(TYPENAME,
                                   PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
        namestrcpy(&(attribute->attname), colDef->colname);
        attribute->atttypid = typeTuple->t_data->t_oid;
        attribute->attlen = tform->typlen;
-       attribute->attdispersion = 0;
+       attribute->attstattarget = DEFAULT_ATTSTATTARGET;
        attribute->attcacheoff = -1;
        attribute->atttypmod = colDef->typename->typmod;
        attribute->attnum = i;
        attribute->attbyval = tform->typbyval;
-       attribute->attnelems = attnelems;
+       attribute->attndims = attndims;
        attribute->attisset = (bool) (tform->typtype == 'c');
        attribute->attstorage = tform->typstorage;
        attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
 }
 
 
-
-static void drop_default(Oid relid, int16 attnum);
-
-
 /*
  * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
  */
 void
-AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+                            bool inh, const char *colName,
+                            Node *newDefault)
 {
    Relation    rel;
    HeapTuple   tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
            if (childrelid == myrelid)
                continue;
            rel = heap_open(childrelid, AccessExclusiveLock);
-           AlterTableAlterColumn(RelationGetRelationName(rel),
-                                 false, colName, newDefault);
+           AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+                                        false, colName, newDefault);
            heap_close(rel, AccessExclusiveLock);
        }
    }
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
    /* -= now do the thing on this relation =- */
 
    /* reopen the business */
-   rel = heap_openr((char *) relationName, AccessExclusiveLock);
+   rel = heap_openr(relationName, AccessExclusiveLock);
 
    /*
     * get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
 }
 
 
-
 static void
 drop_default(Oid relid, int16 attnum)
 {
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
 }
 
 
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS -- store a column's attstattarget
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+                               bool inh, const char *colName,
+                               Node *statsTarget)
+{
+   Relation    rel;
+   Oid         myrelid;
+   int         newtarget;
+   Relation    attrelation;
+   HeapTuple   tuple;
+
+#ifndef NO_SECURITY
+   if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+       elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+   rel = heap_openr(relationName, AccessExclusiveLock);
+   if (rel->rd_rel->relkind != RELKIND_RELATION)
+       elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+            relationName);
+   myrelid = RelationGetRelid(rel);
+   heap_close(rel, NoLock);    /* close rel, but keep lock! */
+
+   /*
+    * Propagate to children if desired
+    */
+   if (inh)
+   {
+       List       *child,
+                  *children;
+
+       /* this routine is actually in the planner */
+       children = find_all_inheritors(myrelid);
+
+       /*
+        * find_all_inheritors does the recursive search of the inheritance
+        * hierarchy, so we just process each relid in the list it returns,
+        * passing inh = false (and skipping our own relid, handled below).
+        */
+       foreach(child, children)
+       {
+           Oid         childrelid = lfirsti(child);
+
+           if (childrelid == myrelid)
+               continue;
+           rel = heap_open(childrelid, AccessExclusiveLock);
+           AlterTableAlterColumnStatistics(RelationGetRelationName(rel),
+                                           false, colName, statsTarget);
+           heap_close(rel, AccessExclusiveLock);
+       }
+   }
+
+   /* -= now do the thing on this relation: update its pg_attribute row =- */
+
+   Assert(IsA(statsTarget, Integer));
+   newtarget = intVal(statsTarget);
+
+   /* Clamp target to 0..1000 (should we raise an error instead?) */
+   if (newtarget < 0)
+       newtarget = 0;
+   else if (newtarget > 1000)
+       newtarget = 1000;
+
+   attrelation = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+   tuple = SearchSysCacheCopy(ATTNAME,
+                              ObjectIdGetDatum(myrelid),
+                              PointerGetDatum(colName),
+                              0, 0);
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+            relationName, colName);
+
+   if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0)
+       elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+            colName);
+
+   ((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget;
+
+   simple_heap_update(attrelation, &tuple->t_self, tuple);
+
+   /* keep system catalog indices current for the updated tuple */
+   {
+       Relation    irelations[Num_pg_attr_indices];
+
+       CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations);
+       CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple);
+       CatalogCloseIndices(Num_pg_attr_indices, irelations);
+   }
+
+   heap_freetuple(tuple);
+   heap_close(attrelation, RowExclusiveLock);
+}
+
+
 #ifdef _DROP_COLUMN_HACK__
 /*
  * ALTER TABLE DROP COLUMN trial implementation


diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 694d0e8bbc1491c39827ff833f8cc3fb68906e9e..9a0dbdc8c8e15c0b261068728c7d38546e3aa07c 100644 (file)


--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
              Buffer oldbuf, ItemPointerData from,
              Buffer newbuf, HeapTuple newtup);
 
+
+typedef struct VRelListData
+{
+   Oid         vrl_relid;      /* OID of a relation to process */
+   struct VRelListData *vrl_next;  /* next list entry, or NULL at end */
+} VRelListData;
+
+typedef VRelListData *VRelList;
+
+typedef struct VacPageData
+{
+   BlockNumber blkno;          /* BlockNumber of this Page */
+   Size        free;           /* FreeSpace on this Page */
+   uint16      offsets_used;   /* Number of OffNums used by vacuum */
+   uint16      offsets_free;   /* Number of OffNums free or to be free */
+   OffsetNumber offsets[1];    /* Array of its OffNums; NOTE(review): declared [1], presumably over-allocated -- confirm */
+} VacPageData;
+
+typedef VacPageData *VacPage;
+
+typedef struct VacPageListData
+{
+   int         empty_end_pages;/* Number of "empty" end-pages; if > 0, vacuum_heap truncates the rel */
+   int         num_pages;      /* Number of pages in pagedesc */
+   int         num_allocated_pages;    /* Number of allocated pages in
+                                        * pagedesc */
+   VacPage    *pagedesc;       /* Descriptions of pages (VacPage pointers) */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;
+
+typedef struct VTupleLinkData
+{
+   ItemPointerData new_tid;    /* NOTE(review): presumably TID of the newer tuple version -- confirm */
+   ItemPointerData this_tid;   /* NOTE(review): presumably TID of this tuple itself -- confirm */
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;
+
+typedef struct VTupleMoveData
+{
+   ItemPointerData tid;        /* tuple ID (NOTE(review): presumably of the tuple to move -- confirm) */
+   VacPage     vacpage;        /* where to move */
+   bool        cleanVpd;       /* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;
+
+typedef struct VRelStats
+{
+   Oid         relid;          /* OID of the relation, set by vacuum_rel */
+   long        num_pages;      /* zeroed by vacuum_rel; scan_heap stores nblocks */
+   long        num_tuples;     /* zeroed by vacuum_rel; scan_heap stores its tuple count */
+   Size        min_tlen;       /* saved by scan_heap (0 if no tuples); presumably min tuple length */
+   Size        max_tlen;       /* saved by scan_heap (0 if no tuples); presumably max tuple length */
+   bool        hasindex;       /* set by vacuum_rel; passed to vac_update_relstats */
+   int         num_vtlinks;    /* NOTE(review): presumably number of entries in vtlinks -- confirm */
+   VTupleLink  vtlinks;        /* pointer to VTupleLinkData entries */
+} VRelStats;
+
+
 static MemoryContext vac_context = NULL;
 
 static int MESSAGE_LEVEL;      /* message level */
 
 static TransactionId XmaxRecent;
 
+
 /* non-export function prototypes */
 static void vacuum_init(void);
 static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
 static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+                     VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacuum_pages, VacPageList fraged_pages,
+                       int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacpagelist);
 static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+                        long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
 static char *show_rusage(struct rusage * ru0);
 
 
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
 void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
 {
+   const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
    NameData    VacRel;
    Name        VacRelName;
-   MemoryContext old;
-   List       *le;
-   List       *anal_cols2 = NIL;
-
-   if (anal_cols != NIL && !analyze)
-       elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
+   VRelList    vrl,
+               cur;
 
    /*
     * We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
     * behavior.
     */
    if (IsTransactionBlock())
-       elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+       elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
 
-   if (verbose)
+   if (vacstmt->verbose)
        MESSAGE_LEVEL = NOTICE;
    else
        MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
                                        ALLOCSET_DEFAULT_INITSIZE,
                                        ALLOCSET_DEFAULT_MAXSIZE);
 
-   /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
-   if (vacrel)
+   /* Convert vacrel, which is just a string, to a Name */
+   if (vacstmt->vacrel)
    {
-       namestrcpy(&VacRel, vacrel);
+       namestrcpy(&VacRel, vacstmt->vacrel);
        VacRelName = &VacRel;
    }
    else
        VacRelName = NULL;
 
-   /* must also copy the column list, if any, to safe storage */
-   old = MemoryContextSwitchTo(vac_context);
-   foreach(le, anal_cols)
-   {
-       char       *col = (char *) lfirst(le);
-
-       anal_cols2 = lappend(anal_cols2, pstrdup(col));
-   }
-   MemoryContextSwitchTo(old);
+   /* Build list of relations to process (note this lives in vac_context) */
+   vrl = getrels(VacRelName, stmttype);
 
    /*
     * Start up the vacuum cleaner.
-    *
-    * NOTE: since this commits the current transaction, the memory holding
-    * any passed-in parameters gets freed here.  We must have already
-    * copied pass-by-reference parameters to safe storage.  Don't make me
-    * fix this again!
     */
    vacuum_init();
 
-   /* vacuum the database */
-   vac_vacuum(VacRelName, analyze, anal_cols2);
+   /*
+    * Process each selected relation.  We are careful to process
+    * each relation in a separate transaction in order to avoid holding
+    * too many locks at one time.
+    */
+   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+   {
+       if (vacstmt->vacuum)
+           vacuum_rel(cur->vrl_relid);
+       /* analyze separately so locking is minimized */
+       if (vacstmt->analyze)
+           analyze_rel(cur->vrl_relid, vacstmt);
+   }
 
    /* clean up */
    vacuum_shutdown();
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
  *     PostgresMain().
  */
 static void
-vacuum_init()
+vacuum_init(void)
 {
    /* matches the StartTransaction in PostgresMain() */
    CommitTransactionCommand();
 }
 
 static void
-vacuum_shutdown()
+vacuum_shutdown(void)
 {
    /* on entry, we are not in a transaction */
 
@@ -223,34 +287,10 @@ vacuum_shutdown()
 }
 
 /*
- * vac_vacuum() -- vacuum the database.
- *
- *     This routine builds a list of relations to vacuum, and then calls
- *     code that vacuums them one at a time.  We are careful to vacuum each
- *     relation in a separate transaction in order to avoid holding too many
- *     locks at one time.
+ * Build a list of VRelListData nodes for each relation to be processed
  */
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
-   VRelList    vrl,
-               cur;
-
-   /* get list of relations */
-   vrl = getrels(VacRelP);
-
-   /* vacuum each heap relation */
-   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
-   {
-       vacuum_rel(cur->vrl_relid);
-       /* analyze separately so locking is minimized */
-       if (analyze)
-           analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
-   }
-}
-
 static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
 {
    Relation    rel;
    TupleDesc   tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
    char       *rname;
    char        rkind;
    bool        n;
-   bool        found = false;
    ScanKeyData key;
 
-   StartTransactionCommand();
-
-   if (NameStr(*VacRelP))
+   if (VacRelP)
    {
 
        /*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
    }
    else
    {
+       /* find all relations listed in pg_class */
        ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
                               F_CHAREQ, CharGetDatum('r'));
    }
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
 
    while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
    {
-       found = true;
-
        d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
-       rname = (char *) DatumGetPointer(d);
+       rname = (char *) DatumGetName(d);
 
        d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
        rkind = DatumGetChar(d);
 
        if (rkind != RELKIND_RELATION)
        {
-           elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+           elog(NOTICE, "%s: can not process indexes, views or special system tables",
+                stmttype);
            continue;
        }
 
-       /* get a relation list entry for this guy */
+       /* Make a relation list entry for this guy */
        if (vrl == (VRelList) NULL)
            vrl = cur = (VRelList)
                MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
    heap_endscan(scan);
    heap_close(rel, AccessShareLock);
 
-   if (!found)
-       elog(NOTICE, "Vacuum: table not found");
-
-   CommitTransactionCommand();
+   if (vrl == NULL)
+       elog(NOTICE, "%s: table not found", stmttype);
 
    return vrl;
 }
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
     */
    vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
    vacrelstats->relid = relid;
-   vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+   vacrelstats->num_pages = 0;
+   vacrelstats->num_tuples = 0;
    vacrelstats->hasindex = false;
 
    GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
        vacrelstats->hasindex = true;
    else
        vacrelstats->hasindex = false;
-#ifdef NOT_USED
 
+#ifdef NOT_USED
    /*
     * reindex in VACUUM is dangerous under WAL. ifdef out until it
     * becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
    heap_close(onerel, NoLock);
 
    /* update statistics in pg_class */
-   update_relstats(vacrelstats->relid, vacrelstats->num_pages,
-                   vacrelstats->num_tuples, vacrelstats->hasindex,
-                   vacrelstats);
+   vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+                       vacrelstats->num_tuples, vacrelstats->hasindex);
 
    /*
     * Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    char       *relname;
    VacPage     vacpage,
                vp;
+   long        num_tuples;
    uint32      tups_vacuumed,
-               num_tuples,
                nkeep,
                nunused,
                ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    /* save stats in the rel list for use later */
    vacrelstats->num_tuples = num_tuples;
    vacrelstats->num_pages = nblocks;
-/*   vacrelstats->natts = attr_cnt;*/
    if (num_tuples == 0)
        min_tlen = max_tlen = 0;
    vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
    }
 
    elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
         nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
         new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 {
    Buffer      buf;
    VacPage    *vacpage;
-   int         nblocks;
+   long        nblocks;
    int         i;
 
    nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
    /* truncate relation if there are some empty end-pages */
    if (vacuum_pages->empty_end_pages > 0)
    {
-       elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+       elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
             RelationGetRelationName(onerel),
             vacrelstats->num_pages, nblocks);
        nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
  *
  */
 static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
-   int         nitups;
+   long        nitups;
    int         nipages;
    struct rusage ru0;
 
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
 
    /* now update statistics in pg_class */
    nipages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
         RelationGetRelationName(indrel), nipages, nitups,
         show_rusage(&ru0));
 
    if (nitups != num_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
             RelationGetRelationName(indrel), nitups, num_tuples);
 
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
  *     pg_class.
  */
 static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+            long num_tuples, int keep_tuples)
 {
    RetrieveIndexResult res;
    IndexScanDesc iscan;
    ItemPointer heapptr;
    int         tups_vacuumed;
-   int         num_index_tuples;
+   long        num_index_tuples;
    int         num_pages;
    VacPage     vp;
    struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
 
    /* now update statistics in pg_class */
    num_pages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel),
+                       num_pages, num_index_tuples, false);
 
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
         RelationGetRelationName(indrel), num_pages,
         num_index_tuples - keep_tuples, tups_vacuumed,
         show_rusage(&ru0));
 
    if (num_index_tuples != num_tuples + keep_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
          RelationGetRelationName(indrel), num_index_tuples, num_tuples);
 
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
 }
 
 /*
- * update_relstats() -- update statistics for one relation
+ * vac_update_relstats() -- update statistics for one relation
  *
  *     Update the whole-relation statistics that are kept in its pg_class
  *     row.  There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
  *     we updated these tuples in the usual way, vacuuming pg_class itself
  *     wouldn't work very well --- by the time we got done with a vacuum
  *     cycle, most of the tuples in pg_class would've been obsoleted.
- *     Updating pg_class's own statistics would be especially tricky.
  *     Of course, this only works for fixed-size never-null columns, but
  *     these are.
  */
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
-               VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                   bool hasindex)
 {
    Relation    rd;
    HeapTupleData rtup;


diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c

index 12c6f82a8b224c0f773f79c7b53132447467d399..e0543a2810977526886fee0d639ec76cc069463f 100644 (file)


--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,24 +20,24 @@
 #include "utils/tuplesort.h"
 
 /* ----------------------------------------------------------------
- *     FormSortKeys(node)
+ *     ExtractSortKeys
  *
- *     Forms the structure containing information used to sort the relation.
+ *     Extract the sorting key information from the plan node.
  *
- *     Returns an array of ScanKeyData.
+ *     Returns two palloc'd arrays, one of sort operator OIDs and
+ *     one of attribute numbers.
  * ----------------------------------------------------------------
  */
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+               Oid **sortOperators,
+               AttrNumber **attNums)
 {
-   ScanKey     sortkeys;
    List       *targetList;
-   List       *tl;
    int         keycount;
-   Resdom     *resdom;
-   AttrNumber  resno;
-   Index       reskey;
-   Oid         reskeyop;
+   Oid        *sortOps;
+   AttrNumber *attNos;
+   List       *tl;
 
    /*
     * get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
    keycount = sortnode->keycount;
 
    /*
-    * first allocate space for scan keys
+    * first allocate space for results
     */
    if (keycount <= 0)
-       elog(ERROR, "FormSortKeys: keycount <= 0");
-   sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
-   MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+       elog(ERROR, "ExtractSortKeys: keycount <= 0");
+   sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+   MemSet(sortOps, 0, keycount * sizeof(Oid));
+   *sortOperators = sortOps;
+   attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+   MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+   *attNums = attNos;
 
    /*
-    * form each scan key from the resdom info in the target list
+    * extract info from the resdom nodes in the target list
     */
    foreach(tl, targetList)
    {
        TargetEntry *target = (TargetEntry *) lfirst(tl);
-
-       resdom = target->resdom;
-       resno = resdom->resno;
-       reskey = resdom->reskey;
-       reskeyop = resdom->reskeyop;
+       Resdom     *resdom = target->resdom;
+       Index       reskey = resdom->reskey;
 
        if (reskey > 0)         /* ignore TLEs that are not sort keys */
        {
-           ScanKeyEntryInitialize(&sortkeys[reskey - 1],
-                                  0x0,
-                                  resno,
-                                  (RegProcedure) reskeyop,
-                                  (Datum) 0);
+           Assert(reskey <= keycount);
+           sortOps[reskey - 1] = resdom->reskeyop;
+           attNos[reskey - 1] = resdom->resno;
        }
    }
-
-   return sortkeys;
 }
 
 /* ----------------------------------------------------------------
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
    {
        Plan       *outerNode;
        TupleDesc   tupDesc;
-       int         keycount;
-       ScanKey     sortkeys;
+       Oid        *sortOperators;
+       AttrNumber *attNums;
 
        SO1_printf("ExecSort: %s\n",
                   "sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
 
        outerNode = outerPlan((Plan *) node);
        tupDesc = ExecGetTupType(outerNode);
-       keycount = node->keycount;
-       sortkeys = (ScanKey) sortstate->sort_Keys;
 
-       tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
-                                             true /* randomAccess */ );
+       ExtractSortKeys(node, &sortOperators, &attNums);
 
+       tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+                                             sortOperators, attNums,
+                                             true /* randomAccess */ );
        sortstate->tuplesortstate = (void *) tuplesortstate;
 
+       pfree(sortOperators);
+       pfree(attNums);
+
        /*
         * Scan the subplan and feed all the tuples to tuplesort.
         */
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
     */
    sortstate = makeNode(SortState);
    sortstate->sort_Done = false;
-   sortstate->sort_Keys = NULL;
    sortstate->tuplesortstate = NULL;
 
    node->sortstate = sortstate;
@@ -258,11 +257,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
    outerPlan = outerPlan((Plan *) node);
    ExecInitNode(outerPlan, estate, (Plan *) node);
 
-   /*
-    * initialize sortstate information
-    */
-   sortstate->sort_Keys = FormSortKeys(node);
-
    /*
     * initialize tuple type.  no need to initialize projection info
     * because this node doesn't do projections.
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
        tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
    sortstate->tuplesortstate = NULL;
 
-   if (sortstate->sort_Keys != NULL)
-       pfree(sortstate->sort_Keys);
-
    pfree(sortstate);
    node->sortstate = NULL;
 


diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index ad50630931e357a1ca7bae5f806f8cc242062722..ee5a803b8025ac9817834537bb5b4ccd10708527 100644 (file)


--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
    newnode->left_pathkey = NIL;
    newnode->right_pathkey = NIL;
    newnode->hashjoinoperator = from->hashjoinoperator;
-   newnode->left_dispersion = from->left_dispersion;
-   newnode->right_dispersion = from->right_dispersion;
+   newnode->left_bucketsize = from->left_bucketsize;
+   newnode->right_bucketsize = from->right_bucketsize;
 
    return newnode;
 }
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
 {
    VacuumStmt *newnode = makeNode(VacuumStmt);
 
-   newnode->verbose = from->verbose;
+   newnode->vacuum = from->vacuum;
    newnode->analyze = from->analyze;
+   newnode->verbose = from->verbose;
    if (from->vacrel)
        newnode->vacrel = pstrdup(from->vacrel);
-   Node_Copy(from, newnode, va_spec);
+   Node_Copy(from, newnode, va_cols);
 
    return newnode;
 }


diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 06ee63bbacd05398c5445bd4ce4f8dfb169090da..284a534aa966f03a5f69da55e5faa89a96925b1e 100644 (file)


--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
        return false;
 
    /*
-    * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+    * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
     * since they may not be set yet, and should be derivable from the
     * clause anyway
     */
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
 static bool
 _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
 {
-   if (a->verbose != b->verbose)
+   if (a->vacuum != b->vacuum)
        return false;
    if (a->analyze != b->analyze)
        return false;
+   if (a->verbose != b->verbose)
+       return false;
    if (!equalstr(a->vacrel, b->vacrel))
        return false;
-   if (!equal(a->va_spec, b->va_spec))
+   if (!equal(a->va_cols, b->va_cols))
        return false;
 
    return true;


diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 9a071e7a250df88efe03c183927ffeadfa07a86c..4c0c1b03ef544c60b9161208ceb950a83862419c 100644 (file)


--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
  *
  * NOTES
  *   Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
 
    /* eval_cost is not part of saved representation; compute on first use */
    local_node->eval_cost = -1;
-   /* ditto for cached pathkeys and dispersion */
+   /* ditto for cached pathkeys and bucketsize */
    local_node->left_pathkey = NIL;
    local_node->right_pathkey = NIL;
-   local_node->left_dispersion = -1;
-   local_node->right_dispersion = -1;
+   local_node->left_bucketsize = -1;
+   local_node->right_bucketsize = -1;
 
    return local_node;
 }


diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index c52af72a16b824c1f37078bb4e185d8a34b22d2b..bdfbbb18186d9c7ef4201fa8eb294bbdb55e298c 100644 (file)


--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,11 +50,15 @@
 
 #include <math.h>
 
+#include "catalog/pg_statistic.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 
 /*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
  * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize statistic
  *             for the inner hash key.
  */
 void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
              Path *outer_path,
              Path *inner_path,
              List *restrictlist,
-             Selectivity innerdispersion)
+             Selectivity innerbucketsize)
 {
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
 
    /*
     * The number of tuple comparisons needed is the number of outer
-    * tuples times the typical hash bucket size.  nodeHash.c tries for
-    * average bucket loading of NTUP_PER_BUCKET, but that goal will be
-    * reached only if data values are uniformly distributed among the
-    * buckets.  To be conservative, we scale up the target bucket size by
-    * the number of inner rows times inner dispersion, giving an estimate
-    * of the typical number of duplicates of each value. We then charge
-    * one cpu_operator_cost per tuple comparison.
+    * tuples times the typical number of tuples in a hash bucket,
+    * which is the inner relation size times its bucketsize fraction.
+    * We charge one cpu_operator_cost per tuple comparison.
     */
    run_cost += cpu_operator_cost * outer_path->parent->rows *
-       NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+       ceil(inner_path->parent->rows * innerbucketsize);
 
    /*
     * Estimate the number of tuples that get through the hashing filter
     * as one per tuple in the two source relations.  This could be a
     * drastic underestimate if there are many equal-keyed tuples in
-    * either relation, but we have no good way of estimating that...
+    * either relation, but we have no simple way of estimating that;
+    * and since this is only a second-order parameter, it's probably
+    * not worth expending a lot of effort on the estimate.
     */
    ntuples = outer_path->parent->rows + inner_path->parent->rows;
 
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
    /*
     * Bias against putting larger relation on inside.  We don't want an
     * absolute prohibition, though, since larger relation might have
-    * better dispersion --- and we can't trust the size estimates
+    * better bucketsize --- and we can't trust the size estimates
     * unreservedly, anyway.  Instead, inflate the startup cost by the
     * square root of the size ratio.  (Why square root?  No real good
     * reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
    path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin.  We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation.  However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution.  Otherwise the
+ * buckets will be nonuniformly occupied.  If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often.  Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size.  We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1.  This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want.  We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+   Oid         relid;
+   RelOptInfo *rel;
+   HeapTuple   tuple;
+   Form_pg_statistic stats;
+   double      estfract,
+               ndistinct,
+               needdistinct,
+               mcvfreq,
+               avgfreq;
+   float4     *numbers;
+   int         nnumbers;
+
+   /*
+    * Lookup info about var's relation and attribute;
+    * if none available, return default estimate.
+    */
+   if (!IsA(var, Var))
+       return 0.1;
+
+   relid = getrelid(var->varno, root->rtable);
+   if (relid == InvalidOid)
+       return 0.1;
+
+   rel = get_base_rel(root, var->varno);
+
+   if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+       return 0.1;             /* ensure we can divide below */
+
+   tuple = SearchSysCache(STATRELATT,
+                          ObjectIdGetDatum(relid),
+                          Int16GetDatum(var->varattno),
+                          0, 0);
+   if (!HeapTupleIsValid(tuple))
+   {
+       /*
+        * Perhaps the Var is a system attribute; if so, it will have no
+        * entry in pg_statistic, but we may be able to guess something
+        * about its distribution anyway.
+        */
+       switch (var->varattno)
+       {
+           case ObjectIdAttributeNumber:
+           case SelfItemPointerAttributeNumber:
+               /* these are unique, so buckets should be well-distributed */
+               return (double) NTUP_PER_BUCKET / rel->rows;
+           case TableOidAttributeNumber:
+               /* hashing this is a terrible idea... */
+               return 1.0;
+       }
+       return 0.1;
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+   /*
+    * Obtain number of distinct data values in raw relation.
+    */
+   ndistinct = stats->stadistinct;
+   if (ndistinct < 0.0)
+       ndistinct = -ndistinct * rel->tuples;
+
+   /*
+    * Adjust ndistinct to account for restriction clauses.  Observe we are
+    * assuming that the data distribution is affected uniformly by the
+    * restriction clauses!
+    *
+    * XXX Possibly better way, but much more expensive: multiply by
+    * selectivity of rel's restriction clauses that mention the target Var.
+    */
+   ndistinct *= rel->rows / rel->tuples;
+
+   /*
+    * Discourage use of hash join if there seem not to be very many distinct
+    * data values.  The threshold here is somewhat arbitrary, as is the
+    * fraction used to "discourage" the choice.
+    */
+   if (ndistinct < 50.0)
+   {
+       ReleaseSysCache(tuple);
+       return 0.5;
+   }
+
+   /*
+    * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
+    * ie the number of rows after applying restriction clauses, because
+    * that's what the fraction will eventually be multiplied by in
+    * cost_hashjoin.
+    */
+   estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+   /*
+    * Adjust estimated bucketsize if too few distinct values to fill
+    * all the buckets.
+    */
+   needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+   if (ndistinct < needdistinct)
+       estfract *= needdistinct / ndistinct;
+
+   /*
+    * Look up the frequency of the most common value, if available.
+    */
+   mcvfreq = 0.0;
+
+   if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        NULL, NULL, &numbers, &nnumbers))
+   {
+       /*
+        * The first MCV stat is for the most common value.
+        */
+       if (nnumbers > 0)
+           mcvfreq = numbers[0];
+       free_attstatsslot(var->vartype, NULL, 0,
+                         numbers, nnumbers);
+   }
+
+   /*
+    * Adjust estimated bucketsize upward to account for skewed distribution.
+    */
+   avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+   if (avgfreq > 0.0 && mcvfreq > avgfreq)
+       estfract *= mcvfreq / avgfreq;
+
+   ReleaseSysCache(tuple);
+
+   return (Selectivity) estfract;
+}
+
 
 /*
  * cost_qual_eval


diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index d41336ddcee0f9c26ad9a2ab0b1410a1f0ae38c7..cd7cabd41deb7bf52b323b437d847eede311b8cc 100644 (file)


--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,15 +8,15 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 #include 
 
-#include "postgres.h"
-
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
                     List *restrictlist, JoinType jointype);
 static Path *best_innerjoin(List *join_paths, List *outer_relid,
               JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
                         RelOptInfo *outerrel,
                         RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
        Expr       *clause;
        Var        *left,
                   *right;
-       Selectivity innerdispersion;
+       Selectivity innerbucketsize;
        List       *hashclauses;
 
        if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
 
        /*
         * Check if clause is usable with these sub-rels, find inner side,
-        * estimate dispersion of inner var for costing purposes.
+        * estimate bucketsize of inner var for costing purposes.
         *
         * Since we tend to visit the same clauses over and over when
-        * planning a large query, we cache the dispersion estimates in
+        * planning a large query, we cache the bucketsize estimates in
         * the RestrictInfo node to avoid repeated lookups of statistics.
         */
        if (intMember(left->varno, outerrelids) &&
            intMember(right->varno, innerrelids))
        {
            /* righthand side is inner */
-           innerdispersion = restrictinfo->right_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->right_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, right);
-               restrictinfo->right_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, right);
+               restrictinfo->right_bucketsize = innerbucketsize;
            }
        }
        else if (intMember(left->varno, innerrelids) &&
                 intMember(right->varno, outerrelids))
        {
            /* lefthand side is inner */
-           innerdispersion = restrictinfo->left_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->left_bucketsize;
+           if (innerbucketsize < 0)
            {
                /* not cached yet */
-               innerdispersion = estimate_dispersion(root, left);
-               restrictinfo->left_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, left);
+               restrictinfo->left_bucketsize = innerbucketsize;
            }
        }
        else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
                                      innerrel->cheapest_total_path,
                                      restrictlist,
                                      hashclauses,
-                                     innerdispersion));
+                                     innerbucketsize));
        if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
            add_path(joinrel, (Path *)
                     create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
                                          innerrel->cheapest_total_path,
                                          restrictlist,
                                          hashclauses,
-                                         innerdispersion));
+                                         innerbucketsize));
    }
 }
 
@@ -866,31 +865,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
    return cheapest;
 }
 
-/*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large. We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
-   Oid         relid;
-
-   if (!IsA(var, Var))
-       return 0.1;
-
-   relid = getrelid(var->varno, root->rtable);
-
-   if (relid == InvalidOid)
-       return 0.1;
-
-   return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
 /*
  * select_mergejoin_clauses
  *   Select mergejoin clauses that are usable for a particular join.


diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8c3b00289d3e7d467aeb03dcc1b53eb02f5a3a2b..2d264c46881730ba4ace2ade745fe6942c9d49fb 100644 (file)


--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,14 +10,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "catalog/pg_index.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
         */
        if (resdom->reskey == 0)
        {
-           /* OK, mark it as a sort key and set the sort operator regproc */
+           /* OK, mark it as a sort key and set the sort operator */
            resdom->reskey = ++numsortkeys;
-           resdom->reskeyop = get_opcode(pathkey->sortop);
+           resdom->reskeyop = pathkey->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 7c3e15a8f88d81b206e4d3f618eae9658294ad6a..5d67e02dacb44bce678665c592ab184f588469a5 100644 (file)


--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include 
 
-#include "postgres.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
    restrictinfo->left_pathkey = NIL;   /* not computable yet */
    restrictinfo->right_pathkey = NIL;
    restrictinfo->hashjoinoperator = InvalidOid;
-   restrictinfo->left_dispersion = -1; /* not computed until needed */
-   restrictinfo->right_dispersion = -1;
+   restrictinfo->left_bucketsize = -1; /* not computed until needed */
+   restrictinfo->right_bucketsize = -1;
 
    /*
     * Retrieve all relids and vars contained within the clause.


diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index b2ab4600209dd566fd281c5110f0e1f6ba5c1cb1..0aba4808c160f3bf5ba3a9cc3fd2c6cf26fa2fa3 100644 (file)


--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
            {
                /* OK, insert the ordering info needed by the executor. */
                resdom->reskey = ++keyno;
-               resdom->reskeyop = get_opcode(grpcl->sortop);
+               resdom->reskeyop = grpcl->sortop;
            }
        }
 
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
        {
            /* OK, insert the ordering info needed by the executor. */
            resdom->reskey = ++keyno;
-           resdom->reskeyop = get_opcode(sortcl->sortop);
+           resdom->reskeyop = sortcl->sortop;
        }
    }
 


diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 0b173466cf98061a3add13f850ba9e750dd9f4e0..ede4159d9707629729b5dffbc32f241f48629e72 100644 (file)


--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
        newinfo->eval_cost = -1;        /* reset this too */
        newinfo->left_pathkey = NIL;    /* and these */
        newinfo->right_pathkey = NIL;
-       newinfo->left_dispersion = -1;
-       newinfo->right_dispersion = -1;
+       newinfo->left_bucketsize = -1;
+       newinfo->right_bucketsize = -1;
 
        return (Node *) newinfo;
    }


diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index cfba3ee395f2e0216f74c1e2497a7a8f5897d74b..407c132b4f7a6388b093806fd3eb01286906e084 100644 (file)


--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,14 +8,14 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include 
-
 #include "postgres.h"
 
+#include 
+
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'hashclauses' is a list of the hash join clause (always a 1-element list)
  *     (this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
  *
  */
 HashPath   *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion)
+                    Selectivity innerbucketsize)
 {
    HashPath   *pathnode = makeNode(HashPath);
 
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                  outer_path,
                  inner_path,
                  restrict_clauses,
-                 innerdispersion);
+                 innerbucketsize);
 
    return pathnode;
 }


diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index 4f711df203c846acf4402ed131def54dbbf94443..ee3523553e8693ac1b7762d01ebbabc3697a4d7a 100644 (file)


--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,11 +9,10 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include 


diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 4687a5599623d09b416357721488369cc8eaaa38..76cc095bc4edcdbf4cfecad9627a1e5a29d03256 100644 (file)


--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
        /* just the named tables */
        foreach(l, forUpdate)
        {
-           char       *relname = lfirst(l);
+           char       *relname = strVal(lfirst(l));
 
            i = 0;
            foreach(rt, qry->rtable)


diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index bed0ce239a42e2f75c48bdda8aff299cb2f02f9b..40c379aca51f280882945b9f5caf4aaeccc4475f 100644 (file)


--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
  *
  * HISTORY
  *   AUTHOR            DATE            MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
    char                *str;
    bool                boolean;
    JoinType            jtype;
-   InhOption           inhOpt;
    List                *list;
    Node                *node;
    Value               *value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
 
 %type    stmt,
        AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+       AnalyzeStmt,
        ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
        CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
        CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
 %type    select_no_parens, select_with_parens, select_clause,
                simple_select
 
-%type     alter_column_action
+%type     alter_column_default
 %type     drop_behavior
 
 %type    createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
        OptTableElementList, OptInherit, definition, opt_distinct,
        opt_with, func_args, func_args_list, func_as,
        oper_argtypes, RuleActionList, RuleActionMulti,
-       opt_column_list, columnList, opt_va_list, va_list,
+       opt_column_list, columnList, opt_name_list,
        sort_clause, sortby_list, index_params, index_list, name_list,
        from_clause, from_list, opt_array_bounds,
        expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
 %type    substr_from, substr_for
 
 %type     opt_binary, opt_using, opt_instead, opt_cursor
-%type     opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type  opt_inh_star, opt_only
+%type     opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
 
 %type    copy_dirn, direction, reindex_type, drop_type,
        opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
        NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
        OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
        REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
-       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+       STATISTICS, STDIN, STDOUT, SYSID,
        TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, 
        UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
 
@@ -470,6 +469,7 @@ stmt :  AlterSchemaStmt
        | CreatedbStmt
        | DropdbStmt
        | VacuumStmt
+       | AnalyzeStmt
        | VariableSetStmt
        | VariableShowStmt
        | VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-       ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+       ALTER TABLE relation_expr ADD opt_column columnDef
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'A';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $7;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $6;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
-       | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+       | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'T';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->def = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+       | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+               {
+                   AlterTableStmt *n = makeNode(AlterTableStmt);
+                   n->subtype = 'S';
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = (Node *) makeInteger($9);
+                   $$ = (Node *)n;
+               }
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'D';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-       | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+       | ALTER TABLE relation_expr ADD TableConstraint
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'C';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $6;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $5;
                    $$ = (Node *)n;
                }
-/* ALTER TABLE <name> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
                {
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'X';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE <name> CREATE TOAST TABLE */
@@ -997,6 +1008,7 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'E';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    $$ = (Node *)n;
                }
 /* ALTER TABLE <name> OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
                    AlterTableStmt *n = makeNode(AlterTableStmt);
                    n->subtype = 'U';
                    n->relname = $3;
+                   n->inhOpt = INH_NO;
                    n->name = $6;
                    $$ = (Node *)n;
                }
        ;
 
-alter_column_action:
+alter_column_default:
        SET DEFAULT a_expr
            {
                /* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference:  NO ACTION                { $$ = FKCONSTR_ON_KEY_NOACTION; }
        | SET DEFAULT                   { $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
        ;
 
-opt_only: ONLY                                 { $$ = INH_NO; }
-        | /*EMPTY*/                                { $$ = INH_DEFAULT; } 
-       ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'   { $$ = $3; }
        | /*EMPTY*/                                 { $$ = NIL; }
        ;
@@ -2598,14 +2607,13 @@ opt_force:  FORCE                                   {  $$ = TRUE; }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
                    RenameStmt *n = makeNode(RenameStmt);
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->column = $7;
-                   n->newname = $9;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->column = $6;
+                   n->newname = $8;
                    $$ = (Node *)n;
                }
        ;
@@ -2994,49 +3002,71 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
                    n->vacrel = NULL;
-                   n->va_spec = NIL;
+                   n->va_cols = NIL;
                    $$ = (Node *)n;
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose relation_name
                {
                    VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                    n->verbose = $2;
-                   n->analyze = $3;
-                   n->vacrel = $4;
-                   n->va_spec = $5;
-                   if ( $5 != NIL && !$4 )
-                       elog(ERROR,"VACUUM syntax error at or near \"(\""
-                           "\n\tRelation name must be specified");
+                   n->vacrel = $3;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | VACUUM opt_verbose AnalyzeStmt
+               {
+                   VacuumStmt *n = (VacuumStmt *) $3;
+                   n->vacuum = true;
+                   n->verbose |= $2;
                    $$ = (Node *)n;
                }
        ;
 
-opt_verbose:  VERBOSE                          { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = NULL;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = $3;
+                   n->va_cols = $4;
+                   $$ = (Node *)n;
+               }
        ;
 
-opt_analyze:  ANALYZE                          { $$ = TRUE; }
+analyze_keyword:  ANALYZE                      { $$ = TRUE; }
        |     ANALYSE /* British */             { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-opt_va_list:  '(' va_list ')'                  { $$ = $2; }
-       | /*EMPTY*/                             { $$ = NIL; }
+opt_verbose:  VERBOSE                          { $$ = TRUE; }
+       | /*EMPTY*/                             { $$ = FALSE; }
        ;
 
-va_list:  name
-               { $$ = makeList1($1); }
-       | va_list ',' name
-               { $$ = lappend($1, $3); }
+opt_name_list:  '(' name_list ')'              { $$ = $2; }
+       | /*EMPTY*/                             { $$ = NIL; }
        ;
 
 
@@ -3160,12 +3190,12 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only relation_name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
                    DeleteStmt *n = makeNode(DeleteStmt);
-                   n->inhOpt = $3;
-                   n->relname = $4;
-                   n->whereClause = $5;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->whereClause = $4;
                    $$ = (Node *)n;
                }
        ;
@@ -3202,17 +3232,17 @@ opt_lmode:  SHARE               { $$ = TRUE; }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
                    UpdateStmt *n = makeNode(UpdateStmt);
-                   n->inhOpt = $2;
-                   n->relname = $3;
-                   n->targetList = $5;
-                   n->fromClause = $6;
-                   n->whereClause = $7;
+                   n->relname = $2->relname;
+                   n->inhOpt = $2->inhOpt;
+                   n->targetList = $4;
+                   n->fromClause = $5;
+                   n->whereClause = $6;
                    $$ = (Node *)n;
                }
        ;
@@ -3545,10 +3575,6 @@ select_offset_value: Iconst
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                             { $$ = INH_YES; }
-       | /*EMPTY*/                             { $$ = INH_DEFAULT; }
-       ;
-
 relation_name_list:  name_list;
 
 name_list:  name
@@ -3576,7 +3602,7 @@ opt_for_update_clause:    for_update_clause       { $$ = $1; }
        | /* EMPTY */                           { $$ = NULL; }
        ;
 
-update_list:  OF va_list                       { $$ = $2; }
+update_list:  OF name_list                     { $$ = $2; }
        | /* EMPTY */                           { $$ = makeList1(NULL); }
        ;
 
@@ -5525,6 +5551,7 @@ TokenId:  ABSOLUTE                        { $$ = "absolute"; }
        | SHARE                         { $$ = "share"; }
        | START                         { $$ = "start"; }
        | STATEMENT                     { $$ = "statement"; }
+       | STATISTICS                    { $$ = "statistics"; }
        | STDIN                         { $$ = "stdin"; }
        | STDOUT                        { $$ = "stdout"; }
        | SYSID                         { $$ = "sysid"; }


diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c

index 402dbfd28ca561a2c9d9ba513e7986dda06ec7df..8ab19f86ae8582213730311845cdbdcae0977f18 100644 (file)


--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c

index f5324cb37355532ef4233a335bc40fb5e5eb635e..e1d49842fd2398a3338bf8fb8329c7ca0677a2fe 100644 (file)


--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ static struct
    }
 };
 
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
 
 
 /*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
 
                foreach(l, pstate->p_forUpdate)
                {
-                   char       *rname = lfirst(l);
+                   char       *rname = strVal(lfirst(l));
 
                    if (strcmp(relname, rname) == 0)
                        return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
 
 #endif
 
-#ifdef NOT_USED
-/*
- * This should only be used if the relation is already
- * heap_open()'ed.  Use the cache version
- * for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
-   return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
 /* given attribute id, return type of that attribute */
 /*
  * This should only be used if the relation is already


diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c

index ae6cd20a5db3838c76a6f006232a8f04e5d4a800..b616f7e68ef875a0774de3f270c4cf98aa3dcc94 100644 (file)


--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
                                        interpretInhOption(stmt->inhOpt),
                                            (ColumnDef *) stmt->def);
                        break;
-                   case 'T':   /* ALTER COLUMN */
-                       AlterTableAlterColumn(stmt->relname,
+                   case 'T':   /* ALTER COLUMN DEFAULT */
+                       AlterTableAlterColumnDefault(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
-                                             stmt->name,
-                                             stmt->def);
+                                                    stmt->name,
+                                                    stmt->def);
                        break;
-                   case 'D':   /* ALTER DROP */
+                   case 'S':   /* ALTER COLUMN STATISTICS */
+                       AlterTableAlterColumnStatistics(stmt->relname,
+                                       interpretInhOption(stmt->inhOpt),
+                                                       stmt->name,
+                                                       stmt->def);
+                       break;
+                   case 'D':   /* DROP COLUMN */
                        AlterTableDropColumn(stmt->relname,
                                        interpretInhOption(stmt->inhOpt),
                                             stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
            break;
 
        case T_VacuumStmt:
-           set_ps_display(commandTag = "VACUUM");
+           if (((VacuumStmt *) parsetree)->vacuum)
+               commandTag = "VACUUM";
+           else
+               commandTag = "ANALYZE";
+           set_ps_display(commandTag);
 
-           vacuum(((VacuumStmt *) parsetree)->vacrel,
-                  ((VacuumStmt *) parsetree)->verbose,
-                  ((VacuumStmt *) parsetree)->analyze,
-                  ((VacuumStmt *) parsetree)->va_spec);
+           vacuum((VacuumStmt *) parsetree);
            break;
 
        case T_ExplainStmt:


diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 1fe0afb0a35b44ad34e76fbb73439194a73690ad..41ba82db7b574d6ba6d095a25092376d04702250 100644 (file)


--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,9 +57,6 @@
 /* default selectivity estimate for pattern-match operators such as LIKE */
 #define DEFAULT_MATCH_SEL  0.01
 
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO  0.1
-
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                  Datum lobound, Datum hibound, Oid boundstypid,
                  double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
 static unsigned char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid,
-                int *typlen,
-                bool *typbyval,
-                int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
-                Oid typid, int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival);
+                            Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                                 Form_pg_statistic stats);
 static Selectivity prefix_selectivity(char *prefix,
                   Oid relid,
                   AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
-
-   if (NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_EQ_SEL;
-   else
+   Oid         typid;
+   int32       typmod;
+   HeapTuple   statsTuple;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      selec;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+   /* get info about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   /* get stats for the attribute, if available */
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (HeapTupleIsValid(statsTuple))
    {
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       double      nullfrac;
-       double      commonfrac;
-       Datum       commonval;
-       double      selec;
-
-       /* get info about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       /* get stats for the attribute, if available */
-       if (getattstatistics(relid, attno, typid, typmod,
-                            &nullfrac, &commonfrac, &commonval,
-                            NULL, NULL))
-       {
-           if (flag & SEL_CONSTANT)
-           {
+       Form_pg_statistic stats;
 
-               /*
-                * Is the constant "=" to the column's most common value?
-                * (Although the operator may not really be "=", we will
-                * assume that seeing whether it returns TRUE for the most
-                * common value is useful information. If you don't like
-                * it, maybe you shouldn't be using eqsel for your
-                * operator...)
-                */
-               RegProcedure eqproc = get_opcode(opid);
-               bool        mostcommon;
+       stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-               if (eqproc == (RegProcedure) NULL)
-                   elog(ERROR, "eqsel: no procedure for operator %u",
-                        opid);
+       if (flag & SEL_CONSTANT)
+       {
+           bool    match = false;
+           int     i;
 
-               /* be careful to apply operator right way 'round */
-               if (flag & SEL_RIGHT)
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              commonval,
-                                                              value));
-               else
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              value,
-                                                            commonval));
+           /*
+            * Is the constant "=" to any of the column's most common
+            * values?  (Although the given operator may not really be
+            * "=", we will assume that seeing whether it returns TRUE
+            * is an appropriate test.  If you don't like this, maybe you
+            * shouldn't be using eqsel for your operator...)
+            */
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                &values, &nvalues,
+                                &numbers, &nnumbers))
+           {
+               FmgrInfo    eqproc;
 
-               if (mostcommon)
-               {
+               fmgr_info(get_opcode(opid), &eqproc);
 
-                   /*
-                    * Constant is "=" to the most common value.  We know
-                    * selectivity exactly (or as exactly as VACUUM could
-                    * calculate it, anyway).
-                    */
-                   selec = commonfrac;
-               }
-               else
+               for (i = 0; i < nvalues; i++)
                {
-
-                   /*
-                    * Comparison is against a constant that is neither
-                    * the most common value nor null.  Its selectivity
-                    * cannot be more than this:
-                    */
-                   selec = 1.0 - commonfrac - nullfrac;
-                   if (selec > commonfrac)
-                       selec = commonfrac;
-
-                   /*
-                    * and in fact it's probably less, so we should apply
-                    * a fudge factor.  The only case where we don't is
-                    * for a boolean column, where indeed we have
-                    * estimated the less-common value's frequency
-                    * exactly!
-                    */
-                   if (typid != BOOLOID)
-                       selec *= NOT_MOST_COMMON_RATIO;
+                   /* be careful to apply operator right way 'round */
+                   if (flag & SEL_RIGHT)
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          values[i],
+                                                          value));
+                   else
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          value,
+                                                          values[i]));
+                   if (match)
+                       break;
                }
            }
            else
            {
+               /* no most-common-value info available */
+               values = NULL;
+               numbers = NULL;
+               i = nvalues = nnumbers = 0;
+           }
 
+           if (match)
+           {
+               /*
+                * Constant is "=" to this common value.  We know
+                * selectivity exactly (or as exactly as VACUUM
+                * could calculate it, anyway).
+                */
+               selec = numbers[i];
+           }
+           else
+           {
                /*
-                * Search is for a value that we do not know a priori, but
-                * we will assume it is not NULL.  Selectivity cannot be
-                * more than this:
+                * Comparison is against a constant that is neither
+                * NULL nor any of the common values.  Its selectivity
+                * cannot be more than this:
                 */
-               selec = 1.0 - nullfrac;
-               if (selec > commonfrac)
-                   selec = commonfrac;
+               double  sumcommon = 0.0;
+               double  otherdistinct;
 
+               for (i = 0; i < nnumbers; i++)
+                   sumcommon += numbers[i];
+               selec = 1.0 - sumcommon - stats->stanullfrac;
+               /*
+                * and in fact it's probably a good deal less.
+                * We approximate that all the not-common values
+                * share this remaining fraction equally, so we
+                * divide by the number of other distinct values.
+                */
+               otherdistinct = get_att_numdistinct(relid, attno,
+                                                   typid, stats)
+                   - nnumbers;
+               if (otherdistinct > 1)
+                   selec /= otherdistinct;
                /*
-                * and in fact it's probably less, so apply a fudge
-                * factor.
+                * Another cross-check: selectivity shouldn't be
+                * estimated as more than the least common
+                * "most common value".
                 */
-               selec *= NOT_MOST_COMMON_RATIO;
+               if (nnumbers > 0 && selec > numbers[nnumbers-1])
+                   selec = numbers[nnumbers-1];
            }
 
-           /* result should be in range, but make sure... */
-           if (selec < 0.0)
-               selec = 0.0;
-           else if (selec > 1.0)
-               selec = 1.0;
-
-           if (!typbyval)
-               pfree(DatumGetPointer(commonval));
+           free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
        }
        else
        {
+           double      ndistinct;
 
            /*
-            * No VACUUM ANALYZE stats available, so make a guess using
-            * the dispersion stat (if we have that, which is unlikely for
-            * a normal attribute; but for a system attribute we may be
-            * able to estimate it).
+            * Search is for a value that we do not know a priori, but
+            * we will assume it is not NULL.  Estimate the selectivity
+            * as non-null fraction divided by number of distinct values,
+            * so that we get a result averaged over all possible values
+            * whether common or uncommon.  (Essentially, we are assuming
+            * that the not-yet-known comparison value is equally likely
+            * to be any of the possible values, regardless of their
+            * frequency in the table.  Is that a good idea?)
+            */
+           selec = 1.0 - stats->stanullfrac;
+           ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+           if (ndistinct > 1)
+               selec /= ndistinct;
+           /*
+            * Cross-check: selectivity should never be
+            * estimated as more than the most common value's.
             */
-           selec = get_attdispersion(relid, attno, 0.01);
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                NULL, NULL,
+                                &numbers, &nnumbers))
+           {
+               if (nnumbers > 0 && selec > numbers[0])
+                   selec = numbers[0];
+               free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+           }
        }
 
-       result = (float8) selec;
+       ReleaseSysCache(statsTuple);
    }
-   PG_RETURN_FLOAT8(result);
+   else
+   {
+       /*
+        * No VACUUM ANALYZE stats available, so make a guess using
+        * estimated number of distinct values and assuming they are
+        * equally common.  (The guess is unlikely to be very good,
+        * but we do know a few special cases.)
+        */
+       selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+   }
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
    AttrNumber  attno = PG_GETARG_INT16(2);
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
-   float8      result;
+   bool        isgt;
+   HeapTuple   oprTuple;
+   HeapTuple   statsTuple;
+   Form_pg_statistic stats;
+   Oid         contype;
+   FmgrInfo    opproc;
+   Oid         typid;
+   int32       typmod;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      mcv_selec,
+               hist_selec,
+               sumcommon;
+   double      selec;
+   int         i;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+   /* Can't do anything useful if no constant to compare against, either */
+   if (!(flag & SEL_CONSTANT))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
-   if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_INEQ_SEL;
+   /*
+    * Force the constant to be on the right to simplify later logic.
+    * This means that we may be dealing with either "<" or ">" cases.
+    */
+   if (flag & SEL_RIGHT)
+   {
+       /* we have x < const */
+       isgt = false;
+   }
    else
    {
-       HeapTuple   oprtuple;
-       Oid         ltype,
-                   rtype,
-                   contype;
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       Datum       hival,
-                   loval;
-       double      val,
-                   high,
-                   low,
-                   numerator,
-                   denominator;
-
-       /*
-        * Get left and right datatypes of the operator so we know what
-        * type the constant is.
-        */
-       oprtuple = SearchSysCache(OPEROID,
-                                 ObjectIdGetDatum(opid),
-                                 0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
-           elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       contype = (flag & SEL_RIGHT) ? rtype : ltype;
-       ReleaseSysCache(oprtuple);
-
-       /* Now get info and stats about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       if (!getattstatistics(relid, attno, typid, typmod,
-                             NULL, NULL, NULL,
-                             &loval, &hival))
+       /* we have const < x, commute to make x > const */
+       opid = get_commutator(opid);
+       if (!opid)
        {
-           /* no stats available, so default result */
+           /* Use default selectivity (should we raise an error instead?) */
            PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
        }
+       isgt = true;
+   }
 
-       /* Convert the values to a uniform comparison scale. */
-       if (!convert_to_scalar(value, contype, &val,
-                              loval, hival, typid,
-                              &low, &high))
-       {
+   /*
+    * The constant might not be the same datatype as the column;
+    * look at the operator's input types to find out what it is.
+    * Also set up to be able to call the operator's execution proc.
+    */
+   oprTuple = SearchSysCache(OPEROID,
+                             ObjectIdGetDatum(opid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(oprTuple))
+       elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+   contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+   fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+   ReleaseSysCache(oprTuple);
+
+   /* Now get info and stats about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (!HeapTupleIsValid(statsTuple))
+   {
+       /* no stats available, so default result */
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-           /*
-            * Ideally we'd produce an error here, on the grounds that the
-            * given operator shouldn't have scalarltsel registered as its
-            * selectivity func unless we can deal with its operand types.
-            * But currently, all manner of stuff is invoking scalarltsel,
-            * so give a default estimate until that can be fixed.
-            */
-           if (!typbyval)
-           {
-               pfree(DatumGetPointer(hival));
-               pfree(DatumGetPointer(loval));
-           }
-           PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
-       }
+   /*
+    * If we have most-common-values info, add up the fractions of the
+    * MCV entries that satisfy MCV OP CONST.  These fractions contribute
+    * directly to the result selectivity.  Also add up the total fraction
+    * represented by MCV entries.
+    */
+   mcv_selec = 0.0;
+   sumcommon = 0.0;
 
-       /* release temp storage if needed */
-       if (!typbyval)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        &values, &nvalues,
+                        &numbers, &nnumbers))
+   {
+       for (i = 0; i < nvalues; i++)
        {
-           pfree(DatumGetPointer(hival));
-           pfree(DatumGetPointer(loval));
+           if (DatumGetBool(FunctionCall2(&opproc,
+                                          values[i],
+                                          value)))
+               mcv_selec += numbers[i];
+           sumcommon += numbers[i];
        }
+       free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+   }
+
+   /*
+    * If there is a histogram, determine which bin the constant falls in,
+    * and compute the resulting contribution to selectivity.
+    *
+    * Someday, VACUUM might store more than one histogram per rel/att,
+    * corresponding to more than one possible sort ordering defined for
+    * the column type.  However, to make that work we will need to figure
+    * out which staop to search for --- it's not necessarily the one we
+    * have at hand!  (For example, we might have a '<=' operator rather
+    * than the '<' operator that will appear in staop.)  For now, assume
+    * that whatever appears in pg_statistic is sorted the same way our
+    * operator sorts.
+    */
+   hist_selec = 0.0;
 
-       if (high <= low)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                        &values, &nvalues,
+                        NULL, NULL))
+   {
+       if (nvalues > 1)
        {
+           double  histfrac;
+           bool    ltcmp;
+
+           ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                              values[0],
+                                              value));
+           if (isgt)
+               ltcmp = !ltcmp;
+           if (!ltcmp)
+           {
+               /* Constant is below lower histogram boundary. */
+               histfrac = 0.0;
+           }
+           else
+           {
+               /*
+                * Scan to find proper location.  This could be made faster
+                * by using a binary-search method, but it's probably not
+                * worth the trouble for typical histogram sizes.
+                */
+               for (i = 1; i < nvalues; i++)
+               {
+                   ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                                      values[i],
+                                                      value));
+                   if (isgt)
+                       ltcmp = !ltcmp;
+                   if (!ltcmp)
+                       break;
+               }
+               if (i >= nvalues)
+               {
+                   /* Constant is above upper histogram boundary. */
+                   histfrac = 1.0;
+               }
+               else
+               {
+                   double      val,
+                               high,
+                               low;
+                   double      binfrac;
 
+                   /*
+                    * We have values[i-1] <= constant <= values[i].
+                    *
+                    * Convert the constant and the two nearest bin boundary
+                    * values to a uniform comparison scale, and do a linear
+                    * interpolation within this bin.
+                    */
+                   if (convert_to_scalar(value, contype, &val,
+                                         values[i-1], values[i], typid,
+                                         &low, &high))
+                   {
+                       if (high <= low)
+                       {
+                           /* cope if bin boundaries appear identical */
+                           binfrac = 0.5;
+                       }
+                       else if (val <= low)
+                           binfrac = 0.0;
+                       else if (val >= high)
+                           binfrac = 1.0;
+                       else
+                           binfrac = (val - low) / (high - low);
+                   }
+                   else
+                   {
+                       /*
+                        * Ideally we'd produce an error here, on the grounds
+                        * that the given operator shouldn't have scalarltsel
+                        * registered as its selectivity func unless we can
+                        * deal with its operand types.  But currently, all
+                        * manner of stuff is invoking scalarltsel, so give a
+                        * default estimate until that can be fixed.
+                        */
+                       binfrac = 0.5;
+                   }
+                   /*
+                    * Now, compute the overall selectivity across the values
+                    * represented by the histogram.  We have i-1 full bins
+                    * and binfrac partial bin below the constant.
+                    */
+                   histfrac = (double) (i-1) + binfrac;
+                   histfrac /= (double) (nvalues - 1);
+               }
+           }
            /*
-            * If we trusted the stats fully, we could return a small or
-            * large selec depending on which side of the single data
-            * point the constant is on.  But it seems better to assume
-            * that the stats are wrong and return a default...
+            * Now histfrac = fraction of histogram entries below the constant.
+            *
+            * Account for "<" vs ">"
             */
-           result = DEFAULT_INEQ_SEL;
-       }
-       else if (val < low || val > high)
-       {
-
+           hist_selec = isgt ? (1.0 - histfrac) : histfrac;
            /*
-            * If given value is outside the statistical range, return a
-            * small or large value; but not 0.0/1.0 since there is a
-            * chance the stats are out of date.
+            * The histogram boundaries are only approximate to begin
+            * with, and may well be out of date anyway.  Therefore,
+            * don't believe extremely small or large selectivity
+            * estimates.
             */
-           if (flag & SEL_RIGHT)
-               result = (val < low) ? 0.001 : 0.999;
-           else
-               result = (val < low) ? 0.999 : 0.001;
-       }
-       else
-       {
-           denominator = high - low;
-           if (flag & SEL_RIGHT)
-               numerator = val - low;
-           else
-               numerator = high - val;
-           result = numerator / denominator;
+           if (hist_selec < 0.001)
+               hist_selec = 0.001;
+           else if (hist_selec > 0.999)
+               hist_selec = 0.999;
        }
+
+       free_attstatsslot(typid, values, nvalues, NULL, 0);
    }
-   PG_RETURN_FLOAT8(result);
+
+   /*
+    * Now merge the results from the MCV and histogram calculations,
+    * realizing that the histogram covers only the non-null values that
+    * are not listed in MCV.
+    */
+   selec = 1.0 - stats->stanullfrac - sumcommon;
+
+   if (hist_selec > 0.0)
+       selec *= hist_selec;
+   else
+   {
+       /*
+        * If no histogram but there are values not accounted for by MCV,
+        * arbitrarily assume half of them will match.
+        */
+       selec *= 0.5;
+   }
+
+   selec += mcv_selec;
+
+   ReleaseSysCache(statsTuple);
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
    Datum       value = PG_GETARG_DATUM(3);
    int32       flag = PG_GETARG_INT32(4);
    Oid         ltopid;
-   float8      result;
 
    /*
-    * Compute selectivity of "<", then invert --- but only if we were
-    * able to produce a non-default estimate.  Note that we get the
-    * negator which strictly speaking means we are looking at "<=" for
-    * ">" or "<" for ">=".  We assume this won't matter.
+    * Commute so that we have a "<" or "<=" operator, then apply
+    * scalarltsel.
     */
-   ltopid = get_negator(opid);
-   if (ltopid)
-   {
-       result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
-                                               ObjectIdGetDatum(ltopid),
-                                                ObjectIdGetDatum(relid),
-                                                   Int16GetDatum(attno),
-                                                   value,
-                                                   Int32GetDatum(flag)));
-   }
-   else
+   ltopid = get_commutator(opid);
+   if (!ltopid)
    {
        /* Use default selectivity (should we raise an error instead?) */
-       result = DEFAULT_INEQ_SEL;
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
    }
 
-   if (result != DEFAULT_INEQ_SEL)
-       result = 1.0 - result;
-
-   PG_RETURN_FLOAT8(result);
+   flag ^= SEL_RIGHT;
+   return DirectFunctionCall5(scalarltsel,
+                              ObjectIdGetDatum(ltopid),
+                              ObjectIdGetDatum(relid),
+                              Int16GetDatum(attno),
+                              value,
+                              Int32GetDatum(flag));
 }
 
 /*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
        result = DEFAULT_MATCH_SEL;
    else
    {
-       HeapTuple   oprtuple;
+       HeapTuple   oprTuple;
        Oid         ltype,
                    rtype;
        char       *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
         * Get left and right datatypes of the operator so we know what
         * type the attribute is.
         */
-       oprtuple = SearchSysCache(OPEROID,
+       oprTuple = SearchSysCache(OPEROID,
                                  ObjectIdGetDatum(opid),
                                  0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
+       if (!HeapTupleIsValid(oprTuple))
            elog(ERROR, "patternsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       ReleaseSysCache(oprtuple);
+       ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+       rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+       ReleaseSysCache(oprTuple);
 
        /* the right-hand const is type text for all supported operators */
        Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
    AttrNumber  attno1 = PG_GETARG_INT16(2);
    Oid         relid2 = PG_GETARG_OID(3);
    AttrNumber  attno2 = PG_GETARG_INT16(4);
-   float8      result;
-   float8      num1,
-               num2,
-               min;
    bool        unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
    bool        unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+   double      selec;
 
    if (unknown1 && unknown2)
-       result = DEFAULT_EQ_SEL;
+       selec = DEFAULT_EQ_SEL;
    else
    {
-       num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
-       num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+       Oid         typid1;
+       Oid         typid2;
+       int32       typmod1;
+       int32       typmod2;
+       HeapTuple   statsTuple1 = NULL;
+       HeapTuple   statsTuple2 = NULL;
+       Form_pg_statistic stats1 = NULL;
+       Form_pg_statistic stats2 = NULL;
+       double      nd1,
+                   nd2;
+
+       if (unknown1)
+       {
+           nd1 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid1, attno1, &typid1, &typmod1);
+
+           /* get stats for the attribute, if available */
+           statsTuple1 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid1),
+                                        Int16GetDatum(attno1),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple1))
+               stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+           nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+       }
+
+       if (unknown2)
+       {
+           nd2 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid2, attno2, &typid2, &typmod2);
+
+           /* get stats for the attribute, if available */
+           statsTuple2 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid2),
+                                        Int16GetDatum(attno2),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple2))
+               stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+           nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+       }
 
        /*
-        * The join selectivity cannot be more than num2, since each tuple
-        * in table 1 could match no more than num2 fraction of tuples in
-        * table 2 (and that's only if the table-1 tuple matches the most
-        * common value in table 2, so probably it's less).  By the same
-        * reasoning it is not more than num1. The min is therefore an
-        * upper bound.
+        * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+        * (can we produce any theory for this?)
         *
-        * If we know the dispersion of only one side, use it; the reasoning
-        * above still works.
+        * XXX possibility to do better: if both attributes have MCV lists
+        * then we could determine the exact join selectivity between the
+        * MCV sets, and only have to assume the join behavior of the non-MCV
+        * values.  This could be a big win when the MCVs cover a large part
+        * of the population.
         *
-        * XXX can we make a better estimate here?  Using the nullfrac
-        * statistic might be helpful, for example.  Assuming the operator
-        * is strict (does not succeed for null inputs) then the
-        * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
-        * which might be usefully small if there are many nulls.  How
-        * about applying the operator to the most common values?
+        * XXX what about nulls?
         */
-       min = (num1 < num2) ? num1 : num2;
-       result = min;
+       selec = 1.0 / sqrt(nd1 * nd2);
+       if (selec > 1.0)
+           selec = 1.0;
+
+       if (HeapTupleIsValid(statsTuple1))
+           ReleaseSysCache(statsTuple1);
+       if (HeapTupleIsValid(statsTuple2))
+           ReleaseSysCache(statsTuple2);
+
    }
-   PG_RETURN_FLOAT8(result);
+   PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  *   Returns "true" if successful.
  *
  * All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values.  XXX what about NUMERIC values that are outside
+ * the range of "double"?
  *
  * String datatypes are converted by convert_string_to_scalar(),
  * which is explained below.  The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
 {
    switch (typid)
    {
-           case BOOLOID:
+       case BOOLOID:
            return (double) DatumGetBool(value);
        case INT2OID:
            return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  * three strings before computing the scaled values.  This allows us to
  * "zoom in" when we encounter a narrow data range.  An example is a phone
  * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
  */
 static void
 convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
 /*
  * getattproperties
  *   Retrieve pg_attribute properties for an attribute,
- *   including type OID, type len, type byval flag, typmod.
+ *   including type OID and typmod.
  */
 static void
 getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+                Oid *typid, int32 *typmod)
 {
    HeapTuple   atp;
    Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
    att_tup = (Form_pg_attribute) GETSTRUCT(atp);
 
    *typid = att_tup->atttypid;
-   *typlen = att_tup->attlen;
-   *typbyval = att_tup->attbyval;
    *typmod = att_tup->atttypmod;
 
    ReleaseSysCache(atp);
 }
 
 /*
- * getattstatistics
- *   Retrieve the pg_statistic data for an attribute.
- *   Returns 'false' if no stats are available.
+ * get_att_numdistinct
  *
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ *   Estimate the number of distinct values of an attribute.
  *
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
  *
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values.  (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute.  If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
  */
-static bool
-getattstatistics(Oid relid,
-                AttrNumber attnum,
-                Oid typid,
-                int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                   Form_pg_statistic stats)
 {
-   HeapTuple   tuple;
-   HeapTuple   typeTuple;
-   FmgrInfo    inputproc;
-   Oid         typelem;
-   bool        isnull;
+   HeapTuple   reltup;
+   double      ntuples;
 
    /*
-    * We assume that there will only be one entry in pg_statistic for the
-    * given rel/att, so we search WITHOUT considering the staop column.
-    * Someday, VACUUM might store more than one entry per rel/att,
-    * corresponding to more than one possible sort ordering defined for
-    * the column type.  However, to make that work we will need to figure
-    * out which staop to search for --- it's not necessarily the one we
-    * have at hand!  (For example, we might have a '>' operator rather
-    * than the '<' operator that will appear in staop.)
+    * Special-case boolean columns: presumably, two distinct values.
+    *
+    * Are there any other cases we should wire in special estimates for?
     */
-   tuple = SearchSysCache(STATRELID,
-                          ObjectIdGetDatum(relid),
-                          Int16GetDatum((int16) attnum),
-                          0, 0);
-   if (!HeapTupleIsValid(tuple))
-   {
-       /* no such stats entry */
-       return false;
-   }
+   if (typid == BOOLOID)
+       return 2.0;
 
-   if (nullfrac)
-       *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
-   if (commonfrac)
-       *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
-   /* Get the type input proc for the column datatype */
-   typeTuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(typid),
-                              0, 0, 0);
-   if (!HeapTupleIsValid(typeTuple))
-       elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
-            typid);
-   fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
-   typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
-   ReleaseSysCache(typeTuple);
+   /*
+    * If VACUUM ANALYZE determined a fixed estimate, use it.
+    */
+   if (stats && stats->stadistinct > 0.0)
+       return stats->stadistinct;
 
    /*
-    * Values are variable-length fields, so cannot access as struct
-    * fields. Must do it the hard way with SysCacheGetAttr.
+    * Otherwise we need to get the relation size.
     */
-   if (commonval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stacommonval,
-                                         &isnull);
+   reltup = SearchSysCache(RELOID,
+                           ObjectIdGetDatum(relid),
+                           0, 0, 0);
+   if (!HeapTupleIsValid(reltup))
+       elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stacommonval is null");
-           *commonval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *commonval = FunctionCall3(&inputproc,
-                                      CStringGetDatum(strval),
-                                      ObjectIdGetDatum(typelem),
-                                      Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
 
-   if (loval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_staloval,
-                                         &isnull);
+   ReleaseSysCache(reltup);
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: staloval is null");
-           *loval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *loval = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   if (ntuples <= 0.0)
+       return 100.0;           /* no data available; return a default */
 
-   if (hival)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stahival,
-                                         &isnull);
+   /*
+    * If VACUUM ANALYZE determined a scaled estimate, use it.
+    */
+   if (stats && stats->stadistinct < 0.0)
+       return - stats->stadistinct * ntuples;
 
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stahival is null");
-           *hival = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *hival = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
+   /*
+    * VACUUM ANALYZE does not compute stats for system attributes,
+    * but some of them can reasonably be assumed unique anyway.
+    */
+   switch (attnum)
+   {
+       case ObjectIdAttributeNumber:
+       case SelfItemPointerAttributeNumber:
+           return ntuples;
+       case TableOidAttributeNumber:
+           return 1.0;
    }
 
-   ReleaseSysCache(tuple);
+   /*
+    * Estimate ndistinct = ntuples if the table is small, else 100.
+    */
+   if (ntuples < 100.0)
+       return ntuples;
 
-   return true;
+   return 100.0;
 }
 
 /*-------------------------------------------------------------------------


diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 82d55866215aac34724aa44deb029feea9d94a76..3995de5d7a1325085c901b0d2427cbbd775170ee 100644 (file)


--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   Eventually, the index information should go through here, too.
@@ -18,7 +18,10 @@
 #include "access/tupmacs.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
        return -1;
 }
 
-/*
- * get_attdispersion
- *
- *   Retrieve the dispersion statistic for an attribute,
- *   or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value.  This value may vary
- * depending on context.  (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
-   HeapTuple   atp;
-   Form_pg_attribute att_tup;
-   double      dispersion;
-   Oid         atttypid;
-   int32       ntuples;
-
-   atp = SearchSysCache(ATTNUM,
-                        ObjectIdGetDatum(relid),
-                        Int16GetDatum(attnum),
-                        0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
-            relid, attnum);
-       return min_estimate;
-   }
-
-   att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
-   dispersion = att_tup->attdispersion;
-   atttypid = att_tup->atttypid;
-
-   ReleaseSysCache(atp);
-
-   if (dispersion > 0.0)
-       return dispersion;      /* we have a specific estimate from VACUUM */
-
-   /*
-    * Special-case boolean columns: the dispersion of a boolean is highly
-    * unlikely to be anywhere near 1/numtuples, instead it's probably
-    * more like 0.5.
-    *
-    * Are there any other cases we should wire in special estimates for?
-    */
-   if (atttypid == BOOLOID)
-       return 0.5;
-
-   /*
-    * Dispersion is either 0 (no data available) or -1 (dispersion is
-    * 1/numtuples).  Either way, we need the relation size.
-    */
-
-   atp = SearchSysCache(RELOID,
-                        ObjectIdGetDatum(relid),
-                        0, 0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
-       return min_estimate;
-   }
-
-   ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-   ReleaseSysCache(atp);
-
-   if (ntuples == 0)
-       return min_estimate;    /* no data available */
-
-   if (dispersion < 0.0)       /* VACUUM thinks there are no duplicates */
-       return 1.0 / (double) ntuples;
-
-   /*
-    * VACUUM ANALYZE does not compute dispersion for system attributes,
-    * but some of them can reasonably be assumed unique anyway.
-    */
-   if (attnum == ObjectIdAttributeNumber ||
-       attnum == SelfItemPointerAttributeNumber)
-       return 1.0 / (double) ntuples;
-   if (attnum == TableOidAttributeNumber)
-       return 1.0;
-
-   /*
-    * VACUUM ANALYZE has not been run for this table. Produce an estimate
-    * of 1/numtuples.  This may produce unreasonably small estimates for
-    * large tables, so limit the estimate to no less than min_estimate.
-    */
-   dispersion = 1.0 / (double) ntuples;
-   if (dispersion < min_estimate)
-       dispersion = min_estimate;
-
-   return dispersion;
-}
-
 /*             ---------- INDEX CACHE ----------                        */
 
 /*     watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
 }
 
 #endif
+
+/*             ---------- STATISTICS CACHE ----------                   */
+
+/*
+ * get_attstatsslot
+ *
+ *     Extract the contents of a "slot" of a pg_statistic tuple.
+ *     Returns TRUE if requested slot type was found, else FALSE.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache.  We do this since
+ * most callers will want to extract more than one value from the cache
+ * entry, and we don't want to repeat the cache lookup unnecessarily.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype: type OID of attribute.
+ * atttypmod: typmod of attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if not NULL, the slot's stavalues are extracted.
+ * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
+ *
+ * If assigned, values and numbers are set to point to palloc'd arrays.
+ * If the attribute type is pass-by-reference, the values referenced by
+ * the values array are themselves palloc'd.  The palloc'd stuff can be
+ * freed by calling free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+                Oid atttype, int32 atttypmod,
+                int reqkind, Oid reqop,
+                Datum **values, int *nvalues,
+                float4 **numbers, int *nnumbers)
+{
+   Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+   int         i,
+               j;
+   Datum       val;
+   bool        isnull;
+   ArrayType  *statarray;
+   int         narrayelem;
+   HeapTuple   typeTuple;
+   FmgrInfo    inputproc;
+   Oid         typelem;
+
+   for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+   {
+       if ((&stats->stakind1)[i] == reqkind &&
+           (reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+           break;
+   }
+   if (i >= STATISTIC_NUM_SLOTS)
+       return false;           /* not there */
+
+   if (values)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stavalues1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stavalues is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * Do initial examination of the array.  This produces a list
+        * of text Datums --- ie, pointers into the text array value.
+        */
+       deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+       narrayelem = *nvalues;
+       /*
+        * We now need to replace each text Datum by its internal equivalent.
+        *
+        * Get the type input proc and typelem for the column datatype.
+        */
+       typeTuple = SearchSysCache(TYPEOID,
+                                  ObjectIdGetDatum(atttype),
+                                  0, 0, 0);
+       if (!HeapTupleIsValid(typeTuple))
+           elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+                atttype);
+       fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+       typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+       ReleaseSysCache(typeTuple);
+       /*
+        * Do the conversions.  The palloc'd array of Datums is reused
+        * in place.
+        */
+       for (j = 0; j < narrayelem; j++)
+       {
+           char       *strval;
+
+           strval = DatumGetCString(DirectFunctionCall1(textout,
+                                                        (*values)[j]));
+           (*values)[j] = FunctionCall3(&inputproc,
+                                        CStringGetDatum(strval),
+                                        ObjectIdGetDatum(typelem),
+                                        Int32GetDatum(atttypmod));
+           pfree(strval);
+       }
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   if (numbers)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stanumbers1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stanumbers is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * We expect the array to be a 1-D float4 array; verify that.
+        * We don't need to use deconstruct_array() since the array
+        * data is just going to look like a C array of float4 values.
+        */
+       narrayelem = ARR_DIMS(statarray)[0];
+       if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+           ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+           elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+       *numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+       memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+       *nnumbers = narrayelem;
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   return true;
+}
+
+void
+free_attstatsslot(Oid atttype,
+                 Datum *values, int nvalues,
+                 float4 *numbers, int nnumbers)
+{
+   if (values)
+   {
+       if (! get_typbyval(atttype))
+       {
+           int     i;
+
+           for (i = 0; i < nvalues; i++)
+               pfree(DatumGetPointer(values[i]));
+       }
+       pfree(values);
+   }
+   if (numbers)
+       pfree(numbers);
+}


diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index 75ef3179202695a3fb7a5336b7bc4f3e24d3f3f5..4e35b3fb35ba67aa78d337e6bdb39149c6256f8c 100644 (file)


--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
            0,
            0
    }},
-   {StatisticRelationName,     /* STATRELID */
+   {StatisticRelationName,     /* STATRELATT */
        StatisticRelidAttnumIndex,
        2,
        {


diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index d27bfb29668711e985f1ba29bd1285ab77201bf2..5a77c47c20085f0d24ae5b8edb6ef2ca70acdc27 100644 (file)


--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -78,7 +78,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,7 +87,11 @@
 
 #include "access/heapam.h"
 #include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
 #include "miscadmin.h"
+#include "utils/fmgroids.h"
 #include "utils/logtape.h"
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
    TupleDesc   tupDesc;
    int         nKeys;
    ScanKey     scanKeys;
+   SortFunctionKind *sortFnKinds;
 
    /*
     * These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
    Oid         datumType;
    Oid         sortOperator;
    FmgrInfo    sortOpFn;       /* cached lookup data for sortOperator */
+   SortFunctionKind sortFnKind;
    /* we need typelen and byval in order to know how to copy the Datums. */
    int         datumTypeLen;
    bool        datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
 
 Tuplesortstate *
 tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
+                    int nkeys,
+                    Oid *sortOperators, AttrNumber *attNums,
                     bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   int         i;
 
-   AssertArg(nkeys >= 1);
-   AssertArg(keys[0].sk_attno != 0);
-   AssertArg(keys[0].sk_procedure != 0);
+   AssertArg(nkeys > 0);
 
    state->comparetup = comparetup_heap;
    state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 
    state->tupDesc = tupDesc;
    state->nKeys = nkeys;
-   state->scanKeys = keys;
+   state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+   MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+   state->sortFnKinds = (SortFunctionKind *)
+       palloc(nkeys * sizeof(SortFunctionKind));
+   MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+   for (i = 0; i < nkeys; i++)
+   {
+       RegProcedure sortFunction;
+
+       AssertArg(sortOperators[i] != 0);
+       AssertArg(attNums[i] != 0);
+
+       /* select a function that implements the sort operator */
+       SelectSortFunction(sortOperators[i], &sortFunction,
+                          &state->sortFnKinds[i]);
+
+       ScanKeyEntryInitialize(&state->scanKeys[i],
+                              0x0,
+                              attNums[i],
+                              sortFunction,
+                              (Datum) 0);
+   }
 
    return state;
 }
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
                      bool randomAccess)
 {
    Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   RegProcedure sortFunction;
    int16       typlen;
    bool        typbyval;
 
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
 
    state->datumType = datumType;
    state->sortOperator = sortOperator;
-   /* lookup the function that implements the sort operator */
-   fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+   /* select a function that implements the sort operator */
+   SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+   /* and look up the function */
+   fmgr_info(sortFunction, &state->sortOpFn);
+
    /* lookup necessary attributes of the datum type */
    get_typlenbyval(datumType, &typlen, &typbyval);
    state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
    }
    if (state->memtupindex)
        pfree(state->memtupindex);
+
+   /* freeing these would fit better in a variant-specific shutdown routine */
+   if (state->scanKeys)
+       pfree(state->scanKeys);
+   if (state->sortFnKinds)
+       pfree(state->sortFnKinds);
+
    pfree(state);
 }
 
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
    for (nkey = 0; nkey < state->nKeys; nkey++)
    {
        ScanKey     scanKey = state->scanKeys + nkey;
+       SortFunctionKind fnKind = state->sortFnKinds[nkey];
        AttrNumber  attno = scanKey->sk_attno;
        Datum       lattr,
                    rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
        }
        else if (isnull2)
            return -1;
-       else if (scanKey->sk_flags & SK_COMMUTE)
-       {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return -1;      /* a < b after commute */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return 1;       /* a > b after commute */
-       }
        else
        {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return -1;      /* a < b */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return 1;       /* a > b */
+           int32       compare;
+
+           if (fnKind == SORTFUNC_LT)
+           {
+               if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                              lattr, rattr)))
+                   compare = -1;   /* a < b */
+               else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                                   rattr, lattr)))
+                   compare = 1;    /* a > b */
+               else
+                   compare = 0;
+           }
+           else
+           {
+               /* sort function is CMP or REVCMP */
+               compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+                                                     lattr, rattr));
+               if (fnKind == SORTFUNC_REVCMP)
+                   compare = -compare;
+           }
+
+           if (compare != 0)
+           {
+               if (scanKey->sk_flags & SK_COMMUTE)
+                   compare = -compare;
+               return compare;
+           }
        }
    }
 
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
        }
        else
        {
+           /* the comparison function is always of CMP type */
            compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
-                                               attrDatum1, attrDatum2));
+                                                 attrDatum1,
+                                                 attrDatum2));
        }
 
        if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
    }
    else if (rtup->isNull)
        return -1;
-   else
+   else if (state->sortFnKind == SORTFUNC_LT)
    {
        if (DatumGetBool(FunctionCall2(&state->sortOpFn,
                                       ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
            return 1;           /* a > b */
        return 0;
    }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
+
+       compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+                                             ltup->val, rtup->val));
+       if (state->sortFnKind == SORTFUNC_REVCMP)
+           compare = -compare;
+       return compare;
+   }
 }
 
 static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
        return (unsigned int) tuplelen;
    }
 }
+
+
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.  The straightforward
+ * method is to use the operator's implementation proc --- ie, "<"
+ * comparison.  However, that way often requires two calls of the function
+ * per comparison.  If we can find a btree three-way comparator function
+ * associated with the operator, we can use it to do the comparisons
+ * more efficiently.  We also support the possibility that the operator
+ * is ">" (descending sort), in which case we have to reverse the output
+ * of the btree comparator.
+ *
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+                  RegProcedure *sortFunction,
+                  SortFunctionKind *kind)
+{
+   Relation    relation;
+   HeapScanDesc scan;
+   ScanKeyData skey[3];
+   HeapTuple   tuple;
+   Oid         opclass = InvalidOid;
+
+   /*
+    * Scan pg_amop to see if the target operator is registered as the
+    * "<" or ">" operator of any btree opclass.  It's possible that it
+    * might be registered both ways (eg, if someone were to build a
+    * "reverse sort" opclass for some reason); prefer the "<" case if so.
+    * If the operator is registered the same way in multiple opclasses,
+    * assume we can use the associated comparator function from any one.
+    */
+   relation = heap_openr(AccessMethodOperatorRelationName,
+                         AccessShareLock);
+
+   ScanKeyEntryInitialize(&skey[0], 0,
+                          Anum_pg_amop_amopid,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(BTREE_AM_OID));
+
+   ScanKeyEntryInitialize(&skey[1], 0,
+                          Anum_pg_amop_amopopr,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(sortOperator));
+
+   scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+       if (aform->amopstrategy == BTLessStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_CMP;
+           break;              /* done looking */
+       }
+       else if (aform->amopstrategy == BTGreaterStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_REVCMP;
+           /* keep scanning in hopes of finding a BTLess entry */
+       }
+   }
+
+   heap_endscan(scan);
+   heap_close(relation, AccessShareLock);
+
+   if (OidIsValid(opclass))
+   {
+       /* Found a suitable opclass, get its comparator support function */
+       relation = heap_openr(AccessMethodProcedureRelationName,
+                             AccessShareLock);
+
+       ScanKeyEntryInitialize(&skey[0], 0,
+                              Anum_pg_amproc_amid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(BTREE_AM_OID));
+
+       ScanKeyEntryInitialize(&skey[1], 0,
+                              Anum_pg_amproc_amopclaid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(opclass));
+
+       ScanKeyEntryInitialize(&skey[2], 0,
+                              Anum_pg_amproc_amprocnum,
+                              F_INT2EQ,
+                              Int16GetDatum(BTORDER_PROC));
+
+       scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+       *sortFunction = InvalidOid;
+
+       if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+       {
+           Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+           *sortFunction = aform->amproc;
+       }
+
+       heap_endscan(scan);
+       heap_close(relation, AccessShareLock);
+
+       if (RegProcedureIsValid(*sortFunction))
+           return;
+   }
+
+   /* Can't find a comparator, so use the operator as-is */
+
+   *kind = SORTFUNC_LT;
+   *sortFunction = get_opcode(sortOperator);
+   if (!RegProcedureIsValid(*sortFunction))
+       elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+            sortOperator);
+}


diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h

index 759ab3d39e2494de4fa021c8070ac7e5da62d283..6e38529204dabaab44c078c0af05a6687fd0d966 100644 (file)


--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,15 +6,13 @@
  *
  * Copyright (c) 2000, PostgreSQL Development Team
  *
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef TUPTOASTER_H
 #define TUPTOASTER_H
 
-#ifdef TUPLE_TOASTER_ACTIVE
-
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
  */
 extern Datum toast_compress_datum(Datum value);
 
-#endif  /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
 
 
 #endif  /* TUPTOASTER_H */


diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 963b11c1d386ed6df175ad0e2e92cfe2929af774..832f91fb09f172d5ffc3d31aba10fccd5431c783 100644 (file)


--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 200101061
+#define CATALOG_VERSION_NO 200105051
 
 #endif


diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h

index a7248f6c6dc4fb438d27b29fe250c446534ad228..7ab04b05fb25b1dd765830e90ec1b717c6e2814e 100644 (file)


--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -7,13 +7,14 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef HEAP_H
 #define HEAP_H
 
+#include "catalog/pg_attribute.h"
 #include "utils/rel.h"
 
 typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
                          List *rawColDefaults,
                          List *rawConstraints);
 
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
 #endif  /* HEAP_H */


diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h

index 1dac0bb1c31596e1b31e5fc2d82f20835ed7879b..07aaad61c798bc295723dfe80cded8dbc848d6c9 100644 (file)


--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
               Datum *datum,
               char *nullv);
 
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
 extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
 extern void setRelhasindex(Oid relid, bool hasindex);
 


diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 41a580a37779abae1d46c2b8422b8ece0fbebc2b..cc155cf1bbb314f4cb54a41c23a3a2ed5e1fd5d8 100644 (file)


--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
 xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
 xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
 */
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
 DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
 DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
 DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));


diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h

index 58724e94dc966cef18b5345521cafa985a4dbf1e..6e11aa6d530707371c7b5b0f5af4e4174c4919f5 100644 (file)


--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,15 +36,14 @@
  *     typedef struct FormData_pg_attribute
  *
  *     If you change the following, make sure you change the structs for
- *     system attributes in heap.c and index.c also.
+ *     system attributes in catalog/heap.c also.
  * ----------------
  */
 CATALOG(pg_attribute) BOOTSTRAP
 {
    Oid         attrelid;       /* OID of relation containing this
                                 * attribute */
-   NameData    attname;
-   Oid         atttypid;
+   NameData    attname;        /* name of attribute */
 
    /*
     * atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
     * attalign attributes of this instance, so they had better match or
     * Postgres will fail.
     */
-
-   float4      attdispersion;
+   Oid         atttypid;
 
    /*
-    * attdispersion is the dispersion statistic of the column (0.0 to
-    * 1.0), or zero if the statistic has not been calculated, or -1.0 if
-    * VACUUM found that the column contains no duplicate entries (in
-    * which case the dispersion should be taken as 1.0/numberOfRows for
-    * the current table size).  The -1.0 hack is useful because the
-    * number of rows may be updated more often than attdispersion is. We
-    * assume that the column will retain its no-duplicate-entry property.
-    * (Perhaps this should be driven off the existence of a UNIQUE index
-    * for the column, instead of being a statistical guess?)
+    * attstattarget is the target number of statistics datapoints to collect
+    * during VACUUM ANALYZE of this column.  A zero here indicates that we
+    * do not wish to collect any stats about this column.
     */
-
-   int2        attlen;
+   int4        attstattarget;
 
    /*
     * attlen is a copy of the typlen field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   int2        attnum;
+   int2        attlen;
 
    /*
     * attnum is the "attribute number" for the attribute:  A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     *
     * Note that (attnum - 1) is often used as the index to an array.
     */
+   int2        attnum;
 
-   int4        attnelems;      /* number of dimensions, if an array type */
-
-   int4        attcacheoff;
+   /*
+    * attndims is the declared number of dimensions, if an array type,
+    * otherwise zero.
+    */
+   int4        attndims;
 
    /*
     * fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
     * tuple descriptor, we may then update attcacheoff in the copies.
     * This speeds up the attribute walking process.
     */
-
-   int4        atttypmod;
+   int4        attcacheoff;
 
    /*
     * atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
     * argument. The value will generally be -1 for types that do not need
     * typmod.
     */
-
-   bool        attbyval;
+   int4        atttypmod;
 
    /*
     * attbyval is a copy of the typbyval field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   char        attstorage;
+   bool        attbyval;
 
    /*----------
     * attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
     * but only as a last resort ('e' and 'x' fields are moved first).
     *----------
     */
+   char        attstorage;
 
+   /* This flag indicates that the attribute is really a set */
    bool        attisset;
-   char        attalign;
 
    /*
     * attalign is a copy of the typalign field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
     */
-
-   bool        attnotnull;
+   char        attalign;
 
    /* This flag represents the "NOT NULL" constraint */
-   bool        atthasdef;
+   bool        attnotnull;
 
    /* Has DEFAULT value or not */
+   bool        atthasdef;
 } FormData_pg_attribute;
 
 /*
  * someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
  */
 #define ATTRIBUTE_TUPLE_SIZE \
-   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
 
 /* ----------------
  *     Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
 #define Anum_pg_attribute_attrelid     1
 #define Anum_pg_attribute_attname      2
 #define Anum_pg_attribute_atttypid     3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
 #define Anum_pg_attribute_attlen       5
 #define Anum_pg_attribute_attnum       6
-#define Anum_pg_attribute_attnelems        7
+#define Anum_pg_attribute_attndims     7
 #define Anum_pg_attribute_attcacheoff  8
 #define Anum_pg_attribute_atttypmod        9
 #define Anum_pg_attribute_attbyval     10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
    (attribute)->attnotnull = false; \
    (attribute)->atthasdef = false;
 #endif  /* _DROP_COLUMN_HACK__ */
+
 /* ----------------
  *     SCHEMA_ macros for declaring hardcoded tuple descriptors.
  *     these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
  * ----------------
  */
 #define Schema_pg_type \
-{ 1247, {"typname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, '\0' , 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, false    , 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1247 typowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1247 typlen          21 0  2   3 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1247 typprtlen       21 0  2   4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_proc \
-{ 1255, {"proname"},           19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"},           19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1255 proowner            23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 prolang         26 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 proisinh            16 0  1   4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_shadow
  * ----------------
  */
-DATA(insert OID = 0 ( 1260 usename         19  0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid            23  0   4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename         19  DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid            23  DEFAULT_ATTSTATTARGET   4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1260 usecreatedb     16  0   1   3 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usetrace            16  0   1   4 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usesuper            16  0   1   5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  *     pg_group
  * ----------------
  */
-DATA(insert OID = 0 ( 1261 groname         19 0 NAMEDATALEN  1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid            23 0  4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid            23 DEFAULT_ATTSTATTARGET  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1261 grolist       1007 0 -1   3 0 -1 -1 f x f i f f));
 DATA(insert OID = 0 ( 1261 ctid                27 0  6  -1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1261 oid             26 0  4  -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_attribute \
-{ 1249, {"attrelid"},    26, 0,    4,  1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"},     19, 0, NAMEDATALEN,   2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0, 4,  4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"},   23, 0,    4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid            26 0  4   1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname         19 0 NAMEDATALEN  2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"},    26, DEFAULT_ATTSTATTARGET,    4,  1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"},     19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,   2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0,  4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"},    23, 0,    4,  7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid            26 DEFAULT_ATTSTATTARGET  4   1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  2 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1249 atttypid            26 0  4   3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion   700 0  4   4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget   23 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attlen          21 0  2   5 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1249 attnum          21 0  2   6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims            23 0  4   7 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attcacheoff     23 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 atttypmod       23 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attbyval            16 0  1  10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_class \
-{ 1259, {"relname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"},    23, 0,   4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"},    700, 0,  4,  7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltype         26 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relowner            23 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relam           26 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relfilenode     26 0  4   5 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relpages            23 0  4   6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples      700 0  4   7 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastrelid   26 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastidxid   26 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relhasindex     16 0  1  10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_variable \
-{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1264 varfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_log \
-{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1269 logfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo           26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 376 xactlockfoo      26 0  4   1 0 -1 -1 t p f i f f));
 


diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h

index 81e75e14b6a7dc7372f9dcd6808f824944f5f028..86de88cc9b662fe5c65f43301e2a28a247bf69ee 100644 (file)


--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
    Oid         relam;
    Oid         relfilenode;
    int4        relpages;
-   int4        reltuples;
+   float4      reltuples;
    Oid         reltoastrelid;
    Oid         reltoastidxid;
    bool        relhasindex;


diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h

index 2f39bea3245e1772984b1b3b4fca0dbb36f41c1d..8d6a6b37c16ac513468f052508aadf91a034ff85 100644 (file)


--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *   the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
    /* These fields form the unique key for the entry: */
    Oid         starelid;       /* relation containing attribute */
    int2        staattnum;      /* attribute (column) stats are for */
-   Oid         staop;          /* '<' comparison op used for lo/hi vals */
+
+   /* the fraction of the column's entries that are NULL: */
+   float4      stanullfrac;
 
    /*
-    * Note: the current VACUUM code will never produce more than one
-    * entry per column, but in theory there could be multiple entries if
-    * a datatype has more than one useful ordering operator.  Also, the
-    * current code will not write an entry unless it found at least one
-    * non-NULL value in the column; so the remaining fields will never be
-    * NULL.
+    * stawidth is the average width in bytes of non-null entries.  For
+    * fixed-width datatypes this is of course the same as the typlen, but
+    * for varlena types it is more useful.  Note that this is the average
+    * width of the data as actually stored, post-TOASTing (eg, for a
+    * moved-out-of-line value, only the size of the pointer object is
+    * counted).  This is the appropriate definition for the primary use of
+    * the statistic, which is to estimate sizes of in-memory hash tables of
+    * tuples.
+    */
+   int4        stawidth;
+
+   /* ----------------
+    * stadistinct indicates the (approximate) number of distinct non-null
+    * data values in the column.  The interpretation is:
+    *      0       unknown or not computed
+    *      > 0     actual number of distinct values
+    *      < 0     negative of multiplier for number of rows
+    * The special negative case allows us to cope with columns that are
+    * unique (stadistinct = -1) or nearly so (for example, a column in
+    * which values appear about twice on the average could be represented
+    * by stadistinct = -0.5).  Because the number-of-rows statistic in
+    * pg_class may be updated more frequently than pg_statistic is, it's
+    * important to be able to describe such situations as a multiple of
+    * the number of rows, rather than a fixed number of distinct values.
+    * But in other cases a fixed number is correct (eg, a boolean column).
+    * ----------------
+    */
+   float4      stadistinct;
+
+   /* ----------------
+    * To allow keeping statistics on different kinds of datatypes,
+    * we do not hard-wire any particular meaning for the remaining
+    * statistical fields.  Instead, we provide several "slots" in which
+    * statistical data can be placed.  Each slot includes:
+    *      kind            integer code identifying kind of data
+    *      op              OID of associated operator, if needed
+    *      numbers         float4 array (for statistical values)
+    *      values          text array (for representations of data values)
+    * The kind and operator fields are never NULL; they are zeroes in an
+    * unused slot.  The numbers and values fields are NULL in an unused
+    * slot, and might also be NULL in a used slot if the slot kind has
+    * no need for one or the other.
+    * ----------------
     */
 
+   int2        stakind1;
+   int2        stakind2;
+   int2        stakind3;
+   int2        stakind4;
+
+   Oid         staop1;
+   Oid         staop2;
+   Oid         staop3;
+   Oid         staop4;
+
    /*
-    * These fields contain the stats about the column indicated by the
-    * key
+    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+    * (NULL). They cannot be accessed as C struct entries; you have to use
+    * the full field access machinery (heap_getattr) for them.  We declare
+    * them here for the catalog machinery.
     */
-   float4      stanullfrac;    /* the fraction of the entries that are
-                                * NULL */
-   float4      stacommonfrac;  /* the fraction that are the most common
-                                * val */
+
+   float4      stanumbers1[1];
+   float4      stanumbers2[1];
+   float4      stanumbers3[1];
+   float4      stanumbers4[1];
 
    /*
-    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
-    * accessed as C struct entries; you have to use the full field access
-    * machinery (heap_getattr) for them.
-    *
-    * All three of these are text representations of data values of the
-    * column's data type.  To re-create the actual Datum, do
-    * datatypein(textout(givenvalue)).
+    * Values in these text arrays are external representations of values
+    * of the column's data type.  To re-create the actual Datum, do
+    * datatypein(textout(arrayelement)).
     */
-   text        stacommonval;   /* most common non-null value in column */
-   text        staloval;       /* smallest non-null value in column */
-   text        stahival;       /* largest non-null value in column */
+   text        stavalues1[1];
+   text        stavalues2[1];
+   text        stavalues3[1];
+   text        stavalues4[1];
 } FormData_pg_statistic;
 
+#define STATISTIC_NUM_SLOTS  4
+
 /* ----------------
  *     Form_pg_statistic corresponds to a pointer to a tuple with
  *     the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
  *     compiler constants for pg_statistic
  * ----------------
  */
-#define Natts_pg_statistic             8
+#define Natts_pg_statistic             21
 #define Anum_pg_statistic_starelid     1
 #define Anum_pg_statistic_staattnum        2
-#define Anum_pg_statistic_staop            3
-#define Anum_pg_statistic_stanullfrac  4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval 6
-#define Anum_pg_statistic_staloval     7
-#define Anum_pg_statistic_stahival     8
+#define Anum_pg_statistic_stanullfrac  3
+#define Anum_pg_statistic_stawidth     4
+#define Anum_pg_statistic_stadistinct  5
+#define Anum_pg_statistic_stakind1     6
+#define Anum_pg_statistic_stakind2     7
+#define Anum_pg_statistic_stakind3     8
+#define Anum_pg_statistic_stakind4     9
+#define Anum_pg_statistic_staop1       10
+#define Anum_pg_statistic_staop2       11
+#define Anum_pg_statistic_staop3       12
+#define Anum_pg_statistic_staop4       13
+#define Anum_pg_statistic_stanumbers1  14
+#define Anum_pg_statistic_stanumbers2  15
+#define Anum_pg_statistic_stanumbers3  16
+#define Anum_pg_statistic_stanumbers4  17
+#define Anum_pg_statistic_stavalues1   18
+#define Anum_pg_statistic_stavalues2   19
+#define Anum_pg_statistic_stavalues3   20
+#define Anum_pg_statistic_stavalues4   21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation.  Additional "kinds" will probably appear in
+ * the future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot.  Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not.  stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count).  The values
+ * shall be ordered in decreasing frequency.  Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector.  Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV  1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data.  staop is
+ * the OID of the "<" operator that describes the sort ordering.  (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.)  stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population.  The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance).  This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values.  In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM  2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop.  (As with the histogram, more
+ * than one entry could theoretically appear.)  stavalues is not used and
+ * should be NULL.  stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions.  The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION  3
 
 #endif  /* PG_STATISTIC_H */


diff --git a/src/include/commands/command.h b/src/include/commands/command.h

index 8b108451d2accff7969f55e6972ad389551829a1..7eb1a4fab846aeff33b3f5cca4f60b9c4c3b5fb5 100644 (file)


--- a/src/include/commands/command.h
+++ b/src/include/commands/command.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
 extern void AlterTableAddColumn(const char *relationName,
                    bool inh, ColumnDef *colDef);
 
-extern void AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+                                        bool inh, const char *colName,
+                                        Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+                                           bool inh, const char *colName,
+                                           Node *statsTarget);
 
 extern void AlterTableDropColumn(const char *relationName,
                     bool inh, const char *colName,


diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index d82d22fcdfcbe3482ed5dbf1b66bf52b607767c3..87bb0007aa067dcbfbe15d31cccfbe00f61df460 100644 (file)


--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -1,129 +1,27 @@
 /*-------------------------------------------------------------------------
  *
  * vacuum.h
- *   header file for postgres vacuum cleaner
+ *   header file for postgres vacuum cleaner and statistics analyzer
  *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef VACUUM_H
 #define VACUUM_H
 
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
 
 
-typedef struct VAttListData
-{
-   int         val_dummy;
-   struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
-   BlockNumber blkno;          /* BlockNumber of this Page */
-   Size        free;           /* FreeSpace on this Page */
-   uint16      offsets_used;   /* Number of OffNums used by vacuum */
-   uint16      offsets_free;   /* Number of OffNums free or to be free */
-   OffsetNumber offsets[1];    /* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
-   int         empty_end_pages;/* Number of "empty" end-pages */
-   int         num_pages;      /* Number of pages in pagedesc */
-   int         num_allocated_pages;    /* Number of allocated pages in
-                                        * pagedesc */
-   VacPage    *pagedesc;       /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
-   Form_pg_attribute attr;
-   Datum       best,
-               guess1,
-               guess2,
-               max,
-               min;
-   int         best_len,
-               guess1_len,
-               guess2_len,
-               max_len,
-               min_len;
-   long        best_cnt,
-               guess1_cnt,
-               guess1_hits,
-               guess2_hits,
-               null_cnt,
-               nonnull_cnt,
-               max_cnt,
-               min_cnt;
-   FmgrInfo    f_cmpeq,
-               f_cmplt,
-               f_cmpgt;
-   Oid         op_cmplt;
-   regproc     outfunc;
-   Oid         typelem;
-   bool        initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
-   Oid         vrl_relid;
-   struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
-   ItemPointerData new_tid;
-   ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
-   ItemPointerData tid;        /* tuple ID */
-   VacPage     vacpage;        /* where to move */
-   bool        cleanVpd;       /* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
-   Oid         relid;
-   int         num_tuples;
-   int         num_pages;
-   Size        min_tlen;
-   Size        max_tlen;
-   bool        hasindex;
-   int         num_vtlinks;
-   VTupleLink  vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE 1000000000      /* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                               bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
 
 #endif  /* VACUUM_H */


diff --git a/src/include/config.h.in b/src/include/config.h.in

index 0d989dbbb3155bfaa218fce2d6181c45921191de..01593a4ce963a05484b025e5206f27d8b2bd952b 100644 (file)


--- a/src/include/config.h.in
+++ b/src/include/config.h.in
@@ -8,7 +8,7 @@
  * or in config.h afterwards.  Of course, if you edit config.h, then your
  * changes will be overwritten the next time you run configure.
  *
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
  */
 
 #ifndef CONFIG_H
@@ -156,6 +156,11 @@
 #define INDEX_MAX_KEYS     16
 #define FUNC_MAX_ARGS      INDEX_MAX_KEYS
 
+/*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET  10
+
 /*
  * Define this to make libpgtcl's "pg_result -assign" command process C-style
  * backslash sequences in returned tuple data and convert Postgres array


diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 2cf9378cf116426106be2cba0bb29d970e561c09..0967bef24ba9437360c5142ffc6f770107c9aa5a 100644 (file)


--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -628,7 +628,6 @@ typedef struct GroupState
  *  SortState information
  *
  *     sort_Done       indicates whether sort has been performed yet
- *     sort_Keys       scan key structures describing the sort keys
  *     tuplesortstate  private state of tuplesort.c
  * ----------------
  */
@@ -636,7 +635,6 @@ typedef struct SortState
 {
    CommonScanState csstate;    /* its first field is NodeTag */
    bool        sort_Done;
-   ScanKey     sort_Keys;
    void       *tuplesortstate;
 } SortState;
 


diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 1614d787bcb8d0ceac119c020b51ae18ffebd013..63b1b1046a8e71675ed81102c38134886a45f0bc 100644 (file)


--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
    NodeTag     type;
    char        subtype;        /*------------
                                 *  A = add column
-                                *  T = alter column
+                                *  T = alter column default
+                                *  S = alter column statistics
                                 *  D = drop column
                                 *  C = add constraint
                                 *  X = drop constraint
-                                *  E = add toast table,
+                                *  E = create toast table
                                 *  U = change owner
                                 *------------
                                 */
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
 } ClusterStmt;
 
 /* ----------------------
- *     Vacuum Statement
+ *     Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
  * ----------------------
  */
 typedef struct VacuumStmt
 {
    NodeTag     type;
-   bool        verbose;        /* print status info */
-   bool        analyze;        /* analyze data */
-   char       *vacrel;         /* table to vacuum */
-   List       *va_spec;        /* columns to analyse */
+   bool        vacuum;         /* do VACUUM step */
+   bool        analyze;        /* do ANALYZE step */
+   bool        verbose;        /* print progress info */
+   char       *vacrel;         /* name of single table to process, or NULL */
+   List       *va_cols;        /* list of column names, or NIL for all */
 } VacuumStmt;
 
 /* ----------------------


diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 3ae8e09f57a30468fdece0f7fe9098a3ca05653f..9e69ed60992a7b7307fcc79150eccd7a6f62f963 100644 (file)


--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
  * reskey and reskeyop are the execution-time representation of sorting.
  * reskey must be zero in any non-sort-key item.  The reskey of sort key
  * targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID.  reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
  *
  * Both reskey and reskeyop are typically zero during parse/plan stages.
  * The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
    Index       ressortgroupref;
    /* nonzero if referenced by a sort/group clause */
    Index       reskey;         /* order of key in a sort (for those > 0) */
-   Oid         reskeyop;       /* sort operator's regproc Oid */
+   Oid         reskeyop;       /* sort operator's Oid */
    bool        resjunk;        /* set to true to eliminate the attribute
                                 * from final target list */
 } Resdom;


diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index f643ef879689640186250b344d4734f80aa6dc49..c76d9b4af7136f23fdc022f53127925129760519 100644 (file)


--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
    Oid         hashjoinoperator;       /* copy of clause operator */
 
    /* cache space for hashclause processing; -1 if not yet set */
-   Selectivity left_dispersion;/* dispersion of left side */
-   Selectivity right_dispersion;       /* dispersion of right side */
+   Selectivity left_bucketsize;        /* avg bucketsize of left side */
+   Selectivity right_bucketsize;       /* avg bucketsize of right side */
 } RestrictInfo;
 
 /*


diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 5caa576f0c0be002c3e1bc88a7ff75746f5c45b4..cbf6df063a3cc4ae782cab805acaaf80b9d2025f 100644 (file)


--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
               List *restrictlist,
               List *outersortkeys, List *innersortkeys);
 extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-             List *restrictlist, Selectivity innerdispersion);
+             List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
 extern Cost cost_qual_eval(List *quals);
 extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,


diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 5b71eded86fcac8f21a5732ef81d8906fd9263a3..0839feb4b2fe5c0d137a7705469acb3814779181 100644 (file)


--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
                     Path *inner_path,
                     List *restrict_clauses,
                     List *hashclauses,
-                    Selectivity innerdispersion);
+                    Selectivity innerbucketsize);
 
 /*
  * prototypes for relnode.c


diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index f1c4aff1c804172da17b24a438551c0b631c98c0..6b35deed2867649e350da0c081a983eb0dec5821 100644 (file)


--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
 extern Oid get_atttype(Oid relid, AttrNumber attnum);
 extern bool get_attisset(Oid relid, char *attname);
 extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
-                 double min_estimate);
 extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
 extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
 extern char get_typstorage(Oid typid);
 extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+                            Oid atttype, int32 atttypmod,
+                            int reqkind, Oid reqop,
+                            Datum **values, int *nvalues,
+                            float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+                             Datum *values, int nvalues,
+                             float4 *numbers, int nnumbers);
 
 #define TypeIsToastable(typid) (get_typstorage(typid) != 'p')
 


diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index 8d4e2ae62c419658f44ec3f1adb9853a658ea2c6..342f7bf8a566b73e4f8393553ccb332ed067ed06 100644 (file)


--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,7 +53,7 @@
 #define RULEOID            22
 #define SHADOWNAME     23
 #define SHADOWSYSID        24
-#define STATRELID      25
+#define STATRELATT     25
 #define TYPENAME       26
 #define TYPEOID            27
 


diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 7f273776c36a26cc1e6b688b4a530f74a7c108f2..001761796e2492781d98aec7c8b311b4538e251a 100644 (file)


--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
  */
 
 extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
-                    bool randomAccess);
+                     int nkeys,
+                     Oid *sortOperators, AttrNumber *attNums,
+                     bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                      bool enforceUnique,
                      bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
 extern void tuplesort_markpos(Tuplesortstate *state);
 extern void tuplesort_restorepos(Tuplesortstate *state);
 
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+   SORTFUNC_LT,                /* raw "<" operator */
+   SORTFUNC_CMP,               /* -1 / 0 / 1 three-way comparator */
+   SORTFUNC_REVCMP             /* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+                              RegProcedure *sortFunction,
+                              SortFunctionKind *kind);
+
 #endif  /* TUPLESORT_H */


diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c

index 5614a34b0fef7390ba8ec0a4184fea10da5e7d69..c03880f497d0d62526a94157175fede654376f28 100644 (file)


--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
    {"some", SOME},
    {"start", START},
    {"statement", STATEMENT},
+   {"statistics", STATISTICS},
    {"stdin", STDIN},
    {"stdout", STDOUT},
    {"substring", SUBSTRING},


diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y

index 345efb6576e2ddd8ff944993799a816b12bd34c8..91708bd91fae24f446576cacaea6ccbc1028163d 100644 (file)


--- a/src/interfaces/ecpg/preproc/preproc.y
+++ b/src/interfaces/ecpg/preproc/preproc.y
@@ -134,7 +134,7 @@ make_name(void)
 
 %union {
    double                  dval;
-        int                     ival;
+   int                     ival;
    char *                  str;
    struct when             action;
    struct index        index;
@@ -224,7 +224,7 @@ make_name(void)
        NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
        OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
        RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
-       SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+       SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
        TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
        VALID, VERBOSE, VERSION
 
@@ -285,7 +285,7 @@ make_name(void)
 %type  <str>  file_name AexprConst ParamNo c_expr ConstTypename
 %type  <str>  in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
 %type  <str>  opt_indirection expr_list extract_list extract_arg
-%type  <str>  position_list substr_list substr_from alter_column_action
+%type  <str>  position_list substr_list substr_from alter_column_default
 %type  <str>  trim_list in_expr substr_for attr attrs drop_behavior
 %type  <str>  Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
 %type  <str>  opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
 %type  <str>  row_expr row_descriptor row_list ConstDatetime opt_chain
 %type  <str>  SelectStmt into_clause OptTemp ConstraintAttributeSpec
 %type  <str>  opt_table opt_all sort_clause sortby_list ConstraintAttr 
-%type  <str>  sortby OptUseOp opt_inh_star relation_name_list name_list
+%type  <str>  sortby OptUseOp relation_name_list name_list
 %type  <str>  group_clause having_clause from_clause opt_distinct
 %type  <str>  join_outer where_clause relation_expr sub_type opt_arg
 %type  <str>  opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
 %type  <str>  NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
 %type  <str>  copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
 %type  <str>  opt_with_copy FetchStmt direction fetch_how_many from_in
-%type  <str>  ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type  <str>  opt_analyze opt_va_list va_list ExplainStmt index_params
+%type  <str>  ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type  <str>  analyze_keyword opt_name_list ExplainStmt index_params
 %type  <str>  index_list func_index index_elem opt_class access_method_clause
 %type  <str>  index_opt_unique IndexStmt func_return ConstInterval
 %type  <str>  func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
 %type  <str>  opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
 %type  <str>  case_expr when_clause_list case_default case_arg when_clause
 %type  <str>  select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type  <str>  select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type  <str>  select_offset_value ReindexStmt join_type opt_boolean
 %type  <str>  join_qual update_list AlterSchemaStmt joined_table
 %type  <str>  opt_level opt_lock lock_type users_in_new_group_clause
 %type  <str>  OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt:  AlterSchemaStmt          { output_statement($1, 0, NULL, connection); }
        | CreatedbStmt      { output_statement($1, 0, NULL, connection); }
        | DropdbStmt        { output_statement($1, 0, NULL, connection); }
        | VacuumStmt        { output_statement($1, 0, NULL, connection); }
+       | AnalyzeStmt       { output_statement($1, 0, NULL, connection); }
        | VariableSetStmt   { output_statement($1, 0, NULL, connection); }
        | VariableShowStmt  { output_statement($1, 0, NULL, connection); }
        | VariableResetStmt { output_statement($1, 0, NULL, connection); }
@@ -908,40 +909,41 @@ CheckPointStmt: CHECKPOINT     { $$= make_str("checkpoint"); }
 
 
 /*****************************************************************************
- *
- *     QUERY :
  *
  * ALTER TABLE variations
  *
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-        ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+        ALTER TABLE relation_expr ADD opt_column columnDef
+       {
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+       }
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+   | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
        }
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP
-DEFAULT} */
-   | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
-       alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+   | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+           $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
        }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-   | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
+   | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
        {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
        }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+   | ALTER TABLE relation_expr ADD TableConstraint
        {
-           $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+           $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
        }
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+   | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
        {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
        }
 /* ALTER TABLE <name> OWNER TO UserId */     
    | ALTER TABLE relation_name OWNER TO UserId   
@@ -950,7 +952,7 @@ DEFAULT} */
        }
        ;
 
-alter_column_action:
+alter_column_default:
         SET DEFAULT a_expr { $$ = cat2_str(make_str("set default"), $3); }
         | DROP DEFAULT          { $$ = make_str("drop default"); }
         ;
@@ -1234,10 +1236,6 @@ key_reference:  NO ACTION    { $$ = make_str("no action"); }
        | SET NULL_P    { $$ = make_str("set null"); }
        ;
 
-opt_only: ONLY     { $$ = make_str("only"); }
-   | /*EMPTY*/ { $$ = EMPTY; }
-   ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'                { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
                 | /*EMPTY*/                    { $$ = EMPTY; }
                 ;      
@@ -2013,10 +2011,9 @@ opt_force:      FORCE        { $$ = make_str("force"); }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                {
-                   $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+                   $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
                }
        ;
 
@@ -2250,38 +2247,44 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *     QUERY:
  *             vacuum
+ *             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
+               {
+                   $$ = cat_str(2, make_str("vacuum"), $2);
+               }
+       | VACUUM opt_verbose relation_name
                {
                    $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose AnalyzeStmt
                {
-                   if ( strlen($5) > 0 && strlen($4) == 0 )
-                       mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
-                   $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+                   $$ = cat_str(3, make_str("vacuum"), $2, $3);
                }
        ;
 
-opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   $$ = cat_str(2, $1, $2);
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   $$ = cat_str(4, $1, $2, $3, $4);
+               }
        ;
 
-opt_analyze:  ANALYZE                  { $$ = make_str("analyze"); }
-       | ANALYSE               { $$ = make_str("analyse"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+analyze_keyword:  ANALYZE                  { $$ = make_str("analyze"); }
+       | ANALYSE                           { $$ = make_str("analyse"); }
        ;
 
-opt_va_list:  '(' va_list ')'              { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
        | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
-va_list:  name
-               { $$=$1; }
-       | va_list ',' name
-               { $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list:  '(' name_list ')'      { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+       | /*EMPTY*/             { $$ = EMPTY; }
        ;
 
 
@@ -2383,9 +2386,9 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                {
-                   $$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+                   $$ = cat_str(3, make_str("delete from"), $3, $4);
                }
        ;
 
@@ -2416,12 +2419,12 @@ opt_lmode:      SHARE                           { $$ = make_str("share"); }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
              SET update_target_list
              from_clause
              where_clause
                {
-                   $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+                   $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
                }
        ;
 
@@ -2667,10 +2670,6 @@ select_offset_value:     PosIntConst {
  * ...however, recursive addattr and rename supported.  make special
  * cases for these.
  */
-opt_inh_star:  '*'                 { $$ = make_str("*"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
-       ;
-
 relation_name_list:  name_list { $$ = $1; };
 
 name_list:  name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause                { $$ = $1; }
        | /* EMPTY */               { $$ = EMPTY; }
                 ;
 
-update_list:  OF va_list
+update_list:  OF name_list
               {
            $$ = cat2_str(make_str("of"), $2);
          }
@@ -5028,6 +5027,7 @@ TokenId:  ABSOLUTE            { $$ = make_str("absolute"); }
    | SHARE             { $$ = make_str("share"); }
    | START             { $$ = make_str("start"); }
    | STATEMENT         { $$ = make_str("statement"); }
+   | STATISTICS        { $$ = make_str("statistics"); }
    | STDIN                         { $$ = make_str("stdin"); }
    | STDOUT                        { $$ = make_str("stdout"); }
    | SYSID                         { $$ = make_str("sysid"); }


diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out

index a2b0ad9e3e722827117e707ae7033a210771e9b4..46bc60f6955d60e4a52170d4b7281048b146d97d 100644 (file)


--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -353,12 +353,28 @@ WHERE pg_statistic.starelid != 0 AND
 -----+----------
 (0 rows)
 
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop 
------+-------
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3 
+-----+--------
 (0 rows)
 
 SELECT oid, pg_trigger.tgrelid 


diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 9d4ff1b39856b3d468938ef709578649fe4d84ce..1b094a6e3bfe2f58a8e7b108c0088131a36feb35 100644 (file)


--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
           (p2.pronargs = 1 AND p1.aggbasetype = 0)));
   oid  | aggname | oid |   proname   
 -------+---------+-----+-------------
- 16997 | max     | 768 | int4larger
- 17011 | min     | 769 | int4smaller
+ 17010 | max     | 768 | int4larger
+ 17024 | min     | 769 | int4smaller
 (2 rows)
 
 -- Cross-check finalfn (if present) against its entry in pg_proc.


diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql

index b7ea1f63eaa8268d3583a670e9f3985619be0453..88727a6c76ec6922fc12f4456fba2dc650570f0a 100644 (file)


--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -177,10 +177,18 @@ SELECT    oid, pg_statistic.starelid
 FROM   pg_statistic 
 WHERE  pg_statistic.starelid != 0 AND 
    NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
 SELECT oid, pg_trigger.tgrelid 
 FROM   pg_trigger 
 WHERE  pg_trigger.tgrelid != 0 AND 
     linkend="SQL-CREATETABLE" endterm="SQL-CREATETABLE-title">. 
-   The OWNER clause chnages the owner of the table to the user 
+   The OWNER clause changes the owner of the table to the user 
     new user.
    
  
@@ -190,10 +197,11 @@ ALTER TABLE table
     
  
     
-    In the current implementation, default and constraint clauses for the
+    In the current implementation of ADD COLUMN,
+    default and constraint clauses for the
      new column will be ignored. You can use the SET DEFAULT
      form of ALTER TABLE to set the default later.
-    (You will also have to update the already existing rows to the
+    (You may also want to update the already existing rows to the
      new default value, using 
     endterm="sql-update-title">.)
    
@@ -210,7 +218,7 @@ ALTER TABLE table
 
    
     You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
     catalog is not permitted.
     The PostgreSQL User's Guide has further
     information on inheritance.
      endterm="sql-update-title">.)
     
@@ -210,7 +218,7 @@ ALTER TABLE table
  
     
      You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
      catalog is not permitted.
      The PostgreSQL User's Guide has further
      information on inheritance.
diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml

new file mode 100644 (file)

index 0000000..57d3213
--- /dev/null
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -0,0 +1,219 @@
+
+
+
+ 
+  
+   ANALYZE
+  
+  SQL - Language Statements
+ 
+ 
+  
+   ANALYZE
+  
+  
+   Collect statistics about a Postgres database
+  
+ 
+ 
+  
+   2001-05-04
+  
+  
+ANALYZE [ VERBOSE ] [ table [ (column [, ...] ) ] ]
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Inputs
+   
+
+   
+    
+     
+      VERBOSE
+      
+       
+   Enables display of progress messages.
+       
+      
+     
+     
+      table
+      
+       
+   The name of a specific table to analyze. Defaults to all tables.
+       
+      
+     
+     
+      column
+      
+       
+   The name of a specific column to analyze. Defaults to all columns.
+       
+      
+     
+    
+   
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Outputs
+   
+   
+
+    
+     
+      
+ANALYZE
+       
+      
+       
+   The command is complete.
+       
+      
+     
+
+    
+   
+  
+ 
+
+ 
+  
+   2001-05-04
+  
+  
+   Description
+  
+  
+   ANALYZE collects statistics about the contents of
+   Postgres tables, and stores the results in
+   the system table pg_statistic.  Subsequently,
+   the query planner uses the statistics to help determine the most efficient
+   execution plans for queries.
+  
+
+  
+   With no parameter, ANALYZE examines every table in the
+   current database.  With a parameter, ANALYZE examines
+   only that table.  It is further possible to give a list of column names,
+   in which case only the statistics for those columns are updated.
+  
+
+  
+   
+    2001-05-04
+   
+   
+    Notes
+   
+
+  
+   It is a good idea to run ANALYZE periodically, or
+   just after making major changes in the contents of a table.  Accurate
+   statistics will help the planner to choose the most appropriate query
+   plan, and thereby improve the speed of query processing.  A common
+   strategy is to run VACUUM and ANALYZE
+   once a day during a low-usage time of day.
+  
+
+  
+   Unlike VACUUM,
+   ANALYZE requires
+   only a read lock on the target table, so it can run in parallel with
+   other activity on the table.
+  
+
+  
+   For large tables, ANALYZE takes a random sample of the
+   table contents, rather than examining every row.  This allows even very
+   large tables to be analyzed in a small amount of time.  Note however
+   that the statistics are only approximate, and will change slightly each
+   time ANALYZE is run, even if the actual table contents
+   did not change.  This may result in small changes in the planner's
+   estimated costs shown by EXPLAIN.
+  
+
+  
+   The collected statistics usually include a list of some of the most common
+   values in each column and a histogram showing the approximate data
+   distribution in each column.  One or both of these may be omitted if
+   ANALYZE deems them uninteresting (for example, in
+   a unique-key column, there are no common values) or if the column
+   datatype does not support the appropriate operators.
+  
+
+  
+   The extent of analysis can be controlled by adjusting the per-column
+   statistics target with ALTER TABLE ALTER COLUMN SET
+   STATISTICS (see
+   ALTER TABLE).  The
+   target value sets the maximum number of entries in the most-common-value
+   list and the maximum number of bins in the histogram.  The default
+   target value is 10, but this can be adjusted up or down to trade off
+   accuracy of planner estimates against the time taken for
+   ANALYZE and the
+   amount of space occupied in pg_statistic.
+   In particular, setting the statistics target to zero disables collection of
+   statistics for that column.  It may be useful to do that for columns that
+   are never used as part of the WHERE, GROUP BY, or ORDER BY clauses of
+   queries, since the planner will have no use for statistics on such columns.
+  
+
+  
+   The largest statistics target among the columns being analyzed determines
+   the number of table rows sampled to prepare the statistics.  Increasing
+   the target causes a proportional increase in the time and space needed
+   to do ANALYZE.
+  
+
+  
+ 
+
+ 
+  
+   Compatibility
+  
+
+  
+   
+    2001-05-04
+   
+   
+    SQL92
+   
+   
+    There is no ANALYZE statement in SQL92.
+   
+  
+ 
+
+
+
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml

index 51cb8a9ffdacfe41115a94d41b11e97fa1e6b6f9..cbb182466ea44d231b4271f54f2c14da9534307b 100644 (file)
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -1,5 +1,5 @@
  
  
@@ -15,15 +15,15 @@ Postgres documentation
     VACUUM
    
    
-   Clean and analyze a Postgres database
+   Clean and optionally analyze a Postgres database
    
   
   
    
-   1999-07-20
+   2001-05-04
    
    
-VACUUM [ VERBOSE ] [ ANALYZE ] [ table ]
+VACUUM [ VERBOSE ] [ table ]
  VACUUM [ VERBOSE ] ANALYZE [ table [ (column [, ...] ) ] ]
    
  
@@ -49,7 +49,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
        ANALYZE
        
         
-   Updates column statistics used by the optimizer to
+   Updates statistics used by the optimizer to
     determine the most efficient way to execute a query.
         
        
@@ -90,7 +90,7 @@ VACUUM [ VERBOSE ] ANALYZE [ table
         
        
         
-   The command has been accepted and the database is being cleaned.
+   The command is complete.
         
        
       
@@ -144,28 +144,26 @@ NOTICE:  Index index: Pages 28;
     Description
    
    
-   VACUUM serves two purposes in 
-   Postgres as both a means to reclaim storage and
-   also a means to collect information for the optimizer.
+   VACUUM reclaims storage occupied by deleted tuples.
+   In normal Postgres operation, tuples that
+   are DELETEd or obsoleted by UPDATE are not physically removed from
+   their table; they remain present until a VACUUM is
+   done.  Therefore it's necessary to do VACUUM
+   periodically, especially on frequently-updated tables.
    
  
    
-   VACUUM opens every table in the database,
-   cleans out records from rolled back transactions, and updates statistics in the
-   system catalogs.  The statistics maintained include the number of
-   tuples and number of pages stored in all tables.
-  
-
-
-  
-   VACUUM ANALYZE collects statistics representing the
-   dispersion of the data in each column.
-   This information is valuable when several query execution paths are possible.
+   With no parameter, VACUUM processes every table in the
+   current database.  With a parameter, VACUUM processes
+   only that table.
    
  
    
-   Running VACUUM
-   periodically will increase the speed of the database in processing user queries.
+   VACUUM ANALYZE performs a VACUUM
+   and then an ANALYZE for each selected table.  This
+   is a handy combination form for routine maintenance scripts.  See
+   ANALYZE
+   for more details about its processing.
    
  
    
@@ -175,16 +173,15 @@ NOTICE:  Index index: Pages 28;
     
     Notes
    
-   
-    The open database is the target for VACUUM.
-   
+
     
      We recommend that active production databases be
      VACUUM-ed nightly, in order to remove
      expired rows. After copying a large table into
      Postgres or after deleting a large number
      of records, it may be a good idea to issue a VACUUM
-    ANALYZE query. This will update the system catalogs with
+    ANALYZE command for the affected table. This will update the
+    system catalogs with
      the results of all recent changes, and allow the
      Postgres query optimizer to make better
      choices in planning user queries.
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml

index b92ee0868d029cf48443f4240fab5224bc958862..9a977a6515c97db601f13f5f43413bc3e81a46c8 100644 (file)
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
  
@@ -26,6 +26,7 @@ PostgreSQL Reference Manual
     &alterGroup;
     &alterTable;
     &alterUser;
+   &analyze;
     &begin;
     &checkpoint;
     &close;
diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml

index d38e78a4e1af22651531a65d320f427ea71b175b..57d8bb79c28d69da43ce1897f0dacb4f3dd1a56b 100644 (file)
--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -1,5 +1,5 @@
  
  
   
@@ -244,7 +244,7 @@ SELECT (a + b) AS c FROM test_complex;
      only a small fraction.  '<' will accept a fraction that depends on
      where the given constant falls in the range of values for that table
      column (which, it just so happens, is information collected by
-    VACUUM ANALYZE and made available to the selectivity estimator).
+    ANALYZE and made available to the selectivity estimator).
      '<=' will accept a slightly larger fraction than '<' for the same
      comparison constant, but they're close enough to not be worth
      distinguishing, especially since we're not likely to do better than a
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c

index 769f754b6690919606bdaaf8a016260382abdef8..86d704e8d08779e32b38e3d4d4f938072adeccf7 100644 (file)
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
   *
   * NOTES
   *   some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
         Form_pg_attribute attr2 = tupdesc2->attrs[i];
  
         /*
-        * We do not need to check every single field here, and in fact
-        * some fields such as attdispersion probably shouldn't be
-        * compared.  We can also disregard attnum (it was used to place
-        * the row in the attrs array) and everything derived from the
-        * column datatype.
+        * We do not need to check every single field here: we can disregard
+        * attrelid, attnum (it was used to place the row in the attrs array)
+        * and everything derived from the column datatype.
          */
         if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
             return false;
         if (attr1->atttypid != attr2->atttypid)
             return false;
+       if (attr1->attstattarget != attr2->attstattarget)
+           return false;
         if (attr1->atttypmod != attr2->atttypmod)
             return false;
         if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
     else
         MemSet(NameStr(att->attname), 0, NAMEDATALEN);
  
-   att->attdispersion = 0;     /* dummy value */
+   att->attstattarget = 0;
     att->attcacheoff = -1;
     att->atttypmod = typmod;
  
     att->attnum = attributeNumber;
-   att->attnelems = attdim;
+   att->attndims = attdim;
     att->attisset = attisset;
  
     att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
     att->attbyval = true;
     att->attalign = 'i';
     att->attstorage = 'p';
-   att->attnelems = 0;
+   att->attndims = 0;
  }
  
  /* ----------------------------------------------------------------
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c

index 1c5577b88a066a1ecebfd6ce317147efc28d489c..06010896821e5caa9627c17f6328239ec3c277b6 100644 (file)
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -6,7 +6,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
  #endif
  
  /*
-** routine to build an index.  Basically calls insert over and over
-*/
+ * routine to build an index.  Basically calls insert over and over
+ */
  Datum
  gistbuild(PG_FUNCTION_ARGS)
  {
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
                 itupdesc;
     Datum       attdata[INDEX_MAX_KEYS];
     char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                 nitups;
     Node       *pred = indexInfo->ii_Predicate;
  
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
  #endif  /* OMIT_PARTIAL_INDEX */
  
     /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
  
     compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
  
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
     {
         MemoryContextReset(econtext->ecxt_per_tuple_memory);
  
-       nhtups++;
+       nhtups += 1.0;
  
  #ifndef OMIT_PARTIAL_INDEX
  
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
             slot->val = htup;
             if (ExecQual((List *) oldPred, econtext, false))
             {
-               nitups++;
+               nitups += 1.0;
                 continue;
             }
         }
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
         }
  #endif  /* OMIT_PARTIAL_INDEX */
  
-       nitups++;
+       nitups += 1.0;
  
         /*
          * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

index aa76ba232a05c21da94012fbefbc287924aa154f..9617fcc33a6a0bb5bf4556944cc433be26ad0331 100644 (file)
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
   *
   * NOTES
   *   This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
                 itupdesc;
     Datum       attdata[INDEX_MAX_KEYS];
     char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                 nitups;
     HashItem    hitem;
     Node       *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
  #endif  /* OMIT_PARTIAL_INDEX */
  
     /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
  
     /* start a heap scan */
     hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
     {
         MemoryContextReset(econtext->ecxt_per_tuple_memory);
  
-       nhtups++;
+       nhtups += 1.0;
  
  #ifndef OMIT_PARTIAL_INDEX
  
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
             slot->val = htup;
             if (ExecQual((List *) oldPred, econtext, false))
             {
-               nitups++;
+               nitups += 1.0;
                 continue;
             }
         }
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
         }
  #endif  /* OMIT_PARTIAL_INDEX */
  
-       nitups++;
+       nitups += 1.0;
  
         /*
          * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c

index fb509ab66de99d90fcdab322dd36af40551316d1..2a9df577b10c56de723c68ae329e47847849fb71 100644 (file)
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
   *
   *
   * INTERFACE ROUTINES
@@ -166,6 +166,43 @@ heap_tuple_untoast_attr(varattrib *attr)
  }
  
  
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum, header only inspected
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+   varattrib  *attr = (varattrib *) DatumGetPointer(value);
+   Size        result;
+
+   if (VARATT_IS_COMPRESSED(attr))
+   {
+       /*
+        * va_rawsize is the original (uncompressed) data size, whether or
+        * not the datum is also external; it excludes the header, so add it.
+        */
+       result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+   }
+   else if (VARATT_IS_EXTERNAL(attr))
+   {
+       /*
+        * an uncompressed external attribute has rawsize already including
+        * the header (not too consistent with the compressed case!)
+        */
+       result = attr->va_content.va_external.va_rawsize;
+   }
+   else
+   {
+       /* plain untoasted datum: its VARSIZE is used as the raw size as-is */
+       result = VARSIZE(attr);
+   }
+   return result;
+}
+
+
  /* ----------
   * toast_delete -
   *
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

index 97d99da4fde7bbbfe009c7c7baf04dc557390cd9..f456e0c9306f4f3c191d75172463bf852e905041 100644 (file)
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
                 itupdesc;
     Datum       attdata[INDEX_MAX_KEYS];
     char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                 nitups;
     Node       *pred = indexInfo->ii_Predicate;
  
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
  #endif  /* OMIT_PARTIAL_INDEX */
  
     /* build the index */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
  
     if (usefast)
     {
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
  
         MemoryContextReset(econtext->ecxt_per_tuple_memory);
  
-       nhtups++;
+       nhtups += 1.0;
  
  #ifndef OMIT_PARTIAL_INDEX
  
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
             slot->val = htup;
             if (ExecQual((List *) oldPred, econtext, false))
             {
-               nitups++;
+               nitups += 1.0;
                 continue;
             }
         }
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
         }
  #endif  /* OMIT_PARTIAL_INDEX */
  
-       nitups++;
+       nitups += 1.0;
  
         /*
          * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c

index 3752a59e99a3259dcef8feb7660927baf8308a4a..a8c6a13ea3c14626245bad59e372b66b0d5c25e2 100644 (file)
--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
                 itupdesc;
     Datum       attdata[INDEX_MAX_KEYS];
     char        nulls[INDEX_MAX_KEYS];
-   int         nhtups,
+   double      nhtups,
                 nitups;
     Node       *pred = indexInfo->ii_Predicate;
  
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
  #endif  /* OMIT_PARTIAL_INDEX */
  
     /* count the tuples as we insert them */
-   nhtups = nitups = 0;
+   nhtups = nitups = 0.0;
  
     /* start a heap scan */
     hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
     {
         MemoryContextReset(econtext->ecxt_per_tuple_memory);
  
-       nhtups++;
+       nhtups += 1.0;
  
  #ifndef OMIT_PARTIAL_INDEX
  
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
             slot->val = htup;
             if (ExecQual((List *) oldPred, econtext, false))
             {
-               nitups++;
+               nitups += 1.0;
                 continue;
             }
         }
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
         }
  #endif  /* OMIT_PARTIAL_INDEX */
  
-       nitups++;
+       nitups += 1.0;
  
         /*
          * For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh

index c2993fa8fc6b474bc13badd0c4369ca56fdbb9d4..cac53f3e0853262c213239e698170311a6ee8e1c 100644 (file)
--- a/src/backend/catalog/genbki.sh
+++ b/src/backend/catalog/genbki.sh
@@ -10,7 +10,7 @@
  #
  #
  # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
  #
  # NOTES
  #    non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
      fi
  done
  
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
  for dir in $INCLUDE_DIRS; do
      if [ -f "$dir/config.h" ]; then
          INDEXMAXKEYS=`grep '#define[   ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+        DEFAULTATTSTATTARGET=`grep '#define[   ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
          break
      fi
  done
@@ -168,6 +170,7 @@ sed -e "s/;[    ]*$//g" \
      -e "s/(NameData/(name/g" \
      -e "s/(Oid/(oid/g" \
      -e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+    -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
      -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
      -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
      -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

index 54867d51a4b631241e649453750b03ee0c1aeef4..03f16e11c3f3710b2589d8e7330bfd0a2bb386b8 100644 (file)
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
   *
   *
   * INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
  
  /*
   * Note:
- *     Should the executor special case these attributes in the future?
- *     Advantage:  consume 1/2 the space in the ATTRIBUTE relation.
- *     Disadvantage:  having rules to compute values in these tuples may
- *             be more difficult if not impossible.
+ *     Should the system special case these attributes in the future?
+ *     Advantage:  consume much less space in the ATTRIBUTE relation.
+ *     Disadvantage:  special cases will be all over the place.
   */
  
  static FormData_pg_attribute a1 = {
-   0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
-   SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+   0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+   SelfItemPointerAttributeNumber, 0, -1, -1,
+   false, 'p', false, 'i', false, false
  };
  
  static FormData_pg_attribute a2 = {
-   0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
-   ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"oid"}, OIDOID, 0, sizeof(Oid),
+   ObjectIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
  };
  
  static FormData_pg_attribute a3 = {
-   0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
-   MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+   MinTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
  };
  
  static FormData_pg_attribute a4 = {
-   0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
-   MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+   MinCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
  };
  
  static FormData_pg_attribute a5 = {
-   0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
-   MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+   MaxTransactionIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
  };
  
  static FormData_pg_attribute a6 = {
-   0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
-   MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+   MaxCommandIdAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
  };
  
  /*
-   We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
  static FormData_pg_attribute a7 = {
-   0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
-   TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+   0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+   TableOidAttributeNumber, 0, -1, -1,
+   true, 'p', false, 'i', false, false
  };
  
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * Return the Form_pg_attribute pointer stored in SysAtt[] for system attribute attno.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+   if (attno >= 0 || attno < - (int) lengthof(SysAtt)) /* valid: -lengthof(SysAtt) .. -1 */
+       elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+            attno);
+   return SysAtt[-attno - 1];  /* attno -1 maps to SysAtt[0], -2 to SysAtt[1], ... */
+}
  
  /* ----------------------------------------------------------------
   *             XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
   *     8) the relations are closed and the new relation's oid
   *        is returned.
   *
- * old comments:
- *     A new relation is inserted into the RELATION relation
- *     with the specified attribute(s) (newly inserted into
- *     the ATTRIBUTE relation).  How does concurrency control
- *     work?  Is it automatic now?  Expects the caller to have
- *     attname, atttypid, atttyparg, attproc, and attlen domains filled.
- *     Create fills the attnum domains sequentually from zero,
- *     fills the attdispersion domains with zeros, and fills the
- *     attrelid fields with the relid.
- *
- *     scan relation catalog for name conflict
- *     scan type catalog for typids (if not arg)
- *     create and insert attribute(s) into attribute catalog
- *     create new relation
- *     insert new relation into attribute catalog
- *
- *     Should coordinate with heap_create_with_catalog(). Either
- *     it should not be called or there should be a way to prevent
- *     the relation from being removed at the end of the
- *     transaction if it is successful ('u'/'r' may be enough).
- *     Also, if the transaction does not commit, then the
- *     relation should be removed.
- *
- *     XXX amcreate ignores "off" when inserting (for now).
- *     XXX amcreate (like the other utilities) needs to understand indexes.
- *
   * ----------------------------------------------------------------
   */
  
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
      */
     for (i = 0; i < natts; i++)
     {
-       for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+       for (j = 0; j < (int) lengthof(SysAtt); j++)
         {
-           if (strcmp(NameStr(HeapAtt[j]->attname),
+           if (strcmp(NameStr(SysAtt[j]->attname),
                        NameStr(tupdesc->attrs[i]->attname)) == 0)
             {
                 elog(ERROR, "Attribute '%s' has a name conflict"
                      "\n\tName matches an existing system attribute",
-                    NameStr(HeapAtt[j]->attname));
+                    NameStr(SysAtt[j]->attname));
             }
         }
         if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
         /* Fill in the correct relation OID */
         (*dpp)->attrelid = new_rel_oid;
         /* Make sure these are OK, too */
-       (*dpp)->attdispersion = 0;
+       (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
         (*dpp)->attcacheoff = -1;
  
         tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
     /*
      * next we add the system attributes..
      */
-   dpp = HeapAtt;
+   dpp = SysAtt;
     for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
     {
         /* Fill in the correct relation OID */
         /* HACK: we are writing on static data here */
         (*dpp)->attrelid = new_rel_oid;
         /* Unneeded since they should be OK in the constant data anyway */
-       /* (*dpp)->attdispersion = 0; */
+       /* (*dpp)->attstattarget = 0; */
         /* (*dpp)->attcacheoff = -1; */
  
         tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
      * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
      * finds the relation has 0 rows and pages. See index.c.)
      */
-   new_rel_reltup->relpages = 10;      /* bogus estimates */
-   new_rel_reltup->reltuples = 1000;
+   switch (relkind)
+   {
+       case RELKIND_RELATION:
+       case RELKIND_INDEX:
+       case RELKIND_TOASTVALUE:
+           new_rel_reltup->relpages = 10;  /* bogus estimates */
+           new_rel_reltup->reltuples = 1000;
+           break;
+       case RELKIND_SEQUENCE:
+           new_rel_reltup->relpages = 1;
+           new_rel_reltup->reltuples = 1;
+           break;
+       default:                /* views, etc */
+           new_rel_reltup->relpages = 0;
+           new_rel_reltup->reltuples = 0;
+           break;
+   }
  
     new_rel_reltup->relowner = GetUserId();
     new_rel_reltup->reltype = new_type_oid;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index 2adb30e1ed8ecf91d12c0028495b8911ece7068d..5eefab114891fdc1b2bbcc7b407d6c96ac3c75ca 100644 (file)
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
   *
   *
   * INTERFACE ROUTINES
@@ -55,7 +55,7 @@
   */
  #define AVG_ATTR_SIZE 8
  #define NTUPLES_PER_PAGE(natts) \
-   ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+   ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
     ((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
  
  /* non-export function prototypes */
@@ -98,39 +98,6 @@ IsReindexProcessing(void)
     return reindexing;
  }
  
-/* ----------------------------------------------------------------
- *   sysatts is a structure containing attribute tuple forms
- *   for system attributes (numbered -1, -2, ...).  This really
- *   should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- *     Oid             attrelid;
- *     NameData        attname;
- *     Oid             atttypid;
- *     uint32          attnvals;
- *     int16           attlen;
- *     AttrNumber      attnum;
- *     uint32          attnelems;
- *     int32           attcacheoff;
- *     int32           atttypmod;
- *     bool            attbyval;
- *     bool            attisset;
- *     char            attalign;
- *     bool            attnotnull;
- *     bool            atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
-   {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-   {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
  /* ----------------------------------------------------------------
   *     GetHeapRelationOid
   * ----------------------------------------------------------------
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
     for (i = 0; i < numatts; i++)
     {
         AttrNumber  atnum;      /* attributeNumber[attributeOffset] */
-       AttrNumber  atind;
         Form_pg_attribute from;
         Form_pg_attribute to;
  
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
         {
  
             /*
-            * here we are indexing on a system attribute (-1...-n) so we
-            * convert atnum into a usable index 0...n-1 so we can use it
-            * to dereference the array sysatts[] which stores tuple
-            * descriptor information for system attributes.
+            * here we are indexing on a system attribute (-1...-n)
              */
-           if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
-               elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
-           atind = (-atnum) - 1;
-
-           from = &sysatts[atind];
+           from = SystemAttributeDefinition(atnum);
         }
         else
         {
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
             if (atnum > natts)
                 elog(ERROR, "Cannot create index: attribute %d does not exist",
                      atnum);
-           atind = AttrNumberGetAttrOffset(atnum);
  
-           from = heapTupDesc->attrs[atind];
+           from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
         }
  
         /*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
          */
         to->attnum = i + 1;
  
-       to->attdispersion = 0.0;
+       to->attstattarget = 0;
+       to->attcacheoff = -1;
         to->attnotnull = false;
         to->atthasdef = false;
-       to->attcacheoff = -1;
  
         /*
          * We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
  
  /* ----------------
   *     UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index).  Note that this is not used
+ * in the context of VACUUM.
   * ----------------
   */
  void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
  {
     Relation    whichRel;
     Relation    pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
      * with zero size statistics until a VACUUM is done.  The optimizer
      * will generate very bad plans if the stats claim the table is empty
      * when it is actually sizable.  See also CREATE TABLE in heap.c.
+    *
+    * Note: this path is also taken during bootstrap, because bootstrap.c
+    * passes reltuples = 0 after loading a table.  We have to estimate some
+    * number for reltuples based on the actual number of pages.
      */
     relpages = RelationGetNumberOfBlocks(whichRel);
  
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
  
         for (i = 0; i < Natts_pg_class; i++)
         {
-           nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+           nulls[i] = ' ';
             replace[i] = ' ';
             values[i] = (Datum) NULL;
         }
  
         replace[Anum_pg_class_relpages - 1] = 'r';
-       values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+       values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
         replace[Anum_pg_class_reltuples - 1] = 'r';
-       values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+       values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
         newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
         simple_heap_update(pg_class, &tuple->t_self, newtup);
         if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
     TupleDesc   heapDescriptor;
     Datum       datum[INDEX_MAX_KEYS];
     char        nullv[INDEX_MAX_KEYS];
-   long        reltuples,
+   double      reltuples,
                 indtuples;
     Node       *predicate = indexInfo->ii_Predicate;
  
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
                           0,    /* number of keys */
                           (ScanKey) NULL);      /* scan key */
  
-   reltuples = indtuples = 0;
+   reltuples = indtuples = 0.0;
  
     /*
      * for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
     {
         MemoryContextReset(econtext->ecxt_per_tuple_memory);
  
-       reltuples++;
+       reltuples += 1.0;
  
  #ifndef OMIT_PARTIAL_INDEX
  
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
             slot->val = heapTuple;
             if (ExecQual((List *) oldPred, econtext, false))
             {
-               indtuples++;
+               indtuples += 1.0;
                 continue;
             }
         }
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
         }
  #endif  /* OMIT_PARTIAL_INDEX */
  
-       indtuples++;
+       indtuples += 1.0;
  
         /*
          * FormIndexDatum fills in its datum and null parameters with
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 88e56869da58eee31d6c7b0a764b93c6c73476a7..24cc7a8b254dc9a10dea74b263e52cf30f477964 100644 (file)
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,19 +8,16 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #include "postgres.h"
  
-#include <sys/types.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
+#include <math.h>
  
  #include "access/heapam.h"
+#include "access/tuptoaster.h"
  #include "catalog/catname.h"
  #include "catalog/indexing.h"
  #include "catalog/pg_operator.h"
@@ -29,43 +26,139 @@
  #include "commands/vacuum.h"
  #include "miscadmin.h"
  #include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
+#include "utils/datum.h"
  #include "utils/fmgroids.h"
-#include "utils/inval.h"
  #include "utils/syscache.h"
+#include "utils/tuplesort.h"
  
-#define swapLong(a,b)  {long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b)   {int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
-                                  stats->f_cmpgt.fn_addr != NULL && \
-                                  RegProcedureIsValid(stats->outfunc) )
  
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+   ALG_MINIMAL = 1,            /* Compute only most-common-values */
+   ALG_SCALAR                  /* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value.  For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD  256
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed.  The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ */
+typedef struct
+{
+   /* These fields are set up by examine_attribute */
+   int         attnum;         /* attribute number */
+   AlgCode     algcode;        /* Which algorithm to use for this column */
+   int         minrows;        /* Minimum # of rows needed for stats */
+   Form_pg_attribute attr;     /* copy of pg_attribute row for column */
+   Form_pg_type attrtype;      /* copy of pg_type row for column */
+   Oid         eqopr;          /* '=' operator for datatype, if any */
+   Oid         eqfunc;         /* and associated function */
+   Oid         ltopr;          /* '<' operator for datatype, if any */
+
+   /* These fields are filled in by the actual statistics-gathering routine */
+   bool        stats_valid;
+   float4      stanullfrac;    /* fraction of entries that are NULL */
+   int4        stawidth;       /* average width */
+   float4      stadistinct;    /* # distinct values */
+   int2        stakind[STATISTIC_NUM_SLOTS];
+   Oid         staop[STATISTIC_NUM_SLOTS];
+   int         numnumbers[STATISTIC_NUM_SLOTS];
+   float4     *stanumbers[STATISTIC_NUM_SLOTS];
+   int         numvalues[STATISTIC_NUM_SLOTS];
+   Datum      *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+   Datum       value;          /* a data value */
+   int         tupno;          /* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+   int         count;          /* # of duplicates */
+   int         first;          /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+
+#define swapInt(a,b)   {int _tmp; _tmp=a; a=b; b=_tmp;}
+#define swapDatum(a,b) {Datum _tmp; _tmp=a; a=b; b=_tmp;}
  
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+                              int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+                                       BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+                                 TupleDesc tupDesc, long totalrows,
+                                 HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+                                TupleDesc tupDesc, long totalrows,
+                                HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
  
  
  /*
- * analyze_rel() -- analyze relation
+ * analyze_rel() -- analyze one relation
   */
  void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
  {
-   HeapTuple   tuple;
     Relation    onerel;
-   int32       i;
-   int         attr_cnt,
-              *attnums = NULL;
     Form_pg_attribute *attr;
-   VacAttrStats *vacattrstats;
-   HeapScanDesc scan;
+   int         attr_cnt,
+               tcnt,
+               i;
+   VacAttrStats **vacattrstats;
+   int         targrows,
+               numrows;
+   long        totalrows;
+   HeapTuple  *rows;
+   HeapTuple   tuple;
+
+   if (vacstmt->verbose)
+       MESSAGE_LEVEL = NOTICE;
+   else
+       MESSAGE_LEVEL = DEBUG;
  
+   /*
+    * Begin a transaction for analyzing this relation.
+    *
+    * Note: All memory allocated during ANALYZE will live in
+    * TransactionCommandContext or a subcontext thereof, so it will
+    * all be released by transaction commit at the end of this routine.
+    */
     StartTransactionCommand();
  
     /*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
  
     /*
      * Race condition -- if the pg_class tuple has gone away since the
-    * last time we saw it, we don't need to vacuum it.
+    * last time we saw it, we don't need to process it.
      */
     tuple = SearchSysCache(RELOID,
                            ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
     }
  
     /*
-    * We can VACUUM ANALYZE any table except pg_statistic. see
-    * update_relstats
+    * We can ANALYZE any table except pg_statistic. See update_attstats
      */
     if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
                StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
     }
     ReleaseSysCache(tuple);
  
+   /*
+    * Open the class, getting only a read lock on it, and check permissions
+    */
     onerel = heap_open(relid, AccessShareLock);
  
     if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
                        RELNAME))
     {
-
-       /*
-        * we already did an elog during vacuum elog(NOTICE, "Skipping
-        * \"%s\" --- only table owner can VACUUM it",
-        * RelationGetRelationName(onerel));
-        */
+       /* No need for a notice if we already complained during VACUUM */
+       if (!vacstmt->vacuum)
+           elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+                RelationGetRelationName(onerel));
         heap_close(onerel, NoLock);
         CommitTransactionCommand();
         return;
     }
  
-   elog(MESSAGE_LEVEL, "Analyzing...");
+   elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
  
-   attr_cnt = onerel->rd_att->natts;
+   /*
+    * Determine which columns to analyze
+    *
+    * Note that system attributes are never analyzed.
+    */
     attr = onerel->rd_att->attrs;
+   attr_cnt = onerel->rd_att->natts;
  
-   if (anal_cols2 != NIL)
+   if (vacstmt->va_cols != NIL)
     {
-       int         tcnt = 0;
         List       *le;
  
-       if (length(anal_cols2) > attr_cnt)
-           elog(ERROR, "vacuum: too many attributes specified for relation %s",
-                RelationGetRelationName(onerel));
-       attnums = (int *) palloc(attr_cnt * sizeof(int));
-       foreach(le, anal_cols2)
+       vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       foreach(le, vacstmt->va_cols)
         {
-           char       *col = (char *) lfirst(le);
+           char       *col = strVal(lfirst(le));
  
             for (i = 0; i < attr_cnt; i++)
             {
                 if (namestrcmp(&(attr[i]->attname), col) == 0)
                     break;
             }
-           if (i < attr_cnt)   /* found */
-               attnums[tcnt++] = i;
-           else
-           {
-               elog(ERROR, "vacuum: there is no attribute %s in %s",
+           if (i >= attr_cnt)
+               elog(ERROR, "ANALYZE: there is no attribute %s in %s",
                      col, RelationGetRelationName(onerel));
-           }
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
+       }
+       attr_cnt = tcnt;
+   }
+   else
+   {
+       vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+                                               sizeof(VacAttrStats *));
+       tcnt = 0;
+       for (i = 0; i < attr_cnt; i++)
+       {
+           vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+           if (vacattrstats[tcnt] != NULL)
+               tcnt++;
         }
         attr_cnt = tcnt;
     }
  
-   vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+   /*
+    * Quit if no analyzable columns
+    */
+   if (attr_cnt <= 0)
+   {
+       heap_close(onerel, NoLock);
+       CommitTransactionCommand();
+       return;
+   }
  
+   /*
+    * Determine how many rows we need to sample, using the worst case
+    * from all analyzable columns.  We use a lower bound of 100 rows
+    * to avoid possible overflow in Vitter's algorithm.
+    */
+   targrows = 100;
     for (i = 0; i < attr_cnt; i++)
     {
-       Operator    func_operator;
-       VacAttrStats *stats;
-
-       stats = &vacattrstats[i];
-       stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
-       memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
-              ATTRIBUTE_TUPLE_SIZE);
-       stats->best = stats->guess1 = stats->guess2 = 0;
-       stats->max = stats->min = 0;
-       stats->best_len = stats->guess1_len = stats->guess2_len = 0;
-       stats->max_len = stats->min_len = 0;
-       stats->initialized = false;
-       stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
-       stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
-       func_operator = compatible_oper("=",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
-       {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
-           ReleaseSysCache(func_operator);
-       }
-       else
-           stats->f_cmpeq.fn_addr = NULL;
+       if (targrows < vacattrstats[i]->minrows)
+           targrows = vacattrstats[i]->minrows;
+   }
+
+   /*
+    * Acquire the sample rows
+    */
+   rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+   numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
  
-       func_operator = compatible_oper("<",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we are running a standalone ANALYZE, update pages/tuples stats
+    * in pg_class.  We have the accurate page count from heap_beginscan,
+    * but only an approximate number of tuples; therefore, if we are
+    * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+    * inserted by VACUUM.
+    */
+   if (!vacstmt->vacuum)
+       vac_update_relstats(RelationGetRelid(onerel),
+                           onerel->rd_nblocks,
+                           (double) totalrows,
+                           RelationGetForm(onerel)->relhasindex);
+
+   /*
+    * Compute the statistics.  Temporary results during the calculations
+    * for each column are stored in a child context.  The calc routines
+    * are responsible to make sure that whatever they store into the
+    * VacAttrStats structure is allocated in TransactionCommandContext.
+    */
+   if (numrows > 0)
+   {
+       MemoryContext col_context,
+                   old_context;
+
+       col_context = AllocSetContextCreate(CurrentMemoryContext,
+                                           "Analyze Column",
+                                           ALLOCSET_DEFAULT_MINSIZE,
+                                           ALLOCSET_DEFAULT_INITSIZE,
+                                           ALLOCSET_DEFAULT_MAXSIZE);
+       old_context = MemoryContextSwitchTo(col_context);
+       for (i = 0; i < attr_cnt; i++)
         {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
-           stats->op_cmplt = oprid(func_operator);
-           ReleaseSysCache(func_operator);
+           switch (vacattrstats[i]->algcode)
+           {
+               case ALG_MINIMAL:
+                   compute_minimal_stats(vacattrstats[i],
+                                         onerel->rd_att, totalrows,
+                                         rows, numrows);
+                   break;
+               case ALG_SCALAR:
+                   compute_scalar_stats(vacattrstats[i],
+                                        onerel->rd_att, totalrows,
+                                        rows, numrows);
+                   break;
+           }
+           MemoryContextResetAndDeleteChildren(col_context);
         }
-       else
+       MemoryContextSwitchTo(old_context);
+       MemoryContextDelete(col_context);
+
+       /*
+        * Emit the completed stats rows into pg_statistic, replacing any
+        * previous statistics for the target columns.  (If there are stats
+        * in pg_statistic for columns we didn't process, we leave them alone.)
+        */
+       update_attstats(relid, attr_cnt, vacattrstats);
+   }
+
+   /*
+    * Close source relation now, but keep lock so that no one deletes it
+    * before we commit.  (If someone did, they'd fail to clean up the
+    * entries we made in pg_statistic.)
+    */
+   heap_close(onerel, NoLock);
+
+   /* Commit and release working memory */
+   CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+   Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+   Operator    func_operator;
+   Oid         oprrest;
+   HeapTuple   typtuple;
+   Oid         eqopr = InvalidOid;
+   Oid         eqfunc = InvalidOid;
+   Oid         ltopr = InvalidOid;
+   VacAttrStats *stats;
+
+   /* Don't analyze column if user has specified not to */
+   if (attr->attstattarget <= 0)
+       return NULL;
+
+   /* If column has no "=" operator, we can't do much of anything */
+   func_operator = compatible_oper("=",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_EQSEL)
         {
-           stats->f_cmplt.fn_addr = NULL;
-           stats->op_cmplt = InvalidOid;
+           eqopr = oprid(func_operator);
+           eqfunc = oprfuncid(func_operator);
         }
+       ReleaseSysCache(func_operator);
+   }
+   if (!OidIsValid(eqfunc))
+       return NULL;
  
-       func_operator = compatible_oper(">",
-                                       stats->attr->atttypid,
-                                       stats->attr->atttypid,
-                                       true);
-       if (func_operator != NULL)
+   /*
+    * If we have "=" then we're at least able to do the minimal algorithm,
+    * so start filling in a VacAttrStats struct.
+    */
+   stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+   MemSet(stats, 0, sizeof(VacAttrStats));
+   stats->attnum = attnum;
+   stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+   memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+   typtuple = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(attr->atttypid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(typtuple))
+       elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+   stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+   memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+   ReleaseSysCache(typtuple);
+   stats->eqopr = eqopr;
+   stats->eqfunc = eqfunc;
+
+   /* Is there a "<" operator with suitable semantics? */
+   func_operator = compatible_oper("<",
+                                   attr->atttypid,
+                                   attr->atttypid,
+                                   true);
+   if (func_operator != NULL)
+   {
+       oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+       if (oprrest == F_SCALARLTSEL)
         {
-           fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
-           ReleaseSysCache(func_operator);
+           ltopr = oprid(func_operator);
         }
-       else
-           stats->f_cmpgt.fn_addr = NULL;
+       ReleaseSysCache(func_operator);
+   }
+   stats->ltopr = ltopr;
+
+   /*
+    * Determine the algorithm to use (this will get more complicated later)
+    */
+   if (OidIsValid(ltopr))
+   {
+       /* Seems to be a scalar datatype */
+       stats->algcode = ALG_SCALAR;
+       /*--------------------
+        * The following choice of minrows is based on the paper
+        * "Random sampling for histogram construction: how much is enough?"
+        * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+        * Proceedings of ACM SIGMOD International Conference on Management
+        * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
+        * says that for table size n, histogram size k, maximum relative
+        * error in bin size f, and error probability gamma, the minimum
+        * random sample size is
+        *      r = 4 * k * ln(2*n/gamma) / f^2
+        * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+        *      r = 305.82 * k
+        * Note that because of the log function, the dependence on n is
+        * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+        * bin size error with probability 0.99.  So there's no real need to
+        * scale for n, which is a good thing because we don't necessarily
+        * know it at this point.
+        *--------------------
+        */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+   else
+   {
+       /* Can't do much but the minimal stuff */
+       stats->algcode = ALG_MINIMAL;
+       /* Might as well use the same minrows as above */
+       stats->minrows = 300 * attr->attstattarget;
+   }
+
+   return stats;
+}
  
-       tuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(stats->attr->atttypid),
-                              0, 0, 0);
-       if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are collected (if there are fewer than that many
+ * rows in the table, all rows are collected).  When the table is larger
+ * than targrows, a truly random sample is collected: every row has an
+ * equal chance of ending up in the final sample.
+ *
+ * We also estimate the total number of rows in the table, and return that
+ * into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ *
+ * onerel is the relation to sample.  rows[] must have room for targrows
+ * entries; it receives copies (made with heap_copytuple) of the selected
+ * tuples.  The function result is the number of entries of rows[] that
+ * were filled.
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+                   long *totalrows)
+{
+   int         numrows = 0;
+   HeapScanDesc scan;
+   HeapTuple   tuple;
+   ItemPointer lasttuple;
+   BlockNumber lastblock,
+               estblock;
+   OffsetNumber lastoffset;
+   int         numest;
+   double      tuplesperpage;
+   long        t;
+   double      rstate;
+
+   Assert(targrows > 1);
+   /*
+    * Do a simple linear scan until we reach the target number of rows.
+    */
+   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       rows[numrows++] = heap_copytuple(tuple);
+       if (numrows >= targrows)
+           break;
+   }
+   heap_endscan(scan);
+   /*
+    * If we ran out of tuples then we're done, no matter how few we
+    * collected.  No sort is needed, since they're already in order.
+    */
+   if (!HeapTupleIsValid(tuple))
+   {
+       *totalrows = numrows;
+       return numrows;
+   }
+   /*
+    * Otherwise, start replacing tuples in the sample until we reach the
+    * end of the relation.  This algorithm is from Jeff Vitter's paper
+    * (see full citation below).  It works by repeatedly computing the number
+    * of the next tuple we want to fetch, which will replace a randomly
+    * chosen element of the reservoir (current set of tuples).  At all times
+    * the reservoir is a true random sample of the tuples we've passed over
+    * so far, so when we fall off the end of the relation we're done.
+    *
+    * A slight difficulty is that since we don't want to fetch tuples or even
+    * pages that we skip over, it's not possible to fetch *exactly* the N'th
+    * tuple at each step --- we don't know how many valid tuples are on
+    * the skipped pages.  We handle this by assuming that the average number
+    * of valid tuples/page on the pages already scanned over holds good for
+    * the rest of the relation as well; this lets us estimate which page
+    * the next tuple should be on and its position in the page.  Then we
+    * fetch the first valid tuple at or after that position, being careful
+    * not to use the same tuple twice.  This approach should still give a
+    * good random sample, although it's not perfect.
+    */
+   lasttuple = &(rows[numrows-1]->t_self);
+   lastblock = ItemPointerGetBlockNumber(lasttuple);
+   lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+   /*
+    * If possible, estimate tuples/page using only completely-scanned pages.
+    * The loop below counts the sampled rows that lie on blocks before
+    * lastblock; those blocks have been read in full.
+    */
+   for (numest = numrows; numest > 0; numest--)
+   {
+       if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+           break;
+   }
+   if (numest == 0)
+   {
+       numest = numrows;       /* don't have a full page? */
+       estblock = lastblock + 1;
+   }
+   else
+   {
+       estblock = lastblock;
+   }
+   tuplesperpage = (double) numest / (double) estblock;
+
+   t = numrows;                /* t is the # of records processed so far */
+   rstate = init_selection_state(targrows);
+   for (;;)
+   {
+       double          targpos;
+       BlockNumber     targblock;
+       OffsetNumber    targoffset,
+                       maxoffset;
+
+       t = select_next_random_record(t, targrows, &rstate);
+       /* Try to read the t'th record in the table */
+       targpos = (double) t / tuplesperpage;
+       targblock = (BlockNumber) targpos;
+       /*
+        * Convert the fractional part of targpos into an offset within the
+        * page.  The multiplication must be done before truncating to int:
+        * the fraction is always in [0,1), so casting it first would always
+        * yield zero and pin targoffset to FirstOffsetNumber.
+        */
+       targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
+           FirstOffsetNumber;
+       /* Make sure we are past the last selected record */
+       if (targblock <= lastblock)
         {
-           stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
-           stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
-           ReleaseSysCache(tuple);
+           targblock = lastblock;
+           if (targoffset <= lastoffset)
+               targoffset = lastoffset + 1;
         }
-       else
+       /* Loop to find first valid record at or after given position */
+   pageloop:;
+       /*
+        * Have we fallen off the end of the relation?  (We rely on
+        * heap_beginscan to have updated rd_nblocks.)
+        */
+       if (targblock >= onerel->rd_nblocks)
+           break;
+       maxoffset = get_page_max_offset(onerel, targblock);
+       for (;;)
         {
-           stats->outfunc = InvalidOid;
-           stats->typelem = InvalidOid;
+           HeapTupleData targtuple;
+           Buffer      targbuffer;
+
+           if (targoffset > maxoffset)
+           {
+               /* Fell off end of this page, try next */
+               targblock++;
+               targoffset = FirstOffsetNumber;
+               goto pageloop;
+           }
+           ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+           heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+           if (targtuple.t_data != NULL)
+           {
+               /*
+                * Found a suitable tuple, so save it, replacing one old
+                * tuple at random
+                */
+               int     k = (int) (targrows * random_fract());
+
+               Assert(k >= 0 && k < targrows);
+               heap_freetuple(rows[k]);
+               rows[k] = heap_copytuple(&targtuple);
+               ReleaseBuffer(targbuffer);
+               lastblock = targblock;
+               lastoffset = targoffset;
+               break;
+           }
+           /* this tuple is dead, so advance to next one on same page */
+           targoffset++;
         }
     }
-   /* delete existing pg_statistic rows for relation */
-   del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
-   /* scan relation to gather statistics */
-   scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
  
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
-       attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+   /*
+    * Now we need to sort the collected tuples by position (itempointer).
+    */
+   qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
  
-   heap_endscan(scan);
+   /*
+    * Estimate total number of valid rows in relation.
+    */
+   *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
  
-   /* close rel, but keep lock so it doesn't go away before commit */
-   heap_close(onerel, NoLock);
+   return numrows;
+}
  
-   /* update statistics in pg_class */
-   update_attstats(relid, attr_cnt, vacattrstats);
+/* random_fract -- return a random double R with 0 < R < 1 (both bounds excluded) */
+static double
+random_fract(void)
+{
+   long    r;
  
-   CommitTransactionCommand();
+   /* Keep drawing until random() gives something strictly between the endpoints */
+   for (;;)
+   {
+       r = random();
+       if (r > 0 && r < MAX_RANDOM_VALUE)
+           break;
+   }
+   return (double) r / (double) MAX_RANDOM_VALUE;
  }
  
  /*
- * attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed and next record to process.  The only extra
+ * state needed between calls is W, a random state variable.
   *
- * We compute the column min, max, null and non-null counts.
- * Plus we attempt to find the count of the value that occurs most
- * frequently in each column.  These figures are used to compute
- * the selectivity of the column.
+ * init_selection_state computes the initial W value.
   *
- * We use a three-bucket cache to get the most frequent item.
- * The 'guess' buckets count hits.  A cache miss causes guess1
- * to get the most hit 'guess' item in the most recent cycle, and
- * the new item goes into guess2.  Whenever the total count of hits
- * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+   /*
+    * Starting W for the first application of Algorithm Z:
+    * exp(-ln(U)/n) for a random fraction U.
+    */
+   double  u = random_fract();
+
+   return exp(- log(u)/n);
+}
+
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+   /*
+    * t is the number of records processed so far, n is the size of the
+    * reservoir (sample), and *stateptr holds the random state variable W,
+    * which is read and written back only on the Algorithm Z path below.
+    * The result is the updated value of t, i.e. the number of the next
+    * record to process.
+    *
+    * The magic constant here is T from Vitter's paper
+    */
+   if (t <= (22 * n))
+   {
+       /* Process records using Algorithm X until t is large enough */
+       double  V,
+               quot;
+
+       V = random_fract();     /* Generate V */
+       t++;
+       quot = (double) (t - n) / (double) t;
+       /*
+        * Find min S satisfying (4.1).  Rather than keeping a separate skip
+        * count S, t itself is advanced once per skipped record while the
+        * running product quot stays above V.
+        */
+       while (quot > V)
+       {
+           t++;
+           quot *= (double) (t - n) / (double) t;
+       }
+   }
+   else
+   {
+       /* Now apply Algorithm Z */
+       double  W = *stateptr;
+       long    term = t - n + 1;
+       int     S;
+
+       for (;;)
+       {
+           long    numer,
+                   numer_lim,
+                   denom;
+           double  U,
+                   X,
+                   lhs,
+                   rhs,
+                   y,
+                   tmp;
+
+           /* Generate U and X */
+           U = random_fract();
+           X = t * (W - 1.0);
+           S = X;              /* S is tentatively set to floor(X) */
+           /*
+            * Test if U <= h(S)/cg(X) in the manner of (6.3).  If this test
+            * passes, S is accepted and the next W is derived from rhs/lhs
+            * without drawing another random number.
+            */
+           tmp = (double) (t + 1) / (double) term;
+           lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+           rhs = (((t + X)/(term + S)) * term)/t;
+           if (lhs <= rhs)
+           {
+               W = rhs/lhs;
+               break;
+           }
+           /*
+            * Test if U <= f(S)/cg(X).  y is built up as a product of
+            * numer/denom ratios; the branch below only selects the loop
+            * bounds for that product depending on whether n < S.
+            */
+           y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+           if (n < S)
+           {
+               denom = t;
+               numer_lim = term + S;
+           }
+           else
+           {
+               denom = t - n + S;
+               numer_lim = t + 1;
+           }
+           for (numer = t + S; numer >= numer_lim; numer--)
+           {
+               y *= (double) numer / (double) denom;
+               denom--;
+           }
+           W = exp(- log(random_fract())/n); /* Generate W in advance */
+           if (exp(log(y)/n) <= (t + X)/t)
+               break;
+       }
+       t += S + 1;             /* skip S records, then take the next one */
+       *stateptr = W;          /* save W for the next call */
+   }
+   return t;
+}
+
+/*
+ * compare_rows -- qsort callback that orders the rows[] array
+ *
+ * a and b each point at a HeapTuple element of the array.  Tuples are
+ * ordered by the block number of their t_self item pointer, and within a
+ * block by offset number.
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+   ItemPointer tida = &((* (HeapTuple *) a)->t_self);
+   ItemPointer tidb = &((* (HeapTuple *) b)->t_self);
+   BlockNumber blka = ItemPointerGetBlockNumber(tida);
+   BlockNumber blkb = ItemPointerGetBlockNumber(tidb);
+
+   if (blka != blkb)
+       return (blka < blkb) ? -1 : 1;
+   else
+   {
+       /* Same block, so the offset within the block decides */
+       OffsetNumber offa = ItemPointerGetOffsetNumber(tida);
+       OffsetNumber offb = ItemPointerGetOffsetNumber(tidb);
+
+       if (offa != offb)
+           return (offa < offb) ? -1 : 1;
+   }
+   return 0;
+}
+
+/*
+ * get_page_max_offset -- report the maximum offset number of one page
+ *
+ * Reads block "blocknumber" of "relation" and returns the page's
+ * PageGetMaxOffsetNumber() value.  An unreadable block is reported
+ * via elog(ERROR).
+ *
+ * This code probably ought to live in some other module.
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+   Buffer      buf = ReadBuffer(relation, blocknumber);
+   OffsetNumber maxoff;
+
+   if (!BufferIsValid(buf))
+       elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+            RelationGetRelationName(relation), (long) blocknumber);
+
+   /* Share-lock the buffer only while the page is being inspected */
+   LockBuffer(buf, BUFFER_LOCK_SHARE);
+   maxoff = PageGetMaxOffsetNumber(BufferGetPage(buf));
+   LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+   ReleaseBuffer(buf);
+
+   return maxoff;
+}
+
+
+/*
+ * compute_minimal_stats() -- compute minimal column statistics
   *
- * This method works perfectly for columns with unique values, and columns
- * with only two unique values, plus nulls.
+ * We use this when we can find only an "=" operator for the datatype.
   *
- * It becomes less perfect as the number of unique values increases and
- * their distribution in the table becomes more random.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, and the (estimated) number of distinct values.
   *
+ * The most common values are determined by brute force: we keep a list
+ * of previously seen values, ordered by number of times seen, as we scan
+ * the samples.  A newly seen value is inserted just after the last
+ * multiply-seen value, causing the bottommost (oldest) singly-seen value
+ * to drop off the list.  The accuracy of this method, and also its cost,
+ * depend mainly on the length of the list we are willing to keep.
+ *
+ * stats identifies the column (attr, attnum, attrtype, eqopr, eqfunc are
+ * read) and receives the results: stats_valid, stanullfrac (stored as
+ * nulls / sample rows), stawidth, stadistinct, and, when some value was
+ * seen more than once, an MCV entry in slot 0.  tupDesc is the descriptor
+ * used to extract the column from each sample tuple.  totalrows is the
+ * estimated number of rows in the whole table, and rows[0..numrows-1] is
+ * the sample.  If the sample holds no non-null value for the column,
+ * none of the result fields is assigned here.
   */
  static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+                     TupleDesc tupDesc, long totalrows,
+                     HeapTuple *rows, int numrows)
  {
     int         i;
-   TupleDesc   tupDesc = onerel->rd_att;
-
-   for (i = 0; i < attr_cnt; i++)
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   FmgrInfo    f_cmpeq;
+   typedef struct
+   {
+       Datum   value;
+       int     count;
+   } TrackItem;
+   TrackItem  *track;
+   int         track_cnt,
+               track_max;
+   int         num_mcv = stats->attr->attstattarget;
+
+   /* We track up to 2*n values for an n-element MCV list; but at least 10 */
+   track_max = 2 * num_mcv;
+   if (track_max < 10)
+       track_max = 10;
+   track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+   track_cnt = 0;
+
+   fmgr_info(stats->eqfunc, &f_cmpeq);
+
+   for (i = 0; i < numrows; i++)
     {
-       VacAttrStats *stats = &vacattrstats[i];
-       Datum       origvalue;
+       HeapTuple   tuple = rows[i];
         Datum       value;
         bool        isnull;
-       bool        value_hit;
-
-       if (!VacAttrStatsEqValid(stats))
-           continue;
-
-#ifdef _DROP_COLUMN_HACK__
-       if (COLUMN_IS_DROPPED(stats->attr))
-           continue;
-#endif  /* _DROP_COLUMN_HACK__ */
+       bool        match;
+       int         firstcount1,
+                   j;
  
-       origvalue = heap_getattr(tuple, stats->attr->attnum,
-                                tupDesc, &isnull);
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
  
+       /* Check for null/nonnull */
         if (isnull)
         {
-           stats->null_cnt++;
+           null_cnt++;
             continue;
         }
-       stats->nonnull_cnt++;
+       nonnull_cnt++;
  
         /*
-        * If the value is toasted, detoast it to avoid repeated
-        * detoastings and resultant memory leakage inside the comparison
-        * routines.
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
          */
-       if (!stats->attr->attbyval && stats->attr->attlen == -1)
-           value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
-       else
-           value = origvalue;
-
-       if (!stats->initialized)
+       if (is_varlena)
         {
-           bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
-           /* best_cnt gets incremented below */
-           bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
-           stats->guess1_cnt = stats->guess1_hits = 1;
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess2_hits = 1;
-           if (VacAttrStatsLtGtValid(stats))
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
             {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               /* min_cnt, max_cnt get incremented below */
+               toowide_cnt++;
+               continue;
             }
-           stats->initialized = true;
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
         }
  
-       if (VacAttrStatsLtGtValid(stats))
+       /*
+        * See if the value matches anything we're already tracking.
+        * While scanning, also note in firstcount1 the position of the
+        * first entry whose count is 1 (track_cnt if there is none).
+        */
+       match = false;
+       firstcount1 = track_cnt;
+       for (j = 0; j < track_cnt; j++)
         {
-           if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
-                                          value, stats->min)))
+           if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
             {
-               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-               stats->min_cnt = 1;
+               match = true;
+               break;
             }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->min)))
-               stats->min_cnt++;
+           if (j < firstcount1 && track[j].count == 1)
+               firstcount1 = j;
+       }
  
-           if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
-                                          value, stats->max)))
+       if (match)
+       {
+           /* Found a match */
+           track[j].count++;
+           /* This value may now need to "bubble up" in the track list */
+           while (j > 0 && track[j].count > track[j-1].count)
             {
-               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-               stats->max_cnt = 1;
+               swapDatum(track[j].value, track[j-1].value);
+               swapInt(track[j].count, track[j-1].count);
+               j--;
             }
-           else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                               value, stats->max)))
-               stats->max_cnt++;
         }
-
-       value_hit = true;
-       if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                      value, stats->best)))
-           stats->best_cnt++;
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess1)))
+       else
         {
-           stats->guess1_cnt++;
-           stats->guess1_hits++;
+           /*
+            * No match.  Insert at head of count-1 list.  If the list is
+            * already full, the shift below overwrites its last entry; if
+            * it is full and holds no count-1 entry at all (firstcount1
+            * equals track_cnt), the new value is simply not tracked.
+            */
+           if (track_cnt < track_max)
+               track_cnt++;
+           for (j = track_cnt-1; j > firstcount1; j--)
+           {
+               track[j].value = track[j-1].value;
+               track[j].count = track[j-1].count;
+           }
+           if (firstcount1 < track_cnt)
+           {
+               track[firstcount1].value = value;
+               track[firstcount1].count = 1;
+           }
         }
-       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                           value, stats->guess2)))
-           stats->guess2_hits++;
+   }
+
+   /* We can only compute valid stats if we found some non-null values. */
+   if (nonnull_cnt > 0)
+   {
+       int     nmultiple,
+               summultiple;
+
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
         else
-           value_hit = false;
+           stats->stawidth = stats->attrtype->typlen;
  
-       if (stats->guess2_hits > stats->guess1_hits)
+       /*
+        * Count the number of values we found multiple times.  Since the
+        * track list is kept ordered by count, these are its leading
+        * entries; summultiple is the total of their counts.
+        */
+       summultiple = 0;
+       for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
         {
-           swapDatum(stats->guess1, stats->guess2);
-           swapInt(stats->guess1_len, stats->guess2_len);
-           swapLong(stats->guess1_hits, stats->guess2_hits);
-           stats->guess1_cnt = stats->guess1_hits;
+           if (track[nmultiple].count == 1)
+               break;
+           summultiple += track[nmultiple].count;
         }
-       if (stats->guess1_cnt > stats->best_cnt)
+
+       if (nmultiple == 0)
         {
-           swapDatum(stats->best, stats->guess1);
-           swapInt(stats->best_len, stats->guess1_len);
-           swapLong(stats->best_cnt, stats->guess1_cnt);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
         }
-       if (!value_hit)
+       else if (track_cnt < track_max && toowide_cnt == 0 &&
+                nmultiple == track_cnt)
         {
-           bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-           stats->guess1_hits = 1;
-           stats->guess2_hits = 1;
+           /*
+            * Our track list includes every value in the sample, and every
+            * value appeared more than once.  Assume the column has just
+            * these values.
+            */
+           stats->stadistinct = track_cnt;
         }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * We assume (not very reliably!) that all the multiply-occurring
+            * values are reflected in the final track[] list, and the other
+            * nonnull values all appeared but once.
+            *----------
+            */
+           int     f1 = nonnull_cnt - summultiple;
+           double  term1;
  
-       /* Clean up detoasted copy, if any */
-       if (value != origvalue)
-           pfree(DatumGetPointer(value));
-   }
-}
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
  
-/*
- * bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
-   if (attr->attbyval)
-       *bucket = value;
-   else
-   {
-       int         len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.  In that case it is stored as a negative number:
+        * minus the ratio of distinct values to total rows.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
  
-       /* Avoid unnecessary palloc() traffic... */
-       if (len > *bucket_len)
+       /*
+        * Generate an MCV slot entry, only if we found multiples.  At most
+        * attstattarget values are reported, and only values that were
+        * seen more than once in the sample.
+        */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       if (num_mcv > 0)
         {
-           if (*bucket_len != 0)
-               pfree(DatumGetPointer(*bucket));
-           *bucket = PointerGetDatum(palloc(len));
-           *bucket_len = len;
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(track[i].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
+           }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[0] = STATISTIC_KIND_MCV;
+           stats->staop[0] = stats->eqopr;
+           stats->stanumbers[0] = mcv_freqs;
+           stats->numnumbers[0] = num_mcv;
+           stats->stavalues[0] = mcv_values;
+           stats->numvalues[0] = num_mcv;
         }
-       memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
     }
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
  }
  
  
  /*
- * update_attstats() -- update attribute statistics for one relation
+ * compute_scalar_stats() -- compute column statistics
   *
- *     Statistics are stored in several places: the pg_class row for the
- *     relation has stats about the whole relation, the pg_attribute rows
- *     for each attribute store "dispersion", and there is a pg_statistic
- *     row for each (non-system) attribute.  (Dispersion probably ought to
- *     be moved to pg_statistic, but it's not worth doing unless there's
- *     another reason to have to change pg_attribute.)  The pg_class values
- *     are updated by VACUUM, not here.
- *
- *     We violate no-overwrite semantics here by storing new values for
- *     the dispersion column directly into the pg_attribute tuple that's
- *     already on the page.  The reason for this is that if we updated
- *     these tuples in the usual way, vacuuming pg_attribute itself
- *     wouldn't work very well --- by the time we got done with a vacuum
- *     cycle, most of the tuples in pg_attribute would've been obsoleted.
- *     Updating pg_attribute's own statistics would be especially tricky.
- *     Of course, this only works for fixed-size never-null columns, but
- *     dispersion is.
+ * We use this when we can find "=" and "<" operators for the datatype.
   *
- *     pg_statistic rows are just added normally.  This means that
- *     pg_statistic will probably contain some deleted rows at the
- *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, the (estimated) number of distinct values, the
+ * distribution histogram, and the correlation of physical to logical order.
   *
- *     To keep things simple, we punt for pg_statistic, and don't try
- *     to compute or store rows for pg_statistic itself in pg_statistic.
- *     This could possibly be made to work, but it's not worth the trouble.
+ * The desired stats can be determined fairly easily after sorting the
+ * data values into order.
   */
  static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+                    TupleDesc tupDesc, long totalrows,
+                    HeapTuple *rows, int numrows)
  {
-   Relation    ad,
-               sd;
-   HeapScanDesc scan;
-   HeapTuple   atup,
-               stup;
-   ScanKeyData askey;
-   Form_pg_attribute attp;
-
-   ad = heap_openr(AttributeRelationName, RowExclusiveLock);
-   sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
-   /* Find pg_attribute rows for this relation */
-   ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
-                          F_INT4EQ, relid);
-
-   scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
-   while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+   int         i;
+   int         null_cnt = 0;
+   int         nonnull_cnt = 0;
+   int         toowide_cnt = 0;
+   double      total_width = 0;
+   bool        is_varlena = (!stats->attr->attbyval &&
+                             stats->attr->attlen == -1);
+   double      corr_xysum;
+   RegProcedure cmpFn;
+   SortFunctionKind cmpFnKind;
+   FmgrInfo    f_cmpfn;
+   ScalarItem *values;
+   int         values_cnt = 0;
+   int        *tupnoLink;
+   ScalarMCVItem *track;
+   int         track_cnt = 0;
+   int         num_mcv = stats->attr->attstattarget;
+
+   values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+   tupnoLink = (int *) palloc(numrows * sizeof(int));
+   track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+   SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+   fmgr_info(cmpFn, &f_cmpfn);
+
+   /* Initial scan to find sortable values */
+   for (i = 0; i < numrows; i++)
     {
-       int         i;
-       VacAttrStats *stats;
+       HeapTuple   tuple = rows[i];
+       Datum       value;
+       bool        isnull;
  
-       attp = (Form_pg_attribute) GETSTRUCT(atup);
-       if (attp->attnum <= 0)  /* skip system attributes for now */
-           continue;
+       value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
  
-       for (i = 0; i < natts; i++)
+       /* Check for null/nonnull */
+       if (isnull)
         {
-           if (attp->attnum == vacattrstats[i].attr->attnum)
-               break;
+           null_cnt++;
+           continue;
         }
-       if (i >= natts)
-           continue;           /* skip attr if no stats collected */
-       stats = &(vacattrstats[i]);
+       nonnull_cnt++;
  
-       if (VacAttrStatsEqValid(stats))
+       /*
+        * If it's a varlena field, add up widths for average width
+        * calculation.  Note that if the value is toasted, we
+        * use the toasted width.  We don't bother with this calculation
+        * if it's a fixed-width type.
+        */
+       if (is_varlena)
         {
-           float4      selratio;       /* average ratio of rows selected
-                                        * for a random constant */
-
-           /* Compute dispersion */
-           if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+           total_width += VARSIZE(DatumGetPointer(value));
+           /*
+            * If the value is toasted, we want to detoast it just once to
+            * avoid repeated detoastings and resultant excess memory usage
+            * during the comparisons.  Also, check to see if the value is
+            * excessively wide, and if so don't detoast at all --- just
+            * ignore the value.
+            */
+           if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
             {
-
-               /*
-                * empty relation, so put a dummy value in attdispersion
-                */
-               selratio = 0;
+               toowide_cnt++;
+               continue;
             }
-           else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
-           {
+           value = PointerGetDatum(PG_DETOAST_DATUM(value));
+       }
  
-               /*
-                * looks like we have a unique-key attribute --- flag this
-                * with special -1.0 flag value.
-                *
-                * The correct dispersion is 1.0/numberOfRows, but since the
-                * relation row count can get updated without recomputing
-                * dispersion, we want to store a "symbolic" value and
-                * figure 1.0/numberOfRows on the fly.
-                */
-               selratio = -1;
-           }
-           else
+       /* Add it to the list to be sorted */
+       values[values_cnt].value = value;
+       values[values_cnt].tupno = values_cnt;
+       tupnoLink[values_cnt] = values_cnt;
+       values_cnt++;
+   }
+
+   /* We can only compute valid stats if we found some sortable values. */
+   if (values_cnt > 0)
+   {
+       int     ndistinct,      /* # distinct values in sample */
+               nmultiple,      /* # that appear multiple times */
+               num_hist,
+               dups_cnt;
+       int     slot_idx = 0;
+
+       /* Sort the collected values */
+       datumCmpFn = &f_cmpfn;
+       datumCmpFnKind = cmpFnKind;
+       datumCmpTupnoLink = tupnoLink;
+       qsort((void *) values, values_cnt,
+             sizeof(ScalarItem), compare_scalars);
+
+       /*
+        * Now scan the values in order, find the most common ones,
+        * and also accumulate ordering-correlation statistics.
+        *
+        * To determine which are most common, we first have to count the
+        * number of duplicates of each value.  The duplicates are adjacent
+        * in the sorted list, so a brute-force approach is to compare
+        * successive datum values until we find two that are not equal.
+        * However, that requires N-1 invocations of the datum comparison
+        * routine, which are completely redundant with work that was done
+        * during the sort.  (The sort algorithm must at some point have
+        * compared each pair of items that are adjacent in the sorted order;
+        * otherwise it could not know that it's ordered the pair correctly.)
+        * We exploit this by having compare_scalars remember the highest
+        * tupno index that each ScalarItem has been found equal to.  At the
+        * end of the sort, a ScalarItem's tupnoLink will still point to
+        * itself if and only if it is the last item of its group of
+        * duplicates (since the group will be ordered by tupno).
+        */
+       corr_xysum = 0;
+       ndistinct = 0;
+       nmultiple = 0;
+       dups_cnt = 0;
+       for (i = 0; i < values_cnt; i++)
+       {
+           int         tupno = values[i].tupno;
+
+           corr_xysum += (double) i * (double) tupno;
+           dups_cnt++;
+           if (tupnoLink[tupno] == tupno)
             {
-               if (VacAttrStatsLtGtValid(stats) &&
-                   stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+               /* Reached end of duplicates of this value */
+               ndistinct++;
+               if (dups_cnt > 1)
                 {
+                   nmultiple++;
+                   if (track_cnt < num_mcv ||
+                       dups_cnt > track[track_cnt-1].count)
+                   {
+                       /*
+                        * Found a new item for the mcv list; find its
+                        * position, bubbling down old items if needed.
+                        * Loop invariant is that j points at an empty/
+                        * replaceable slot.
+                        */
+                       int     j;
+
+                       if (track_cnt < num_mcv)
+                           track_cnt++;
+                       for (j = track_cnt-1; j > 0; j--)
+                       {
+                           if (dups_cnt <= track[j-1].count)
+                               break;
+                           track[j].count = track[j-1].count;
+                           track[j].first = track[j-1].first;
+                       }
+                       track[j].count = dups_cnt;
+                       track[j].first = i + 1 - dups_cnt;
+                   }
+               }
+               dups_cnt = 0;
+           }
+       }
  
-                   /*
-                    * exact result when there are just 1 or 2 values...
-                    */
-                   double      min_cnt_d = stats->min_cnt,
-                               max_cnt_d = stats->max_cnt,
-                               null_cnt_d = stats->null_cnt;
-                   double      total = ((double) stats->nonnull_cnt) + null_cnt_d;
+       stats->stats_valid = true;
+       /* Do the simple null-frac and width stats */
+       stats->stanullfrac = (double) null_cnt / (double) numrows;
+       if (is_varlena)
+           stats->stawidth = total_width / (double) nonnull_cnt;
+       else
+           stats->stawidth = stats->attrtype->typlen;
  
-                   selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
-               }
-               else
-               {
-                   double      most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-                   double      total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+       if (nmultiple == 0)
+       {
+           /* If we found no repeated values, assume it's a unique column */
+           stats->stadistinct = -1.0;
+       }
+       else if (toowide_cnt == 0 && nmultiple == ndistinct)
+       {
+           /*
+            * Every value in the sample appeared more than once.  Assume the
+            * column has just these values.
+            */
+           stats->stadistinct = ndistinct;
+       }
+       else
+       {
+           /*----------
+            * Estimate the number of distinct values using the estimator
+            * proposed by Chaudhuri et al (see citation above).  This is
+            *      sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+            * where fk is the number of distinct values that occurred
+            * exactly k times in our sample of r rows (from a total of n).
+            * Overwidth values are assumed to have been distinct.
+            *----------
+            */
+           int     f1 = ndistinct - nmultiple + toowide_cnt;
+           double  term1;
  
-                   /*
-                    * we assume count of other values are 20% of best
-                    * count in table
-                    */
-                   selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
-               }
-               /* Make sure calculated values are in-range */
-               if (selratio < 0.0)
-                   selratio = 0.0;
-               else if (selratio > 1.0)
-                   selratio = 1.0;
+           if (f1 < 1)
+               f1 = 1;
+           term1 = sqrt((double) totalrows / (double) numrows) * f1;
+           stats->stadistinct = floor(term1 + nmultiple + 0.5);
+       }
+
+       /*
+        * If we estimated the number of distinct values at more than 10%
+        * of the total row count (a very arbitrary limit), then assume
+        * that stadistinct should scale with the row count rather than be
+        * a fixed value.
+        */
+       if (stats->stadistinct > 0.1 * totalrows)
+           stats->stadistinct = - (stats->stadistinct / totalrows);
+
+       /* Generate an MCV slot entry, only if we found multiples */
+       if (nmultiple < num_mcv)
+           num_mcv = nmultiple;
+       Assert(track_cnt >= num_mcv);
+       if (num_mcv > 0)
+       {
+           MemoryContext old_context;
+           Datum  *mcv_values;
+           float4 *mcv_freqs;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+           mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+           for (i = 0; i < num_mcv; i++)
+           {
+               mcv_values[i] = datumCopy(values[track[i].first].value,
+                                         stats->attr->attbyval,
+                                         stats->attr->attlen);
+               mcv_freqs[i] = (double) track[i].count / (double) numrows;
             }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+           stats->staop[slot_idx] = stats->eqopr;
+           stats->stanumbers[slot_idx] = mcv_freqs;
+           stats->numnumbers[slot_idx] = num_mcv;
+           stats->stavalues[slot_idx] = mcv_values;
+           stats->numvalues[slot_idx] = num_mcv;
+           slot_idx++;
+       }
  
-           /* overwrite the existing statistics in the tuple */
-           attp->attdispersion = selratio;
+       /*
+        * Generate a histogram slot entry if there are at least two
+        * distinct values not accounted for in the MCV list.  (This
+        * ensures the histogram won't collapse to empty or a singleton.)
+        */
+       num_hist = ndistinct - num_mcv;
+       if (num_hist > stats->attr->attstattarget)
+           num_hist = stats->attr->attstattarget + 1;
+       if (num_hist >= 2)
+       {
+           MemoryContext old_context;
+           Datum  *hist_values;
+           int     nvals;
  
-           /* invalidate the tuple in the cache and write the buffer */
-           RelationInvalidateHeapTuple(ad, atup);
-           WriteNoReleaseBuffer(scan->rs_cbuf);
+           /* Sort the MCV items into position order to speed next loop */
+           qsort((void *) track, num_mcv,
+                 sizeof(ScalarMCVItem), compare_mcvs);
  
             /*
-            * Create pg_statistic tuples for the relation, if we have
-            * gathered the right data.  del_stats() previously deleted
-            * all the pg_statistic tuples for the rel, so we just have to
-            * insert new ones here.
+            * Collapse out the MCV items from the values[] array.
              *
-            * Note analyze_rel() has seen to it that we won't come here when
-            * vacuuming pg_statistic itself.
+            * Note we destroy the values[] array here... but we don't need
+            * it for anything more.  We do, however, still need values_cnt.
              */
-           if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+           if (num_mcv > 0)
             {
-               float4      nullratio;
-               float4      bestratio;
-               FmgrInfo    out_function;
-               char       *out_string;
-               double      best_cnt_d = stats->best_cnt,
-                           null_cnt_d = stats->null_cnt,
-                           nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
-               Datum       values[Natts_pg_statistic];
-               char        nulls[Natts_pg_statistic];
-               Relation    irelations[Num_pg_statistic_indices];
+               int     src,
+                       dest;
+               int     j;
  
-               nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
-               bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
-               fmgr_info(stats->outfunc, &out_function);
+               src = dest = 0;
+               j = 0;          /* index of next interesting MCV item */
+               while (src < values_cnt)
+               {
+                   int     ncopy;
+
+                   if (j < num_mcv)
+                   {
+                       int     first = track[j].first;
+
+                       if (src >= first)
+                       {
+                           /* advance past this MCV item */
+                           src = first + track[j].count;
+                           j++;
+                           continue;
+                       }
+                       ncopy = first - src;
+                   }
+                   else
+                   {
+                       ncopy = values_cnt - src;
+                   }
+                   memmove(&values[dest], &values[src],
+                           ncopy * sizeof(ScalarItem));
+                   src += ncopy;
+                   dest += ncopy;
+               }
+               nvals = dest;
+           }
+           else
+               nvals = values_cnt;
+           Assert(nvals >= num_hist);
  
-               for (i = 0; i < Natts_pg_statistic; ++i)
-                   nulls[i] = ' ';
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+           for (i = 0; i < num_hist; i++)
+           {
+               int     pos;
  
-               /*
-                * initialize values[]
-                */
-               i = 0;
-               values[i++] = ObjectIdGetDatum(relid);  /* starelid */
-               values[i++] = Int16GetDatum(attp->attnum);      /* staattnum */
-               values[i++] = ObjectIdGetDatum(stats->op_cmplt);        /* staop */
-               values[i++] = Float4GetDatum(nullratio);        /* stanullfrac */
-               values[i++] = Float4GetDatum(bestratio);        /* stacommonfrac */
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->best,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stacommonval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->min,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* staloval */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                          stats->max,
-                                       ObjectIdGetDatum(stats->typelem),
-                                Int32GetDatum(stats->attr->atttypmod)));
-               values[i++] = DirectFunctionCall1(textin,       /* stahival */
-                                           CStringGetDatum(out_string));
-               pfree(out_string);
-
-               stup = heap_formtuple(sd->rd_att, values, nulls);
-
-               /* store tuple and update indexes too */
-               heap_insert(sd, stup);
-
-               CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
-               CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
-               CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
-               /* release allocated space */
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
-               pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
-               heap_freetuple(stup);
+               pos = (i * (nvals - 1)) / (num_hist - 1);
+               hist_values[i] = datumCopy(values[pos].value,
+                                          stats->attr->attbyval,
+                                          stats->attr->attlen);
             }
+           MemoryContextSwitchTo(old_context);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stavalues[slot_idx] = hist_values;
+           stats->numvalues[slot_idx] = num_hist;
+           slot_idx++;
+       }
+
+       /* Generate a correlation entry if there are multiple values */
+       if (values_cnt > 1)
+       {
+           MemoryContext old_context;
+           float4 *corrs;
+           double  corr_xsum,
+                   corr_x2sum;
+
+           /* Must copy the target values into TransactionCommandContext */
+           old_context = MemoryContextSwitchTo(TransactionCommandContext);
+           corrs = (float4 *) palloc(sizeof(float4));
+           MemoryContextSwitchTo(old_context);
+
+           /*----------
+            * Since we know the x and y value sets are both
+            *      0, 1, ..., values_cnt-1
+            * we have sum(x) = sum(y) =
+            *      (values_cnt-1)*values_cnt / 2
+            * and sum(x^2) = sum(y^2) =
+            *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+            *----------
+            */
+           corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+           corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+               (double) (2*values_cnt-1) / 6.0;
+           /* And the correlation coefficient reduces to */
+           corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+               (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+           stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+           stats->staop[slot_idx] = stats->ltopr;
+           stats->stanumbers[slot_idx] = corrs;
+           stats->numnumbers[slot_idx] = 1;
+           slot_idx++;
         }
     }
-   heap_endscan(scan);
-   /* close rels, but hold locks till upcoming commit */
-   heap_close(ad, NoLock);
-   heap_close(sd, NoLock);
+
+   /* We don't need to bother cleaning up any of our temporary palloc's */
  }
  
  /*
- * del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
   *
- * If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums.  The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest
+ * tupno that that item's datum has been found to be equal to.  This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
   */
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
  {
-   Relation    pgstatistic;
-   HeapScanDesc scan;
-   HeapTuple   tuple;
-   ScanKeyData key;
+   Datum       da = ((ScalarItem *) a)->value;
+   int         ta = ((ScalarItem *) a)->tupno; /* also the index into datumCmpTupnoLink[] */
+   Datum       db = ((ScalarItem *) b)->value;
+   int         tb = ((ScalarItem *) b)->tupno;
  
-   pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+   if (datumCmpFnKind == SORTFUNC_LT) /* boolean "less than" function: probe both argument orders */
+   {
+       if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+           return -1;          /* a < b */
+       if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+           return 1;           /* a > b */
+   } /* neither call returned true: fall through and treat the datums as equal */
+   else
+   {
+       /* sort function is CMP or REVCMP: a single call yields a signed int32 */
+       int32   compare;
  
-   ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
-                          F_OIDEQ, ObjectIdGetDatum(relid));
-   scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+       compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+       if (compare != 0)
+       {
+           if (datumCmpFnKind == SORTFUNC_REVCMP)
+               compare = -compare; /* REVCMP: flip the sign before returning */
+           return compare;
+       }
+   }
  
-   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   /*
+    * The two datums are equal, so update datumCmpTupnoLink[].
+    */
+   if (datumCmpTupnoLink[ta] < tb) /* links are only ever raised, never lowered */
+       datumCmpTupnoLink[ta] = tb;
+   if (datumCmpTupnoLink[tb] < ta) /* same update in the other direction */
+       datumCmpTupnoLink[tb] = ta;
+
+   /*
+    * For equal datums, sort by tupno (ascending: negative when ta < tb)
+    */
+   return ta - tb;
+}
+
+/*
+ * qsort comparator for sorting ScalarMCVItems by position (ascending "first")
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+   int         da = ((ScalarMCVItem *) a)->first; /* start position recorded for item a */
+   int         db = ((ScalarMCVItem *) b)->first; /* start position recorded for item b */
+
+   return da - db; /* negative when a starts earlier than b */
+}
+
+
+/*
+ * update_attstats() -- update attribute statistics for one relation
+ *
+ *     Statistics are stored in several places: the pg_class row for the
+ *     relation has stats about the whole relation, and there is a
+ *     pg_statistic row for each (non-system) attribute that has ever
+ *     been analyzed.  The pg_class values are updated by VACUUM, not here.
+ *
+ *     pg_statistic rows are just added or updated normally.  This means
+ *     that pg_statistic will probably contain some deleted rows at the
+ *     completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ *     To keep things simple, we punt for pg_statistic, and don't try
+ *     to compute or store rows for pg_statistic itself in pg_statistic.
+ *     This could possibly be made to work, but it's not worth the trouble.
+ *     Note analyze_rel() has seen to it that we won't come here when
+ *     vacuuming pg_statistic itself.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+   Relation    sd; /* the pg_statistic relation, opened just below */
+   int         attno; /* index into vacattrstats[], not a column number */
+
+   /*
+    * We use an ExclusiveLock on pg_statistic to ensure that only one
+    * backend is writing it at a time --- without that, we might have to
+    * deal with concurrent updates here, and it's not worth the trouble.
+    */
+   sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+   for (attno = 0; attno < natts; attno++)
     {
-       if (attcnt > 0)
+       VacAttrStats *stats = vacattrstats[attno];
+       FmgrInfo    out_function;
+       HeapTuple   stup,
+                   oldtup;
+       int         i, k, n;
+       Datum       values[Natts_pg_statistic];
+       char        nulls[Natts_pg_statistic];
+       char        replaces[Natts_pg_statistic];
+       Relation    irelations[Num_pg_statistic_indices];
+
+       /* Ignore attr if we weren't able to collect stats */
+       if (!stats->stats_valid)
+           continue;
+
+       fmgr_info(stats->attrtype->typoutput, &out_function); /* type's output function, used for stavaluesN below */
+
+       /*
+        * Construct a new pg_statistic tuple
+        */
+       for (i = 0; i < Natts_pg_statistic; ++i)
         {
-           Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
-           int         i;
+           nulls[i] = ' '; /* presumably "not null"; empty slots below are set to 'n' */
+           replaces[i] = 'r'; /* only consumed by heap_modifytuple(); presumably "replace this column" */
+       }
  
-           for (i = 0; i < attcnt; i++)
+       i = 0; /* from here on i walks the values[]/nulls[] columns in order */
+       values[i++] = ObjectIdGetDatum(relid); /* starelid */
+       values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+       values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+       values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+       values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+       }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     nnum = stats->numnumbers[k];
+
+           if (nnum > 0)
             {
-               if (pgs->staattnum == attnums[i] + 1)
-                   break;
+               Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+               ArrayType  *arry;
+
+               for (n = 0; n < nnum; n++)
+                   numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+               /* XXX knows more than it should about type float4: */
+               arry = construct_array(numdatums, nnum,
+                                      false, sizeof(float4), 'i');
+               values[i++] = PointerGetDatum(arry); /* stanumbersN */
+           }
+           else
+           {
+               nulls[i] = 'n'; /* no numbers for this slot: flag the column in nulls[] */
+               values[i++] = (Datum) 0;
             }
-           if (i >= attcnt)
-               continue;       /* don't delete it */
         }
-       simple_heap_delete(pgstatistic, &tuple->t_self);
-   }
+       for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+       {
+           int     ntxt = stats->numvalues[k];
  
-   heap_endscan(scan);
+           if (ntxt > 0)
+           {
+               Datum      *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+               ArrayType  *arry;
  
-   /*
-    * Close rel, but *keep* lock; we will need to reacquire it later, so
-    * there's a possibility of deadlock against another VACUUM process if
-    * we let go now.  Keeping the lock shouldn't delay any common
-    * operation other than an attempted VACUUM of pg_statistic itself.
-    */
-   heap_close(pgstatistic, NoLock);
+               for (n = 0; n < ntxt; n++)
+               {
+                   /*
+                    * Convert data values to a text string to be inserted
+                    * into the text array.
+                    */
+                   Datum   stringdatum;
+
+                   stringdatum =
+                       FunctionCall3(&out_function,
+                                     stats->stavalues[k][n],
+                                     ObjectIdGetDatum(stats->attrtype->typelem),
+                                     Int32GetDatum(stats->attr->atttypmod));
+                   txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+                   pfree(DatumGetPointer(stringdatum)); /* release the output-function result; the textin result is kept */
+               }
+               /* XXX knows more than it should about type text: */
+               arry = construct_array(txtdatums, ntxt,
+                                      false, -1, 'i');
+               values[i++] = PointerGetDatum(arry); /* stavaluesN */
+           }
+           else
+           {
+               nulls[i] = 'n'; /* no values for this slot: flag the column in nulls[] */
+               values[i++] = (Datum) 0;
+           }
+       }
+
+       /* Is there already a pg_statistic tuple for this attribute? */
+       oldtup = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(stats->attnum),
+                               0, 0);
+
+       if (HeapTupleIsValid(oldtup))
+       {
+           /* Yes, replace it */
+           stup = heap_modifytuple(oldtup,
+                                   sd,
+                                   values,
+                                   nulls,
+                                   replaces);
+           ReleaseSysCache(oldtup); /* cache entry is not referenced again after this point */
+           simple_heap_update(sd, &stup->t_self, stup);
+       }
+       else
+       {
+           /* No, insert new tuple */
+           stup = heap_formtuple(sd->rd_att, values, nulls);
+           heap_insert(sd, stup);
+       }
+
+       /* update indices too (this runs for both the replace and the insert case) */
+       CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+                          irelations);
+       CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+       CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+       heap_freetuple(stup); /* frees the tuple built above; the numdatums/txtdatums arrays are not freed here */
+   }
+
+   /* close rel, but hold lock till upcoming commit */
+   heap_close(sd, NoLock);
  }
diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c

index 96d493688e328aaa1fc4bf56bc12e18865f2ee33..13a78f1177390f0108702c94a7cc005e0a28b183 100644 (file)
--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
   *
   * NOTES
   *   The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
  #include "access/genam.h"
  
  
+static void drop_default(Oid relid, int16 attnum);
  static bool needs_toast_table(Relation rel);
  static bool is_relation(char *name);
  
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
         HeapTuple   typeTuple;
         Form_pg_type tform;
         char       *typename;
-       int         attnelems;
+       int         attndims;
  
         if (SearchSysCacheExists(ATTNAME,
                                  ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
  
         if (colDef->typename->arrayBounds)
         {
-           attnelems = length(colDef->typename->arrayBounds);
+           attndims = length(colDef->typename->arrayBounds);
             typename = makeArrayTypeName(colDef->typename->name);
         }
         else
-           attnelems = 0;
+           attndims = 0;
  
         typeTuple = SearchSysCache(TYPENAME,
                                    PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
         namestrcpy(&(attribute->attname), colDef->colname);
         attribute->atttypid = typeTuple->t_data->t_oid;
         attribute->attlen = tform->typlen;
-       attribute->attdispersion = 0;
+       attribute->attstattarget = DEFAULT_ATTSTATTARGET;
         attribute->attcacheoff = -1;
         attribute->atttypmod = colDef->typename->typmod;
         attribute->attnum = i;
         attribute->attbyval = tform->typbyval;
-       attribute->attnelems = attnelems;
+       attribute->attndims = attndims;
         attribute->attisset = (bool) (tform->typtype == 'c');
         attribute->attstorage = tform->typstorage;
         attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
  }
  
  
-
-static void drop_default(Oid relid, int16 attnum);
-
-
  /*
   * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
   */
  void
-AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+                            bool inh, const char *colName,
+                            Node *newDefault)
  {
     Relation    rel;
     HeapTuple   tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
             if (childrelid == myrelid)
                 continue;
             rel = heap_open(childrelid, AccessExclusiveLock);
-           AlterTableAlterColumn(RelationGetRelationName(rel),
-                                 false, colName, newDefault);
+           AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+                                        false, colName, newDefault);
             heap_close(rel, AccessExclusiveLock);
         }
     }
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
     /* -= now do the thing on this relation =- */
  
     /* reopen the business */
-   rel = heap_openr((char *) relationName, AccessExclusiveLock);
+   rel = heap_openr(relationName, AccessExclusiveLock);
  
     /*
      * get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
  }
  
  
-
  static void
  drop_default(Oid relid, int16 attnum)
  {
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
  }
  
  
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ *
+ * Store a new statistics target in the attstattarget field of the
+ * pg_attribute row for column colName of table relationName.
+ *
+ * relationName: name of the table; its relkind must be RELKIND_RELATION.
+ * inh: if true, the same change is first applied to every relation that
+ *     find_all_inheritors() returns, other than relationName itself.
+ * colName: name of the column to change; attributes with attnum < 0
+ *     (system attributes) are rejected.
+ * statsTarget: Integer node holding the requested target.  Values
+ *     outside 0..1000 are clamped to that range rather than rejected.
+ *
+ * Failures are reported with elog(ERROR): permission denied, relation is
+ * not a table, no such column, or column is a system attribute.
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+                               bool inh, const char *colName,
+                               Node *statsTarget)
+{
+   Relation    rel;
+   Oid         myrelid;
+   int         newtarget;
+   Relation    attrelation;
+   HeapTuple   tuple;
+
+   /* ownership check; compiled out when NO_SECURITY is defined */
+#ifndef NO_SECURITY
+   if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+       elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+   /*
+    * Open the relation with AccessExclusiveLock, verify that it is a
+    * plain table, and remember its OID for the steps below.
+    */
+   rel = heap_openr(relationName, AccessExclusiveLock);
+   if (rel->rd_rel->relkind != RELKIND_RELATION)
+       elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+            relationName);
+   myrelid = RelationGetRelid(rel);
+   heap_close(rel, NoLock);    /* close rel, but keep lock! */
+
+   /*
+    * Propagate to children if desired
+    */
+   if (inh)
+   {
+       List       *child,
+                  *children;
+
+       /* this routine is actually in the planner */
+       children = find_all_inheritors(myrelid);
+
+       /*
+        * find_all_inheritors does the recursive search of the
+        * inheritance hierarchy, so all we have to do is process all of
+        * the relids in the list that it returns.
+        */
+       foreach(child, children)
+       {
+           Oid         childrelid = lfirsti(child);
+
+           /* skip our own OID; this relation is updated after the loop */
+           if (childrelid == myrelid)
+               continue;
+           rel = heap_open(childrelid, AccessExclusiveLock);
+           /* inh = false here: the list above is already the full set */
+           AlterTableAlterColumnStatistics(RelationGetRelationName(rel),
+                                           false, colName, statsTarget);
+           heap_close(rel, AccessExclusiveLock);
+       }
+   }
+
+   /* -= now do the thing on this relation =- */
+
+   /* the caller is expected to supply an Integer node */
+   Assert(IsA(statsTarget, Integer));
+   newtarget = intVal(statsTarget);
+
+   /* Limit target to sane range (should we raise an error instead?) */
+   if (newtarget < 0)
+       newtarget = 0;
+   else if (newtarget > 1000)
+       newtarget = 1000;
+
+   attrelation = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+   /* look up the column by (relation OID, column name); we get a copy */
+   tuple = SearchSysCacheCopy(ATTNAME,
+                              ObjectIdGetDatum(myrelid),
+                              PointerGetDatum(colName),
+                              0, 0);
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+            relationName, colName);
+
+   if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0)
+       elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+            colName);
+
+   /* overwrite the field in our copy, then write the copy back */
+   ((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget;
+
+   simple_heap_update(attrelation, &tuple->t_self, tuple);
+
+   /* keep system catalog indices current */
+   {
+       Relation    irelations[Num_pg_attr_indices];
+
+       CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations);
+       CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple);
+       CatalogCloseIndices(Num_pg_attr_indices, irelations);
+   }
+
+   /* release the tuple copy and close pg_attribute */
+   heap_freetuple(tuple);
+   heap_close(attrelation, RowExclusiveLock);
+}
+
+
  #ifdef _DROP_COLUMN_HACK__
  /*
   * ALTER TABLE DROP COLUMN trial implementation
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 694d0e8bbc1491c39827ff833f8cc3fb68906e9e..9a0dbdc8c8e15c0b261068728c7d38546e3aa07c 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
               Buffer oldbuf, ItemPointerData from,
               Buffer newbuf, HeapTuple newtup);
  
+
+/*
+ * One entry in the list of relations to process: a relation OID plus a
+ * link to the next entry.  getrels() allocates these entries in
+ * vac_context, and vacuum() walks the chain through vrl_next until it
+ * reaches NULL.
+ */
+typedef struct VRelListData
+{
+   Oid         vrl_relid;      /* OID of the relation to process */
+   struct VRelListData *vrl_next;  /* next entry; NULL ends the list */
+} VRelListData;
+
+typedef VRelListData *VRelList;
+
+/*
+ * Per-page bookkeeping record used by vacuum.
+ *
+ * NOTE(review): offsets[] is declared with a single element but is
+ * presumably over-allocated so that it can hold one entry per recorded
+ * offset number -- confirm against the allocation sites, which are not
+ * part of this hunk.
+ */
+typedef struct VacPageData
+{
+   BlockNumber blkno;          /* BlockNumber of this Page */
+   Size        free;           /* FreeSpace on this Page */
+   uint16      offsets_used;   /* Number of OffNums used by vacuum */
+   uint16      offsets_free;   /* Number of OffNums free or to be free */
+   OffsetNumber offsets[1];    /* Array of its OffNums */
+} VacPageData;
+
+typedef VacPageData *VacPage;
+
+/*
+ * A list of VacPage descriptors, kept as an array of pointers together
+ * with the number of entries in use and the number of slots allocated.
+ */
+typedef struct VacPageListData
+{
+   int         empty_end_pages;/* Number of "empty" end-pages */
+   int         num_pages;      /* Number of pages in pagedesc */
+   int         num_allocated_pages;    /* Number of allocated pages in
+                                        * pagedesc */
+   VacPage    *pagedesc;       /* Descriptions of pages */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;
+
+/*
+ * A pair of tuple identifiers.
+ *
+ * NOTE(review): presumably records that the tuple at this_tid has a
+ * newer version at new_tid; the code that fills these in is not part of
+ * this hunk -- confirm there.
+ */
+typedef struct VTupleLinkData
+{
+   ItemPointerData new_tid;
+   ItemPointerData this_tid;
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;
+
+/*
+ * Describes one tuple move: which tuple, the page it is to be moved to,
+ * and whether that page must be cleaned before it is used.
+ */
+typedef struct VTupleMoveData
+{
+   ItemPointerData tid;        /* tuple ID */
+   VacPage     vacpage;        /* where to move */
+   bool        cleanVpd;       /* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;
+
+/*
+ * Working statistics for the relation being vacuumed.
+ *
+ * vacuum_rel() allocates one of these and zeroes the page and tuple
+ * counters; scan_heap() fills in num_pages, num_tuples, min_tlen and
+ * max_tlen; vacuum_rel() then passes relid, num_pages, num_tuples and
+ * hasindex to vac_update_relstats().
+ */
+typedef struct VRelStats
+{
+   Oid         relid;          /* OID of the relation */
+   long        num_pages;      /* block count recorded by scan_heap */
+   long        num_tuples;     /* tuple count recorded by scan_heap */
+   Size        min_tlen;       /* set by scan_heap; 0 if no tuples */
+   Size        max_tlen;       /* set by scan_heap; 0 if no tuples */
+   bool        hasindex;       /* set by vacuum_rel */
+   int         num_vtlinks;    /* presumably entries in vtlinks; confirm */
+   VTupleLink  vtlinks;        /* array of tuple links; see VTupleLinkData */
+} VRelStats;
+
+
  static MemoryContext vac_context = NULL;
  
  static int MESSAGE_LEVEL;      /* message level */
  
  static TransactionId XmaxRecent;
  
+
  /* non-export function prototypes */
  static void vacuum_init(void);
  static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
  static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+                     VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacuum_pages, VacPageList fraged_pages,
+                       int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+                       VacPageList vacpagelist);
  static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+                        long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
  static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
  static void reap_page(VacPageList vacpagelist, VacPage vacpage);
  static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
  static char *show_rusage(struct rusage * ru0);
  
  
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
  void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
  {
+   const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
     NameData    VacRel;
     Name        VacRelName;
-   MemoryContext old;
-   List       *le;
-   List       *anal_cols2 = NIL;
-
-   if (anal_cols != NIL && !analyze)
-       elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
+   VRelList    vrl,
+               cur;
  
     /*
      * We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
      * behavior.
      */
     if (IsTransactionBlock())
-       elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+       elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
  
-   if (verbose)
+   if (vacstmt->verbose)
         MESSAGE_LEVEL = NOTICE;
     else
         MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
                                         ALLOCSET_DEFAULT_INITSIZE,
                                         ALLOCSET_DEFAULT_MAXSIZE);
  
-   /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
-   if (vacrel)
+   /* Convert vacrel, which is just a string, to a Name */
+   if (vacstmt->vacrel)
     {
-       namestrcpy(&VacRel, vacrel);
+       namestrcpy(&VacRel, vacstmt->vacrel);
         VacRelName = &VacRel;
     }
     else
         VacRelName = NULL;
  
-   /* must also copy the column list, if any, to safe storage */
-   old = MemoryContextSwitchTo(vac_context);
-   foreach(le, anal_cols)
-   {
-       char       *col = (char *) lfirst(le);
-
-       anal_cols2 = lappend(anal_cols2, pstrdup(col));
-   }
-   MemoryContextSwitchTo(old);
+   /* Build list of relations to process (note this lives in vac_context) */
+   vrl = getrels(VacRelName, stmttype);
  
     /*
      * Start up the vacuum cleaner.
-    *
-    * NOTE: since this commits the current transaction, the memory holding
-    * any passed-in parameters gets freed here.  We must have already
-    * copied pass-by-reference parameters to safe storage.  Don't make me
-    * fix this again!
      */
     vacuum_init();
  
-   /* vacuum the database */
-   vac_vacuum(VacRelName, analyze, anal_cols2);
+   /*
+    * Process each selected relation.  We are careful to process
+    * each relation in a separate transaction in order to avoid holding
+    * too many locks at one time.
+    */
+   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+   {
+       if (vacstmt->vacuum)
+           vacuum_rel(cur->vrl_relid);
+       /* analyze separately so locking is minimized */
+       if (vacstmt->analyze)
+           analyze_rel(cur->vrl_relid, vacstmt);
+   }
  
     /* clean up */
     vacuum_shutdown();
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
   *     PostgresMain().
   */
  static void
-vacuum_init()
+vacuum_init(void)
  {
     /* matches the StartTransaction in PostgresMain() */
     CommitTransactionCommand();
  }
  
  static void
-vacuum_shutdown()
+vacuum_shutdown(void)
  {
     /* on entry, we are not in a transaction */
  
@@ -223,34 +287,10 @@ vacuum_shutdown()
  }
  
  /*
- * vac_vacuum() -- vacuum the database.
- *
- *     This routine builds a list of relations to vacuum, and then calls
- *     code that vacuums them one at a time.  We are careful to vacuum each
- *     relation in a separate transaction in order to avoid holding too many
- *     locks at one time.
+ * Build a list of VRelListData nodes for each relation to be processed
   */
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
-   VRelList    vrl,
-               cur;
-
-   /* get list of relations */
-   vrl = getrels(VacRelP);
-
-   /* vacuum each heap relation */
-   for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
-   {
-       vacuum_rel(cur->vrl_relid);
-       /* analyze separately so locking is minimized */
-       if (analyze)
-           analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
-   }
-}
-
  static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
  {
     Relation    rel;
     TupleDesc   tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
     char       *rname;
     char        rkind;
     bool        n;
-   bool        found = false;
     ScanKeyData key;
  
-   StartTransactionCommand();
-
-   if (NameStr(*VacRelP))
+   if (VacRelP)
     {
  
         /*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
     }
     else
     {
+       /* find all relations listed in pg_class */
         ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
                                F_CHAREQ, CharGetDatum('r'));
     }
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
  
     while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
     {
-       found = true;
-
         d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
-       rname = (char *) DatumGetPointer(d);
+       rname = (char *) DatumGetName(d);
  
         d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
         rkind = DatumGetChar(d);
  
         if (rkind != RELKIND_RELATION)
         {
-           elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+           elog(NOTICE, "%s: can not process indexes, views or special system tables",
+                stmttype);
             continue;
         }
  
-       /* get a relation list entry for this guy */
+       /* Make a relation list entry for this guy */
         if (vrl == (VRelList) NULL)
             vrl = cur = (VRelList)
                 MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
     heap_endscan(scan);
     heap_close(rel, AccessShareLock);
  
-   if (!found)
-       elog(NOTICE, "Vacuum: table not found");
-
-   CommitTransactionCommand();
+   if (vrl == NULL)
+       elog(NOTICE, "%s: table not found", stmttype);
  
     return vrl;
  }
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
      */
     vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
     vacrelstats->relid = relid;
-   vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+   vacrelstats->num_pages = 0;
+   vacrelstats->num_tuples = 0;
     vacrelstats->hasindex = false;
  
     GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
         vacrelstats->hasindex = true;
     else
         vacrelstats->hasindex = false;
-#ifdef NOT_USED
  
+#ifdef NOT_USED
     /*
      * reindex in VACUUM is dangerous under WAL. ifdef out until it
      * becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
     heap_close(onerel, NoLock);
  
     /* update statistics in pg_class */
-   update_relstats(vacrelstats->relid, vacrelstats->num_pages,
-                   vacrelstats->num_tuples, vacrelstats->hasindex,
-                   vacrelstats);
+   vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+                       vacrelstats->num_tuples, vacrelstats->hasindex);
  
     /*
      * Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
     char       *relname;
     VacPage     vacpage,
                 vp;
+   long        num_tuples;
     uint32      tups_vacuumed,
-               num_tuples,
                 nkeep,
                 nunused,
                 ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
     /* save stats in the rel list for use later */
     vacrelstats->num_tuples = num_tuples;
     vacrelstats->num_pages = nblocks;
-/*   vacrelstats->natts = attr_cnt;*/
     if (num_tuples == 0)
         min_tlen = max_tlen = 0;
     vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
     }
  
     elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
  Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
          nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
          new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
  {
     Buffer      buf;
     VacPage    *vacpage;
-   int         nblocks;
+   long        nblocks;
     int         i;
  
     nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
     /* truncate relation if there are some empty end-pages */
     if (vacuum_pages->empty_end_pages > 0)
     {
-       elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+       elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
              RelationGetRelationName(onerel),
              vacrelstats->num_pages, nblocks);
         nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
   *
   */
  static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
  {
     RetrieveIndexResult res;
     IndexScanDesc iscan;
-   int         nitups;
+   long        nitups;
     int         nipages;
     struct rusage ru0;
  
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
  
     /* now update statistics in pg_class */
     nipages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
  
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
          RelationGetRelationName(indrel), nipages, nitups,
          show_rusage(&ru0));
  
     if (nitups != num_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
  \n\tRecreate the index.",
              RelationGetRelationName(indrel), nitups, num_tuples);
  
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
   *     pg_class.
   */
  static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+            long num_tuples, int keep_tuples)
  {
     RetrieveIndexResult res;
     IndexScanDesc iscan;
     ItemPointer heapptr;
     int         tups_vacuumed;
-   int         num_index_tuples;
+   long        num_index_tuples;
     int         num_pages;
     VacPage     vp;
     struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
  
     /* now update statistics in pg_class */
     num_pages = RelationGetNumberOfBlocks(indrel);
-   update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+   vac_update_relstats(RelationGetRelid(indrel),
+                       num_pages, num_index_tuples, false);
  
-   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+   elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
          RelationGetRelationName(indrel), num_pages,
          num_index_tuples - keep_tuples, tups_vacuumed,
          show_rusage(&ru0));
  
     if (num_index_tuples != num_tuples + keep_tuples)
-       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+       elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
  \n\tRecreate the index.",
           RelationGetRelationName(indrel), num_index_tuples, num_tuples);
  
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
  }
  
  /*
- * update_relstats() -- update statistics for one relation
+ * vac_update_relstats() -- update statistics for one relation
   *
   *     Update the whole-relation statistics that are kept in its pg_class
   *     row.  There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
   *     we updated these tuples in the usual way, vacuuming pg_class itself
   *     wouldn't work very well --- by the time we got done with a vacuum
   *     cycle, most of the tuples in pg_class would've been obsoleted.
- *     Updating pg_class's own statistics would be especially tricky.
   *     Of course, this only works for fixed-size never-null columns, but
   *     these are.
   */
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
-               VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                   bool hasindex)
  {
     Relation    rd;
     HeapTupleData rtup;
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c

index 12c6f82a8b224c0f773f79c7b53132447467d399..e0543a2810977526886fee0d639ec76cc069463f 100644 (file)
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,24 +20,24 @@
  #include "utils/tuplesort.h"
  
  /* ----------------------------------------------------------------
- *     FormSortKeys(node)
+ *     ExtractSortKeys
   *
- *     Forms the structure containing information used to sort the relation.
+ *     Extract the sorting key information from the plan node.
   *
- *     Returns an array of ScanKeyData.
+ *     Returns two palloc'd arrays, one of sort operator OIDs and
+ *     one of attribute numbers.
   * ----------------------------------------------------------------
   */
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+               Oid **sortOperators,
+               AttrNumber **attNums)
  {
-   ScanKey     sortkeys;
     List       *targetList;
-   List       *tl;
     int         keycount;
-   Resdom     *resdom;
-   AttrNumber  resno;
-   Index       reskey;
-   Oid         reskeyop;
+   Oid        *sortOps;
+   AttrNumber *attNos;
+   List       *tl;
  
     /*
      * get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
     keycount = sortnode->keycount;
  
     /*
-    * first allocate space for scan keys
+    * first allocate space for results
      */
     if (keycount <= 0)
-       elog(ERROR, "FormSortKeys: keycount <= 0");
-   sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
-   MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+       elog(ERROR, "ExtractSortKeys: keycount <= 0");
+   sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+   MemSet(sortOps, 0, keycount * sizeof(Oid));
+   *sortOperators = sortOps;
+   attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+   MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+   *attNums = attNos;
  
     /*
-    * form each scan key from the resdom info in the target list
+    * extract info from the resdom nodes in the target list
      */
     foreach(tl, targetList)
     {
         TargetEntry *target = (TargetEntry *) lfirst(tl);
-
-       resdom = target->resdom;
-       resno = resdom->resno;
-       reskey = resdom->reskey;
-       reskeyop = resdom->reskeyop;
+       Resdom     *resdom = target->resdom;
+       Index       reskey = resdom->reskey;
  
         if (reskey > 0)         /* ignore TLEs that are not sort keys */
         {
-           ScanKeyEntryInitialize(&sortkeys[reskey - 1],
-                                  0x0,
-                                  resno,
-                                  (RegProcedure) reskeyop,
-                                  (Datum) 0);
+           Assert(reskey <= keycount);
+           sortOps[reskey - 1] = resdom->reskeyop;
+           attNos[reskey - 1] = resdom->resno;
         }
     }
-
-   return sortkeys;
  }
  
  /* ----------------------------------------------------------------
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
     {
         Plan       *outerNode;
         TupleDesc   tupDesc;
-       int         keycount;
-       ScanKey     sortkeys;
+       Oid        *sortOperators;
+       AttrNumber *attNums;
  
         SO1_printf("ExecSort: %s\n",
                    "sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
  
         outerNode = outerPlan((Plan *) node);
         tupDesc = ExecGetTupType(outerNode);
-       keycount = node->keycount;
-       sortkeys = (ScanKey) sortstate->sort_Keys;
  
-       tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
-                                             true /* randomAccess */ );
+       ExtractSortKeys(node, &sortOperators, &attNums);
  
+       tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+                                             sortOperators, attNums,
+                                             true /* randomAccess */ );
         sortstate->tuplesortstate = (void *) tuplesortstate;
  
+       pfree(sortOperators);
+       pfree(attNums);
+
         /*
          * Scan the subplan and feed all the tuples to tuplesort.
          */
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
      */
     sortstate = makeNode(SortState);
     sortstate->sort_Done = false;
-   sortstate->sort_Keys = NULL;
     sortstate->tuplesortstate = NULL;
  
     node->sortstate = sortstate;
@@ -258,11 +257,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
     outerPlan = outerPlan((Plan *) node);
     ExecInitNode(outerPlan, estate, (Plan *) node);
  
-   /*
-    * initialize sortstate information
-    */
-   sortstate->sort_Keys = FormSortKeys(node);
-
     /*
      * initialize tuple type.  no need to initialize projection info
      * because this node doesn't do projections.
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
         tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
     sortstate->tuplesortstate = NULL;
  
-   if (sortstate->sort_Keys != NULL)
-       pfree(sortstate->sort_Keys);
-
     pfree(sortstate);
     node->sortstate = NULL;
  
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index ad50630931e357a1ca7bae5f806f8cc242062722..ee5a803b8025ac9817834537bb5b4ccd10708527 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
     newnode->left_pathkey = NIL;
     newnode->right_pathkey = NIL;
     newnode->hashjoinoperator = from->hashjoinoperator;
-   newnode->left_dispersion = from->left_dispersion;
-   newnode->right_dispersion = from->right_dispersion;
+   newnode->left_bucketsize = from->left_bucketsize;
+   newnode->right_bucketsize = from->right_bucketsize;
  
     return newnode;
  }
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
  {
     VacuumStmt *newnode = makeNode(VacuumStmt);
  
-   newnode->verbose = from->verbose;
+   newnode->vacuum = from->vacuum;
     newnode->analyze = from->analyze;
+   newnode->verbose = from->verbose;
     if (from->vacrel)
         newnode->vacrel = pstrdup(from->vacrel);
-   Node_Copy(from, newnode, va_spec);
+   Node_Copy(from, newnode, va_cols);
  
     return newnode;
  }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 06ee63bbacd05398c5445bd4ce4f8dfb169090da..284a534aa966f03a5f69da55e5faa89a96925b1e 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
         return false;
  
     /*
-    * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+    * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
      * since they may not be set yet, and should be derivable from the
      * clause anyway
      */
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
  static bool
  _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
  {
-   if (a->verbose != b->verbose)
+   if (a->vacuum != b->vacuum)
         return false;
     if (a->analyze != b->analyze)
         return false;
+   if (a->verbose != b->verbose)
+       return false;
     if (!equalstr(a->vacrel, b->vacrel))
         return false;
-   if (!equal(a->va_spec, b->va_spec))
+   if (!equal(a->va_cols, b->va_cols))
         return false;
  
     return true;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 9a071e7a250df88efe03c183927ffeadfa07a86c..4c0c1b03ef544c60b9161208ceb950a83862419c 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
   *
   * NOTES
   *   Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
  
     /* eval_cost is not part of saved representation; compute on first use */
     local_node->eval_cost = -1;
-   /* ditto for cached pathkeys and dispersion */
+   /* ditto for cached pathkeys and bucketsize */
     local_node->left_pathkey = NIL;
     local_node->right_pathkey = NIL;
-   local_node->left_dispersion = -1;
-   local_node->right_dispersion = -1;
+   local_node->left_bucketsize = -1;
+   local_node->right_bucketsize = -1;
  
     return local_node;
  }
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index c52af72a16b824c1f37078bb4e185d8a34b22d2b..bdfbbb18186d9c7ef4201fa8eb294bbdb55e298c 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -50,11 +50,15 @@
  
  #include <math.h>
  
+#include "catalog/pg_statistic.h"
  #include "executor/nodeHash.h"
  #include "miscadmin.h"
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
  #include "utils/lsyscache.h"
+#include "utils/syscache.h"
  
  
  /*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
   * 'outer_path' is the path for the outer relation
   * 'inner_path' is the path for the inner relation
   * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize statistic
   *             for the inner hash key.
   */
  void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
               Path *outer_path,
               Path *inner_path,
               List *restrictlist,
-             Selectivity innerdispersion)
+             Selectivity innerbucketsize)
  {
     Cost        startup_cost = 0;
     Cost        run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
  
     /*
      * The number of tuple comparisons needed is the number of outer
-    * tuples times the typical hash bucket size.  nodeHash.c tries for
-    * average bucket loading of NTUP_PER_BUCKET, but that goal will be
-    * reached only if data values are uniformly distributed among the
-    * buckets.  To be conservative, we scale up the target bucket size by
-    * the number of inner rows times inner dispersion, giving an estimate
-    * of the typical number of duplicates of each value. We then charge
-    * one cpu_operator_cost per tuple comparison.
+    * tuples times the typical number of tuples in a hash bucket,
+    * which is the inner relation size times its bucketsize fraction.
+    * We charge one cpu_operator_cost per tuple comparison.
      */
     run_cost += cpu_operator_cost * outer_path->parent->rows *
-       NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+       ceil(inner_path->parent->rows * innerbucketsize);
  
     /*
      * Estimate the number of tuples that get through the hashing filter
      * as one per tuple in the two source relations.  This could be a
      * drastic underestimate if there are many equal-keyed tuples in
-    * either relation, but we have no good way of estimating that...
+    * either relation, but we have no simple way of estimating that;
+    * and since this is only a second-order parameter, it's probably
+    * not worth expending a lot of effort on the estimate.
      */
     ntuples = outer_path->parent->rows + inner_path->parent->rows;
  
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
     /*
      * Bias against putting larger relation on inside.  We don't want an
      * absolute prohibition, though, since larger relation might have
-    * better dispersion --- and we can't trust the size estimates
+    * better bucketsize --- and we can't trust the size estimates
      * unreservedly, anyway.  Instead, inflate the startup cost by the
      * square root of the size ratio.  (Why square root?  No real good
      * reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
     path->total_cost = startup_cost + run_cost;
  }
  
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin.  We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation.  However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution.  Otherwise the
+ * buckets will be nonuniformly occupied.  If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often.  Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size.  We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1.  This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want.  We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+   Oid         relid;
+   RelOptInfo *rel;
+   HeapTuple   tuple;
+   Form_pg_statistic stats;
+   double      estfract,
+               ndistinct,
+               needdistinct,
+               mcvfreq,
+               avgfreq;
+   float4     *numbers;
+   int         nnumbers;
+
+   /*
+    * Look up info about var's relation and attribute;
+    * if none available, return default estimate.
+    */
+   if (!IsA(var, Var))
+       return 0.1;
+
+   relid = getrelid(var->varno, root->rtable);
+   if (relid == InvalidOid)
+       return 0.1;
+
+   rel = get_base_rel(root, var->varno);
+
+   if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+       return 0.1;             /* ensure we can divide below */
+
+   tuple = SearchSysCache(STATRELATT,
+                          ObjectIdGetDatum(relid),
+                          Int16GetDatum(var->varattno),
+                          0, 0);
+   if (!HeapTupleIsValid(tuple))
+   {
+       /*
+        * Perhaps the Var is a system attribute; if so, it will have no
+        * entry in pg_statistic, but we may be able to guess something
+        * about its distribution anyway.
+        */
+       switch (var->varattno)
+       {
+           case ObjectIdAttributeNumber:
+           case SelfItemPointerAttributeNumber:
+               /* these are unique, so buckets should be well-distributed */
+               return (double) NTUP_PER_BUCKET / rel->rows;
+           case TableOidAttributeNumber:
+               /* hashing this is a terrible idea... */
+               return 1.0;
+       }
+       return 0.1;
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+   /*
+    * Obtain number of distinct data values in raw relation.
+    */
+   ndistinct = stats->stadistinct;
+   if (ndistinct < 0.0)
+       ndistinct = -ndistinct * rel->tuples;
+
+   /*
+    * Adjust ndistinct to account for restriction clauses.  Observe we are
+    * assuming that the data distribution is affected uniformly by the
+    * restriction clauses!
+    *
+    * XXX Possibly better way, but much more expensive: multiply by
+    * selectivity of rel's restriction clauses that mention the target Var.
+    */
+   ndistinct *= rel->rows / rel->tuples;
+
+   /*
+    * Discourage use of hash join if there seem not to be very many distinct
+    * data values.  The threshold here is somewhat arbitrary, as is the
+    * fraction used to "discourage" the choice.
+    */
+   if (ndistinct < 50.0)
+   {
+       ReleaseSysCache(tuple);
+       return 0.5;
+   }
+
+   /*
+    * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
+    * ie the number of rows after applying restriction clauses, because
+    * that's what the fraction will eventually be multiplied by in
+    * cost_hashjoin.
+    */
+   estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+   /*
+    * Adjust estimated bucketsize if too few distinct values to fill
+    * all the buckets.
+    */
+   needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+   if (ndistinct < needdistinct)
+       estfract *= needdistinct / ndistinct;
+
+   /*
+    * Look up the frequency of the most common value, if available.
+    */
+   mcvfreq = 0.0;
+
+   if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        NULL, NULL, &numbers, &nnumbers))
+   {
+       /*
+        * The first MCV stat is for the most common value.
+        */
+       if (nnumbers > 0)
+           mcvfreq = numbers[0];
+       free_attstatsslot(var->vartype, NULL, 0,
+                         numbers, nnumbers);
+   }
+
+   /*
+    * Adjust estimated bucketsize upward to account for skewed distribution.
+    */
+   avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+   if (avgfreq > 0.0 && mcvfreq > avgfreq)
+       estfract *= mcvfreq / avgfreq;
+
+   ReleaseSysCache(tuple);
+
+   return (Selectivity) estfract;
+}
+
  
  /*
   * cost_qual_eval
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index d41336ddcee0f9c26ad9a2ab0b1410a1f0ae38c7..cd7cabd41deb7bf52b323b437d847eede311b8cc 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,15 +8,15 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
+#include "postgres.h"
+
  #include 
  #include 
  
-#include "postgres.h"
-
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
                      List *restrictlist, JoinType jointype);
  static Path *best_innerjoin(List *join_paths, List *outer_relid,
                JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
  static List *select_mergejoin_clauses(RelOptInfo *joinrel,
                          RelOptInfo *outerrel,
                          RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
         Expr       *clause;
         Var        *left,
                    *right;
-       Selectivity innerdispersion;
+       Selectivity innerbucketsize;
         List       *hashclauses;
  
         if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
  
         /*
          * Check if clause is usable with these sub-rels, find inner side,
-        * estimate dispersion of inner var for costing purposes.
+        * estimate bucketsize of inner var for costing purposes.
          *
          * Since we tend to visit the same clauses over and over when
-        * planning a large query, we cache the dispersion estimates in
+        * planning a large query, we cache the bucketsize estimates in
          * the RestrictInfo node to avoid repeated lookups of statistics.
          */
         if (intMember(left->varno, outerrelids) &&
             intMember(right->varno, innerrelids))
         {
             /* righthand side is inner */
-           innerdispersion = restrictinfo->right_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->right_bucketsize;
+           if (innerbucketsize < 0)
             {
                 /* not cached yet */
-               innerdispersion = estimate_dispersion(root, right);
-               restrictinfo->right_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, right);
+               restrictinfo->right_bucketsize = innerbucketsize;
             }
         }
         else if (intMember(left->varno, innerrelids) &&
                  intMember(right->varno, outerrelids))
         {
             /* lefthand side is inner */
-           innerdispersion = restrictinfo->left_dispersion;
-           if (innerdispersion < 0)
+           innerbucketsize = restrictinfo->left_bucketsize;
+           if (innerbucketsize < 0)
             {
                 /* not cached yet */
-               innerdispersion = estimate_dispersion(root, left);
-               restrictinfo->left_dispersion = innerdispersion;
+               innerbucketsize = estimate_hash_bucketsize(root, left);
+               restrictinfo->left_bucketsize = innerbucketsize;
             }
         }
         else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
                                       innerrel->cheapest_total_path,
                                       restrictlist,
                                       hashclauses,
-                                     innerdispersion));
+                                     innerbucketsize));
         if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
             add_path(joinrel, (Path *)
                      create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
                                           innerrel->cheapest_total_path,
                                           restrictlist,
                                           hashclauses,
-                                         innerdispersion));
+                                         innerbucketsize));
     }
  }
  
@@ -866,31 +865,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
     return cheapest;
  }
  
-/*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large. We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
-   Oid         relid;
-
-   if (!IsA(var, Var))
-       return 0.1;
-
-   relid = getrelid(var->varno, root->rtable);
-
-   if (relid == InvalidOid)
-       return 0.1;
-
-   return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
  /*
   * select_mergejoin_clauses
   *   Select mergejoin clauses that are usable for a particular join.
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8c3b00289d3e7d467aeb03dcc1b53eb02f5a3a2b..2d264c46881730ba4ace2ade745fe6942c9d49fb 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,14 +10,14 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
-#include 
-
  #include "postgres.h"
  
+#include 
+
  #include "catalog/pg_index.h"
  #include "nodes/makefuncs.h"
  #include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
          */
         if (resdom->reskey == 0)
         {
-           /* OK, mark it as a sort key and set the sort operator regproc */
+           /* OK, mark it as a sort key and set the sort operator */
             resdom->reskey = ++numsortkeys;
-           resdom->reskeyop = get_opcode(pathkey->sortop);
+           resdom->reskeyop = pathkey->sortop;
         }
     }
  
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 7c3e15a8f88d81b206e4d3f618eae9658294ad6a..5d67e02dacb44bce678665c592ab184f588469a5 100644 (file)
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
+#include "postgres.h"
+
  #include 
  
-#include "postgres.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_type.h"
  #include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
     restrictinfo->left_pathkey = NIL;   /* not computable yet */
     restrictinfo->right_pathkey = NIL;
     restrictinfo->hashjoinoperator = InvalidOid;
-   restrictinfo->left_dispersion = -1; /* not computed until needed */
-   restrictinfo->right_dispersion = -1;
+   restrictinfo->left_bucketsize = -1; /* not computed until needed */
+   restrictinfo->right_bucketsize = -1;
  
     /*
      * Retrieve all relids and vars contained within the clause.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index b2ab4600209dd566fd281c5110f0e1f6ba5c1cb1..0aba4808c160f3bf5ba3a9cc3fd2c6cf26fa2fa3 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
             {
                 /* OK, insert the ordering info needed by the executor. */
                 resdom->reskey = ++keyno;
-               resdom->reskeyop = get_opcode(grpcl->sortop);
+               resdom->reskeyop = grpcl->sortop;
             }
         }
  
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
         {
             /* OK, insert the ordering info needed by the executor. */
             resdom->reskey = ++keyno;
-           resdom->reskeyop = get_opcode(sortcl->sortop);
+           resdom->reskeyop = sortcl->sortop;
         }
     }
  
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 0b173466cf98061a3add13f850ba9e750dd9f4e0..ede4159d9707629729b5dffbc32f241f48629e72 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
         newinfo->eval_cost = -1;        /* reset this too */
         newinfo->left_pathkey = NIL;    /* and these */
         newinfo->right_pathkey = NIL;
-       newinfo->left_dispersion = -1;
-       newinfo->right_dispersion = -1;
+       newinfo->left_bucketsize = -1;
+       newinfo->right_bucketsize = -1;
  
         return (Node *) newinfo;
     }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index cfba3ee395f2e0216f74c1e2497a7a8f5897d74b..407c132b4f7a6388b093806fd3eb01286906e084 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,14 +8,14 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
-#include 
-
  #include "postgres.h"
  
+#include 
+
  #include "nodes/plannodes.h"
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
   * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
   * 'hashclauses' is a list of the hash join clause (always a 1-element list)
   *     (this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
   *
   */
  HashPath   *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                      Path *inner_path,
                      List *restrict_clauses,
                      List *hashclauses,
-                    Selectivity innerdispersion)
+                    Selectivity innerbucketsize)
  {
     HashPath   *pathnode = makeNode(HashPath);
  
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                   outer_path,
                   inner_path,
                   restrict_clauses,
-                 innerdispersion);
+                 innerbucketsize);
  
     return pathnode;
  }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index 4f711df203c846acf4402ed131def54dbbf94443..ee3523553e8693ac1b7762d01ebbabc3697a4d7a 100644 (file)
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,11 +9,10 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
-
  #include "postgres.h"
  
  #include 
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 4687a5599623d09b416357721488369cc8eaaa38..76cc095bc4edcdbf4cfecad9627a1e5a29d03256 100644 (file)
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
         /* just the named tables */
         foreach(l, forUpdate)
         {
-           char       *relname = lfirst(l);
+           char       *relname = strVal(lfirst(l));
  
             i = 0;
             foreach(rt, qry->rtable)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index bed0ce239a42e2f75c48bdda8aff299cb2f02f9b..40c379aca51f280882945b9f5caf4aaeccc4475f 100644 (file)
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
   *
   * HISTORY
   *   AUTHOR            DATE            MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
     char                *str;
     bool                boolean;
     JoinType            jtype;
-   InhOption           inhOpt;
     List                *list;
     Node                *node;
     Value               *value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
  
  %type    stmt,
         AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+       AnalyzeStmt,
         ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
         CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
         CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
  %type    select_no_parens, select_with_parens, select_clause,
                 simple_select
  
-%type     alter_column_action
+%type     alter_column_default
  %type     drop_behavior
  
  %type    createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
         OptTableElementList, OptInherit, definition, opt_distinct,
         opt_with, func_args, func_args_list, func_as,
         oper_argtypes, RuleActionList, RuleActionMulti,
-       opt_column_list, columnList, opt_va_list, va_list,
+       opt_column_list, columnList, opt_name_list,
         sort_clause, sortby_list, index_params, index_list, name_list,
         from_clause, from_list, opt_array_bounds,
         expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
  %type    substr_from, substr_for
  
  %type     opt_binary, opt_using, opt_instead, opt_cursor
-%type     opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type  opt_inh_star, opt_only
+%type     opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
  
  %type    copy_dirn, direction, reindex_type, drop_type,
         opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
         NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
         OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
         REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
-       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+       SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+       STATISTICS, STDIN, STDOUT, SYSID,
         TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, 
         UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
  
@@ -470,6 +469,7 @@ stmt :  AlterSchemaStmt
         | CreatedbStmt
         | DropdbStmt
         | VacuumStmt
+       | AnalyzeStmt
         | VariableSetStmt
         | VariableShowStmt
         | VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
   *****************************************************************************/
  
  AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN]  */
-       ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+       ALTER TABLE relation_expr ADD opt_column columnDef
                 {
                     AlterTableStmt *n = makeNode(AlterTableStmt);
                     n->subtype = 'A';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $7;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $6;
                     $$ = (Node *)n;
                 }
-/* ALTER TABLE <name> ALTER [COLUMN]  {SET DEFAULT |DROP DEFAULT} */
-       | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+       | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
                 {
                     AlterTableStmt *n = makeNode(AlterTableStmt);
                     n->subtype = 'T';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->def = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = $7;
                     $$ = (Node *)n;
                 }
-/* ALTER TABLE  DROP [COLUMN]  {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+       | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+               {
+                   AlterTableStmt *n = makeNode(AlterTableStmt);
+                   n->subtype = 'S';
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->def = (Node *) makeInteger($9);
+                   $$ = (Node *)n;
+               }
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
                 {
                     AlterTableStmt *n = makeNode(AlterTableStmt);
                     n->subtype = 'D';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                     $$ = (Node *)n;
                 }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-       | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+       | ALTER TABLE relation_expr ADD TableConstraint
                 {
                     AlterTableStmt *n = makeNode(AlterTableStmt);
                     n->subtype = 'C';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->def = $6;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->def = $5;
                     $$ = (Node *)n;
                 }
-/* ALTER TABLE <name> DROP CONSTRAINT  {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
                 {
                     AlterTableStmt *n = makeNode(AlterTableStmt);
                     n->subtype = 'X';
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->name = $7;
-                   n->behavior = $8;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->name = $6;
+                   n->behavior = $7;
                     $$ = (Node *)n;
                 }
  /* ALTER TABLE  CREATE TOAST TABLE */
@@ -997,6 +1008,7 @@ AlterTableStmt:
                     AlterTableStmt *n = makeNode(AlterTableStmt);
                     n->subtype = 'E';
                     n->relname = $3;
+                   n->inhOpt = INH_NO;
                     $$ = (Node *)n;
                 }
  /* ALTER TABLE  OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
                     AlterTableStmt *n = makeNode(AlterTableStmt);
                     n->subtype = 'U';
                     n->relname = $3;
+                   n->inhOpt = INH_NO;
                     n->name = $6;
                     $$ = (Node *)n;
                 }
         ;
  
-alter_column_action:
+alter_column_default:
         SET DEFAULT a_expr
             {
                 /* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference:  NO ACTION                { $$ = FKCONSTR_ON_KEY_NOACTION; }
         | SET DEFAULT                   { $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
         ;
  
-opt_only: ONLY                                 { $$ = INH_NO; }
-        | /*EMPTY*/                                { $$ = INH_DEFAULT; } 
-       ;
-
  OptInherit:  INHERITS '(' relation_name_list ')'   { $$ = $3; }
         | /*EMPTY*/                                 { $$ = NIL; }
         ;
@@ -2598,14 +2607,13 @@ opt_force:  FORCE                                   {  $$ = TRUE; }
   *
   *****************************************************************************/
  
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                 {
                     RenameStmt *n = makeNode(RenameStmt);
-                   n->relname = $3;
-                   n->inhOpt = $4;
-                   n->column = $7;
-                   n->newname = $9;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->column = $6;
+                   n->newname = $8;
                     $$ = (Node *)n;
                 }
         ;
@@ -2994,49 +3002,71 @@ ClusterStmt:  CLUSTER index_name ON relation_name
   *
   *     QUERY:
   *             vacuum
+ *             analyze
   *
   *****************************************************************************/
  
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
                 {
                     VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                     n->verbose = $2;
-                   n->analyze = $3;
                     n->vacrel = NULL;
-                   n->va_spec = NIL;
+                   n->va_cols = NIL;
                     $$ = (Node *)n;
                 }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose relation_name
                 {
                     VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = true;
+                   n->analyze = false;
                     n->verbose = $2;
-                   n->analyze = $3;
-                   n->vacrel = $4;
-                   n->va_spec = $5;
-                   if ( $5 != NIL && !$4 )
-                       elog(ERROR,"VACUUM syntax error at or near \"(\""
-                           "\n\tRelation name must be specified");
+                   n->vacrel = $3;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | VACUUM opt_verbose AnalyzeStmt
+               {
+                   VacuumStmt *n = (VacuumStmt *) $3;
+                   n->vacuum = true;
+                   n->verbose |= $2;
                     $$ = (Node *)n;
                 }
         ;
  
-opt_verbose:  VERBOSE                          { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = NULL;
+                   n->va_cols = NIL;
+                   $$ = (Node *)n;
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   VacuumStmt *n = makeNode(VacuumStmt);
+                   n->vacuum = false;
+                   n->analyze = true;
+                   n->verbose = $2;
+                   n->vacrel = $3;
+                   n->va_cols = $4;
+                   $$ = (Node *)n;
+               }
         ;
  
-opt_analyze:  ANALYZE                          { $$ = TRUE; }
+analyze_keyword:  ANALYZE                      { $$ = TRUE; }
         |     ANALYSE /* British */             { $$ = TRUE; }
-       | /*EMPTY*/                             { $$ = FALSE; }
         ;
  
-opt_va_list:  '(' va_list ')'                  { $$ = $2; }
-       | /*EMPTY*/                             { $$ = NIL; }
+opt_verbose:  VERBOSE                          { $$ = TRUE; }
+       | /*EMPTY*/                             { $$ = FALSE; }
         ;
  
-va_list:  name
-               { $$ = makeList1($1); }
-       | va_list ',' name
-               { $$ = lappend($1, $3); }
+opt_name_list:  '(' name_list ')'              { $$ = $2; }
+       | /*EMPTY*/                             { $$ = NIL; }
         ;
  
  
@@ -3160,12 +3190,12 @@ columnElem:  ColId opt_indirection
   *
   *****************************************************************************/
  
-DeleteStmt:  DELETE FROM opt_only relation_name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                 {
                     DeleteStmt *n = makeNode(DeleteStmt);
-                   n->inhOpt = $3;
-                   n->relname = $4;
-                   n->whereClause = $5;
+                   n->relname = $3->relname;
+                   n->inhOpt = $3->inhOpt;
+                   n->whereClause = $4;
                     $$ = (Node *)n;
                 }
         ;
@@ -3202,17 +3232,17 @@ opt_lmode:  SHARE               { $$ = TRUE; }
   *
   *****************************************************************************/
  
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
               SET update_target_list
               from_clause
               where_clause
                 {
                     UpdateStmt *n = makeNode(UpdateStmt);
-                   n->inhOpt = $2;
-                   n->relname = $3;
-                   n->targetList = $5;
-                   n->fromClause = $6;
-                   n->whereClause = $7;
+                   n->relname = $2->relname;
+                   n->inhOpt = $2->inhOpt;
+                   n->targetList = $4;
+                   n->fromClause = $5;
+                   n->whereClause = $6;
                     $$ = (Node *)n;
                 }
         ;
@@ -3545,10 +3575,6 @@ select_offset_value: Iconst
   * ...however, recursive addattr and rename supported.  make special
   * cases for these.
   */
-opt_inh_star:  '*'                             { $$ = INH_YES; }
-       | /*EMPTY*/                             { $$ = INH_DEFAULT; }
-       ;
-
  relation_name_list:  name_list;
  
  name_list:  name
@@ -3576,7 +3602,7 @@ opt_for_update_clause:    for_update_clause       { $$ = $1; }
         | /* EMPTY */                           { $$ = NULL; }
         ;
  
-update_list:  OF va_list                       { $$ = $2; }
+update_list:  OF name_list                     { $$ = $2; }
         | /* EMPTY */                           { $$ = makeList1(NULL); }
         ;
  
@@ -5525,6 +5551,7 @@ TokenId:  ABSOLUTE                        { $$ = "absolute"; }
         | SHARE                         { $$ = "share"; }
         | START                         { $$ = "start"; }
         | STATEMENT                     { $$ = "statement"; }
+       | STATISTICS                    { $$ = "statistics"; }
         | STDIN                         { $$ = "stdin"; }
         | STDOUT                        { $$ = "stdout"; }
         | SYSID                         { $$ = "sysid"; }
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c

index 402dbfd28ca561a2c9d9ba513e7986dda06ec7df..8ab19f86ae8582213730311845cdbdcae0977f18 100644 (file)
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
     {"some", SOME},
     {"start", START},
     {"statement", STATEMENT},
+   {"statistics", STATISTICS},
     {"stdin", STDIN},
     {"stdout", STDOUT},
     {"substring", SUBSTRING},
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c

index f5324cb37355532ef4233a335bc40fb5e5eb635e..e1d49842fd2398a3338bf8fb8329c7ca0677a2fe 100644 (file)
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -75,7 +75,7 @@ static struct
     }
  };
  
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
  
  
  /*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
  
                 foreach(l, pstate->p_forUpdate)
                 {
-                   char       *rname = lfirst(l);
+                   char       *rname = strVal(lfirst(l));
  
                     if (strcmp(relname, rname) == 0)
                         return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
  
  #endif
  
-#ifdef NOT_USED
-/*
- * This should only be used if the relation is already
- * heap_open()'ed.  Use the cache version
- * for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
-   return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
  /* given attribute id, return type of that attribute */
  /*
   * This should only be used if the relation is already
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c

index ae6cd20a5db3838c76a6f006232a8f04e5d4a800..b616f7e68ef875a0774de3f270c4cf98aa3dcc94 100644 (file)
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
                                         interpretInhOption(stmt->inhOpt),
                                             (ColumnDef *) stmt->def);
                         break;
-                   case 'T':   /* ALTER COLUMN */
-                       AlterTableAlterColumn(stmt->relname,
+                   case 'T':   /* ALTER COLUMN DEFAULT */
+                       AlterTableAlterColumnDefault(stmt->relname,
                                         interpretInhOption(stmt->inhOpt),
-                                             stmt->name,
-                                             stmt->def);
+                                                    stmt->name,
+                                                    stmt->def);
                         break;
-                   case 'D':   /* ALTER DROP */
+                   case 'S':   /* ALTER COLUMN STATISTICS */
+                       AlterTableAlterColumnStatistics(stmt->relname,
+                                       interpretInhOption(stmt->inhOpt),
+                                                       stmt->name,
+                                                       stmt->def);
+                       break;
+                   case 'D':   /* DROP COLUMN */
                         AlterTableDropColumn(stmt->relname,
                                         interpretInhOption(stmt->inhOpt),
                                              stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
             break;
  
         case T_VacuumStmt:
-           set_ps_display(commandTag = "VACUUM");
+           if (((VacuumStmt *) parsetree)->vacuum)
+               commandTag = "VACUUM";
+           else
+               commandTag = "ANALYZE";
+           set_ps_display(commandTag);
  
-           vacuum(((VacuumStmt *) parsetree)->vacrel,
-                  ((VacuumStmt *) parsetree)->verbose,
-                  ((VacuumStmt *) parsetree)->analyze,
-                  ((VacuumStmt *) parsetree)->va_spec);
+           vacuum((VacuumStmt *) parsetree);
             break;
  
         case T_ExplainStmt:
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 1fe0afb0a35b44ad34e76fbb73439194a73690ad..41ba82db7b574d6ba6d095a25092376d04702250 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -57,9 +57,6 @@
  /* default selectivity estimate for pattern-match operators such as LIKE */
  #define DEFAULT_MATCH_SEL  0.01
  
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO  0.1
-
  static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                   Datum lobound, Datum hibound, Oid boundstypid,
                   double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
  static unsigned char *convert_string_datum(Datum value, Oid typid);
  static double convert_timevalue_to_scalar(Datum value, Oid typid);
  static void getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid,
-                int *typlen,
-                bool *typbyval,
-                int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
-                Oid typid, int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival);
+                            Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                                 Form_pg_statistic stats);
  static Selectivity prefix_selectivity(char *prefix,
                    Oid relid,
                    AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
     AttrNumber  attno = PG_GETARG_INT16(2);
     Datum       value = PG_GETARG_DATUM(3);
     int32       flag = PG_GETARG_INT32(4);
-   float8      result;
-
-   if (NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_EQ_SEL;
-   else
+   Oid         typid;
+   int32       typmod;
+   HeapTuple   statsTuple;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      selec;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+   /* get info about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   /* get stats for the attribute, if available */
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (HeapTupleIsValid(statsTuple))
     {
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       double      nullfrac;
-       double      commonfrac;
-       Datum       commonval;
-       double      selec;
-
-       /* get info about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       /* get stats for the attribute, if available */
-       if (getattstatistics(relid, attno, typid, typmod,
-                            &nullfrac, &commonfrac, &commonval,
-                            NULL, NULL))
-       {
-           if (flag & SEL_CONSTANT)
-           {
+       Form_pg_statistic stats;
  
-               /*
-                * Is the constant "=" to the column's most common value?
-                * (Although the operator may not really be "=", we will
-                * assume that seeing whether it returns TRUE for the most
-                * common value is useful information. If you don't like
-                * it, maybe you shouldn't be using eqsel for your
-                * operator...)
-                */
-               RegProcedure eqproc = get_opcode(opid);
-               bool        mostcommon;
+       stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
  
-               if (eqproc == (RegProcedure) NULL)
-                   elog(ERROR, "eqsel: no procedure for operator %u",
-                        opid);
+       if (flag & SEL_CONSTANT)
+       {
+           bool    match = false;
+           int     i;
  
-               /* be careful to apply operator right way 'round */
-               if (flag & SEL_RIGHT)
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              commonval,
-                                                              value));
-               else
-                   mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                              value,
-                                                            commonval));
+           /*
+            * Is the constant "=" to any of the column's most common
+            * values?  (Although the given operator may not really be
+            * "=", we will assume that seeing whether it returns TRUE
+            * is an appropriate test.  If you don't like this, maybe you
+            * shouldn't be using eqsel for your operator...)
+            */
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                &values, &nvalues,
+                                &numbers, &nnumbers))
+           {
+               FmgrInfo    eqproc;
  
-               if (mostcommon)
-               {
+               fmgr_info(get_opcode(opid), &eqproc);
  
-                   /*
-                    * Constant is "=" to the most common value.  We know
-                    * selectivity exactly (or as exactly as VACUUM could
-                    * calculate it, anyway).
-                    */
-                   selec = commonfrac;
-               }
-               else
+               for (i = 0; i < nvalues; i++)
                 {
-
-                   /*
-                    * Comparison is against a constant that is neither
-                    * the most common value nor null.  Its selectivity
-                    * cannot be more than this:
-                    */
-                   selec = 1.0 - commonfrac - nullfrac;
-                   if (selec > commonfrac)
-                       selec = commonfrac;
-
-                   /*
-                    * and in fact it's probably less, so we should apply
-                    * a fudge factor.  The only case where we don't is
-                    * for a boolean column, where indeed we have
-                    * estimated the less-common value's frequency
-                    * exactly!
-                    */
-                   if (typid != BOOLOID)
-                       selec *= NOT_MOST_COMMON_RATIO;
+                   /* be careful to apply operator right way 'round */
+                   if (flag & SEL_RIGHT)
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          values[i],
+                                                          value));
+                   else
+                       match = DatumGetBool(FunctionCall2(&eqproc,
+                                                          value,
+                                                          values[i]));
+                   if (match)
+                       break;
                 }
             }
             else
             {
+               /* no most-common-value info available */
+               values = NULL;
+               numbers = NULL;
+               i = nvalues = nnumbers = 0;
+           }
  
+           if (match)
+           {
+               /*
+                * Constant is "=" to this common value.  We know
+                * selectivity exactly (or as exactly as VACUUM
+                * could calculate it, anyway).
+                */
+               selec = numbers[i];
+           }
+           else
+           {
                 /*
-                * Search is for a value that we do not know a priori, but
-                * we will assume it is not NULL.  Selectivity cannot be
-                * more than this:
+                * Comparison is against a constant that is neither
+                * NULL nor any of the common values.  Its selectivity
+                * cannot be more than this:
                  */
-               selec = 1.0 - nullfrac;
-               if (selec > commonfrac)
-                   selec = commonfrac;
+               double  sumcommon = 0.0;
+               double  otherdistinct;
  
+               for (i = 0; i < nnumbers; i++)
+                   sumcommon += numbers[i];
+               selec = 1.0 - sumcommon - stats->stanullfrac;
+               /*
+                * and in fact it's probably a good deal less.
+                * We approximate that all the not-common values
+                * share this remaining fraction equally, so we
+                * divide by the number of other distinct values.
+                */
+               otherdistinct = get_att_numdistinct(relid, attno,
+                                                   typid, stats)
+                   - nnumbers;
+               if (otherdistinct > 1)
+                   selec /= otherdistinct;
                 /*
-                * and in fact it's probably less, so apply a fudge
-                * factor.
+                * Another cross-check: selectivity shouldn't be
+                * estimated as more than the least common
+                * "most common value".
                  */
-               selec *= NOT_MOST_COMMON_RATIO;
+               if (nnumbers > 0 && selec > numbers[nnumbers-1])
+                   selec = numbers[nnumbers-1];
             }
  
-           /* result should be in range, but make sure... */
-           if (selec < 0.0)
-               selec = 0.0;
-           else if (selec > 1.0)
-               selec = 1.0;
-
-           if (!typbyval)
-               pfree(DatumGetPointer(commonval));
+           free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
         }
         else
         {
+           double      ndistinct;
  
             /*
-            * No VACUUM ANALYZE stats available, so make a guess using
-            * the dispersion stat (if we have that, which is unlikely for
-            * a normal attribute; but for a system attribute we may be
-            * able to estimate it).
+            * Search is for a value that we do not know a priori, but
+            * we will assume it is not NULL.  Estimate the selectivity
+            * as non-null fraction divided by number of distinct values,
+            * so that we get a result averaged over all possible values
+            * whether common or uncommon.  (Essentially, we are assuming
+            * that the not-yet-known comparison value is equally likely
+            * to be any of the possible values, regardless of their
+            * frequency in the table.  Is that a good idea?)
+            */
+           selec = 1.0 - stats->stanullfrac;
+           ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+           if (ndistinct > 1)
+               selec /= ndistinct;
+           /*
+            * Cross-check: selectivity should never be
+            * estimated as more than the most common value's.
              */
-           selec = get_attdispersion(relid, attno, 0.01);
+           if (get_attstatsslot(statsTuple, typid, typmod,
+                                STATISTIC_KIND_MCV, InvalidOid,
+                                NULL, NULL,
+                                &numbers, &nnumbers))
+           {
+               if (nnumbers > 0 && selec > numbers[0])
+                   selec = numbers[0];
+               free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+           }
         }
  
-       result = (float8) selec;
+       ReleaseSysCache(statsTuple);
     }
-   PG_RETURN_FLOAT8(result);
+   else
+   {
+       /*
+        * No VACUUM ANALYZE stats available, so make a guess using
+        * estimated number of distinct values and assuming they are
+        * equally common.  (The guess is unlikely to be very good,
+        * but we do know a few special cases.)
+        */
+       selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+   }
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
  }
  
  /*
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
     AttrNumber  attno = PG_GETARG_INT16(2);
     Datum       value = PG_GETARG_DATUM(3);
     int32       flag = PG_GETARG_INT32(4);
-   float8      result;
+   bool        isgt;
+   HeapTuple   oprTuple;
+   HeapTuple   statsTuple;
+   Form_pg_statistic stats;
+   Oid         contype;
+   FmgrInfo    opproc;
+   Oid         typid;
+   int32       typmod;
+   Datum      *values;
+   int         nvalues;
+   float4     *numbers;
+   int         nnumbers;
+   double      mcv_selec,
+               hist_selec,
+               sumcommon;
+   double      selec;
+   int         i;
+
+   if (NONVALUE(relid) || NONVALUE(attno))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+   /* Can't do anything useful if no constant to compare against, either */
+   if (!(flag & SEL_CONSTANT))
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
  
-   if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
-       result = DEFAULT_INEQ_SEL;
+   /*
+    * Force the constant to be on the right to simplify later logic.
+    * This means that we may be dealing with either "<" or ">" cases.
+    */
+   if (flag & SEL_RIGHT)
+   {
+       /* we have x < const */
+       isgt = false;
+   }
     else
     {
-       HeapTuple   oprtuple;
-       Oid         ltype,
-                   rtype,
-                   contype;
-       Oid         typid;
-       int         typlen;
-       bool        typbyval;
-       int32       typmod;
-       Datum       hival,
-                   loval;
-       double      val,
-                   high,
-                   low,
-                   numerator,
-                   denominator;
-
-       /*
-        * Get left and right datatypes of the operator so we know what
-        * type the constant is.
-        */
-       oprtuple = SearchSysCache(OPEROID,
-                                 ObjectIdGetDatum(opid),
-                                 0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
-           elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       contype = (flag & SEL_RIGHT) ? rtype : ltype;
-       ReleaseSysCache(oprtuple);
-
-       /* Now get info and stats about the attribute */
-       getattproperties(relid, attno,
-                        &typid, &typlen, &typbyval, &typmod);
-
-       if (!getattstatistics(relid, attno, typid, typmod,
-                             NULL, NULL, NULL,
-                             &loval, &hival))
+       /* we have const < x, commute to make x > const */
+       opid = get_commutator(opid);
+       if (!opid)
         {
-           /* no stats available, so default result */
+           /* Use default selectivity (should we raise an error instead?) */
             PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
         }
+       isgt = true;
+   }
  
-       /* Convert the values to a uniform comparison scale. */
-       if (!convert_to_scalar(value, contype, &val,
-                              loval, hival, typid,
-                              &low, &high))
-       {
+   /*
+    * The constant might not be the same datatype as the column;
+    * look at the operator's input types to find out what it is.
+    * Also set up to be able to call the operator's execution proc.
+    */
+   oprTuple = SearchSysCache(OPEROID,
+                             ObjectIdGetDatum(opid),
+                             0, 0, 0);
+   if (!HeapTupleIsValid(oprTuple))
+       elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+   contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+   fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+   ReleaseSysCache(oprTuple);
+
+   /* Now get info and stats about the attribute */
+   getattproperties(relid, attno, &typid, &typmod);
+
+   statsTuple = SearchSysCache(STATRELATT,
+                               ObjectIdGetDatum(relid),
+                               Int16GetDatum(attno),
+                               0, 0);
+   if (!HeapTupleIsValid(statsTuple))
+   {
+       /* no stats available, so default result */
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+   }
+   stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
  
-           /*
-            * Ideally we'd produce an error here, on the grounds that the
-            * given operator shouldn't have scalarltsel registered as its
-            * selectivity func unless we can deal with its operand types.
-            * But currently, all manner of stuff is invoking scalarltsel,
-            * so give a default estimate until that can be fixed.
-            */
-           if (!typbyval)
-           {
-               pfree(DatumGetPointer(hival));
-               pfree(DatumGetPointer(loval));
-           }
-           PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
-       }
+   /*
+    * If we have most-common-values info, add up the fractions of the
+    * MCV entries that satisfy MCV OP CONST.  These fractions contribute
+    * directly to the result selectivity.  Also add up the total fraction
+    * represented by MCV entries.
+    */
+   mcv_selec = 0.0;
+   sumcommon = 0.0;
  
-       /* release temp storage if needed */
-       if (!typbyval)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_MCV, InvalidOid,
+                        &values, &nvalues,
+                        &numbers, &nnumbers))
+   {
+       for (i = 0; i < nvalues; i++)
         {
-           pfree(DatumGetPointer(hival));
-           pfree(DatumGetPointer(loval));
+           if (DatumGetBool(FunctionCall2(&opproc,
+                                          values[i],
+                                          value)))
+               mcv_selec += numbers[i];
+           sumcommon += numbers[i];
         }
+       free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+   }
+
+   /*
+    * If there is a histogram, determine which bin the constant falls in,
+    * and compute the resulting contribution to selectivity.
+    *
+    * Someday, VACUUM might store more than one histogram per rel/att,
+    * corresponding to more than one possible sort ordering defined for
+    * the column type.  However, to make that work we will need to figure
+    * out which staop to search for --- it's not necessarily the one we
+    * have at hand!  (For example, we might have a '<=' operator rather
+    * than the '<' operator that will appear in staop.)  For now, assume
+    * that whatever appears in pg_statistic is sorted the same way our
+    * operator sorts.
+    */
+   hist_selec = 0.0;
  
-       if (high <= low)
+   if (get_attstatsslot(statsTuple, typid, typmod,
+                        STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                        &values, &nvalues,
+                        NULL, NULL))
+   {
+       if (nvalues > 1)
         {
+           double  histfrac;
+           bool    ltcmp;
+
+           ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                              values[0],
+                                              value));
+           if (isgt)
+               ltcmp = !ltcmp;
+           if (!ltcmp)
+           {
+               /* Constant is below lower histogram boundary. */
+               histfrac = 0.0;
+           }
+           else
+           {
+               /*
+                * Scan to find proper location.  This could be made faster
+                * by using a binary-search method, but it's probably not
+                * worth the trouble for typical histogram sizes.
+                */
+               for (i = 1; i < nvalues; i++)
+               {
+                   ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                                      values[i],
+                                                      value));
+                   if (isgt)
+                       ltcmp = !ltcmp;
+                   if (!ltcmp)
+                       break;
+               }
+               if (i >= nvalues)
+               {
+                   /* Constant is above upper histogram boundary. */
+                   histfrac = 1.0;
+               }
+               else
+               {
+                   double      val,
+                               high,
+                               low;
+                   double      binfrac;
  
+                   /*
+                    * We have values[i-1] < constant < values[i].
+                    *
+                    * Convert the constant and the two nearest bin boundary
+                    * values to a uniform comparison scale, and do a linear
+                    * interpolation within this bin.
+                    */
+                   if (convert_to_scalar(value, contype, &val,
+                                         values[i-1], values[i], typid,
+                                         &low, &high))
+                   {
+                       if (high <= low)
+                       {
+                           /* cope if bin boundaries appear identical */
+                           binfrac = 0.5;
+                       }
+                       else if (val <= low)
+                           binfrac = 0.0;
+                       else if (val >= high)
+                           binfrac = 1.0;
+                       else
+                           binfrac = (val - low) / (high - low);
+                   }
+                   else
+                   {
+                       /*
+                        * Ideally we'd produce an error here, on the grounds
+                        * that the given operator shouldn't have scalarltsel
+                        * registered as its selectivity func unless we can
+                        * deal with its operand types.  But currently, all
+                        * manner of stuff is invoking scalarltsel, so give a
+                        * default estimate until that can be fixed.
+                        */
+                       binfrac = 0.5;
+                   }
+                   /*
+                    * Now, compute the overall selectivity across the values
+                    * represented by the histogram.  We have i-1 full bins
+                    * and binfrac partial bin below the constant.
+                    */
+                   histfrac = (double) (i-1) + binfrac;
+                   histfrac /= (double) (nvalues - 1);
+               }
+           }
             /*
-            * If we trusted the stats fully, we could return a small or
-            * large selec depending on which side of the single data
-            * point the constant is on.  But it seems better to assume
-            * that the stats are wrong and return a default...
+            * Now histfrac = fraction of histogram entries below the constant.
+            *
+            * Account for "<" vs ">"
              */
-           result = DEFAULT_INEQ_SEL;
-       }
-       else if (val < low || val > high)
-       {
-
+           hist_selec = isgt ? (1.0 - histfrac) : histfrac;
             /*
-            * If given value is outside the statistical range, return a
-            * small or large value; but not 0.0/1.0 since there is a
-            * chance the stats are out of date.
+            * The histogram boundaries are only approximate to begin
+            * with, and may well be out of date anyway.  Therefore,
+            * don't believe extremely small or large selectivity
+            * estimates.
              */
-           if (flag & SEL_RIGHT)
-               result = (val < low) ? 0.001 : 0.999;
-           else
-               result = (val < low) ? 0.999 : 0.001;
-       }
-       else
-       {
-           denominator = high - low;
-           if (flag & SEL_RIGHT)
-               numerator = val - low;
-           else
-               numerator = high - val;
-           result = numerator / denominator;
+           if (hist_selec < 0.001)
+               hist_selec = 0.001;
+           else if (hist_selec > 0.999)
+               hist_selec = 0.999;
         }
+
+       free_attstatsslot(typid, values, nvalues, NULL, 0);
     }
-   PG_RETURN_FLOAT8(result);
+
+   /*
+    * Now merge the results from the MCV and histogram calculations,
+    * realizing that the histogram covers only the non-null values that
+    * are not listed in MCV.
+    */
+   selec = 1.0 - stats->stanullfrac - sumcommon;
+
+   if (hist_selec > 0.0)
+       selec *= hist_selec;
+   else
+   {
+       /*
+        * If no histogram but there are values not accounted for by MCV,
+        * arbitrarily assume half of them will match.
+        */
+       selec *= 0.5;
+   }
+
+   selec += mcv_selec;
+
+   ReleaseSysCache(statsTuple);
+
+   /* result should be in range, but make sure... */
+   if (selec < 0.0)
+       selec = 0.0;
+   else if (selec > 1.0)
+       selec = 1.0;
+
+   PG_RETURN_FLOAT8((float8) selec);
  }
  
  /*
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
     Datum       value = PG_GETARG_DATUM(3);
     int32       flag = PG_GETARG_INT32(4);
     Oid         ltopid;
-   float8      result;
  
     /*
-    * Compute selectivity of "<", then invert --- but only if we were
-    * able to produce a non-default estimate.  Note that we get the
-    * negator which strictly speaking means we are looking at "<=" for
-    * ">" or "<" for ">=".  We assume this won't matter.
+    * Commute so that we have a "<" or "<=" operator, then apply
+    * scalarltsel.
      */
-   ltopid = get_negator(opid);
-   if (ltopid)
-   {
-       result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
-                                               ObjectIdGetDatum(ltopid),
-                                                ObjectIdGetDatum(relid),
-                                                   Int16GetDatum(attno),
-                                                   value,
-                                                   Int32GetDatum(flag)));
-   }
-   else
+   ltopid = get_commutator(opid);
+   if (!ltopid)
     {
         /* Use default selectivity (should we raise an error instead?) */
-       result = DEFAULT_INEQ_SEL;
+       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
     }
  
-   if (result != DEFAULT_INEQ_SEL)
-       result = 1.0 - result;
-
-   PG_RETURN_FLOAT8(result);
+   flag ^= SEL_RIGHT;
+   return DirectFunctionCall5(scalarltsel,
+                              ObjectIdGetDatum(ltopid),
+                              ObjectIdGetDatum(relid),
+                              Int16GetDatum(attno),
+                              value,
+                              Int32GetDatum(flag));
  }
  
  /*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
         result = DEFAULT_MATCH_SEL;
     else
     {
-       HeapTuple   oprtuple;
+       HeapTuple   oprTuple;
         Oid         ltype,
                     rtype;
         char       *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
          * Get left and right datatypes of the operator so we know what
          * type the attribute is.
          */
-       oprtuple = SearchSysCache(OPEROID,
+       oprTuple = SearchSysCache(OPEROID,
                                   ObjectIdGetDatum(opid),
                                   0, 0, 0);
-       if (!HeapTupleIsValid(oprtuple))
+       if (!HeapTupleIsValid(oprTuple))
             elog(ERROR, "patternsel: no tuple for operator %u", opid);
-       ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-       rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-       ReleaseSysCache(oprtuple);
+       ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+       rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+       ReleaseSysCache(oprTuple);
  
         /* the right-hand const is type text for all supported operators */
         Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
     AttrNumber  attno1 = PG_GETARG_INT16(2);
     Oid         relid2 = PG_GETARG_OID(3);
     AttrNumber  attno2 = PG_GETARG_INT16(4);
-   float8      result;
-   float8      num1,
-               num2,
-               min;
     bool        unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
     bool        unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+   double      selec;
  
     if (unknown1 && unknown2)
-       result = DEFAULT_EQ_SEL;
+       selec = DEFAULT_EQ_SEL;
     else
     {
-       num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
-       num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+       Oid         typid1;
+       Oid         typid2;
+       int32       typmod1;
+       int32       typmod2;
+       HeapTuple   statsTuple1 = NULL;
+       HeapTuple   statsTuple2 = NULL;
+       Form_pg_statistic stats1 = NULL;
+       Form_pg_statistic stats2 = NULL;
+       double      nd1,
+                   nd2;
+
+       if (unknown1)
+       {
+           nd1 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid1, attno1, &typid1, &typmod1);
+
+           /* get stats for the attribute, if available */
+           statsTuple1 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid1),
+                                        Int16GetDatum(attno1),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple1))
+               stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+           nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+       }
+
+       if (unknown2)
+       {
+           nd2 = 100.0;
+       }
+       else
+       {
+           /* get info about the attribute */
+           getattproperties(relid2, attno2, &typid2, &typmod2);
+
+           /* get stats for the attribute, if available */
+           statsTuple2 = SearchSysCache(STATRELATT,
+                                        ObjectIdGetDatum(relid2),
+                                        Int16GetDatum(attno2),
+                                        0, 0);
+           if (HeapTupleIsValid(statsTuple2))
+               stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+           nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+       }
  
         /*
-        * The join selectivity cannot be more than num2, since each tuple
-        * in table 1 could match no more than num2 fraction of tuples in
-        * table 2 (and that's only if the table-1 tuple matches the most
-        * common value in table 2, so probably it's less).  By the same
-        * reasoning it is not more than num1. The min is therefore an
-        * upper bound.
+        * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+        * (can we produce any theory for this?).
          *
-        * If we know the dispersion of only one side, use it; the reasoning
-        * above still works.
+        * XXX possibility to do better: if both attributes have MCV lists
+        * then we could determine the exact join selectivity between the
+        * MCV sets, and only have to assume the join behavior of the non-MCV
+        * values.  This could be a big win when the MCVs cover a large part
+        * of the population.
          *
-        * XXX can we make a better estimate here?  Using the nullfrac
-        * statistic might be helpful, for example.  Assuming the operator
-        * is strict (does not succeed for null inputs) then the
-        * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
-        * which might be usefully small if there are many nulls.  How
-        * about applying the operator to the most common values?
+        * XXX what about nulls?
          */
-       min = (num1 < num2) ? num1 : num2;
-       result = min;
+       selec = 1.0 / sqrt(nd1 * nd2);
+       if (selec > 1.0)
+           selec = 1.0;
+
+       if (HeapTupleIsValid(statsTuple1))
+           ReleaseSysCache(statsTuple1);
+       if (HeapTupleIsValid(statsTuple2))
+           ReleaseSysCache(statsTuple2);
+
     }
-   PG_RETURN_FLOAT8(result);
+   PG_RETURN_FLOAT8((float8) selec);
  }
  
  /*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
   *   Returns "true" if successful.
   *
   * All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values.  XXX what about NUMERIC values that are outside
+ * the range of "double"?
   *
   * String datatypes are converted by convert_string_to_scalar(),
   * which is explained below.  The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  {
     switch (typid)
     {
-           case BOOLOID:
+       case BOOLOID:
             return (double) DatumGetBool(value);
         case INT2OID:
             return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
   * three strings before computing the scaled values.  This allows us to
   * "zoom in" when we encounter a narrow data range.  An example is a phone
   * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
   */
  static void
  convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
  /*
   * getattproperties
   *   Retrieve pg_attribute properties for an attribute,
- *   including type OID, type len, type byval flag, typmod.
+ *   including type OID and typmod.
   */
  static void
  getattproperties(Oid relid, AttrNumber attnum,
-                Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+                Oid *typid, int32 *typmod)
  {
     HeapTuple   atp;
     Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
     att_tup = (Form_pg_attribute) GETSTRUCT(atp);
  
     *typid = att_tup->atttypid;
-   *typlen = att_tup->attlen;
-   *typbyval = att_tup->attbyval;
     *typmod = att_tup->atttypmod;
  
     ReleaseSysCache(atp);
  }
  
  /*
- * getattstatistics
- *   Retrieve the pg_statistic data for an attribute.
- *   Returns 'false' if no stats are available.
+ * get_att_numdistinct
   *
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ *   Estimate the number of distinct values of an attribute.
   *
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
   *
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values.  (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute.  If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
   */
-static bool
-getattstatistics(Oid relid,
-                AttrNumber attnum,
-                Oid typid,
-                int32 typmod,
-                double *nullfrac,
-                double *commonfrac,
-                Datum *commonval,
-                Datum *loval,
-                Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                   Form_pg_statistic stats)
  {
-   HeapTuple   tuple;
-   HeapTuple   typeTuple;
-   FmgrInfo    inputproc;
-   Oid         typelem;
-   bool        isnull;
+   HeapTuple   reltup;
+   double      ntuples;
  
     /*
-    * We assume that there will only be one entry in pg_statistic for the
-    * given rel/att, so we search WITHOUT considering the staop column.
-    * Someday, VACUUM might store more than one entry per rel/att,
-    * corresponding to more than one possible sort ordering defined for
-    * the column type.  However, to make that work we will need to figure
-    * out which staop to search for --- it's not necessarily the one we
-    * have at hand!  (For example, we might have a '>' operator rather
-    * than the '<' operator that will appear in staop.)
+    * Special-case boolean columns: presumably, two distinct values.
+    *
+    * Are there any other cases we should wire in special estimates for?
      */
-   tuple = SearchSysCache(STATRELID,
-                          ObjectIdGetDatum(relid),
-                          Int16GetDatum((int16) attnum),
-                          0, 0);
-   if (!HeapTupleIsValid(tuple))
-   {
-       /* no such stats entry */
-       return false;
-   }
+   if (typid == BOOLOID)
+       return 2.0;
  
-   if (nullfrac)
-       *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
-   if (commonfrac)
-       *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
-   /* Get the type input proc for the column datatype */
-   typeTuple = SearchSysCache(TYPEOID,
-                              ObjectIdGetDatum(typid),
-                              0, 0, 0);
-   if (!HeapTupleIsValid(typeTuple))
-       elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
-            typid);
-   fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
-   typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
-   ReleaseSysCache(typeTuple);
+   /*
+    * If VACUUM ANALYZE determined a fixed estimate, use it.
+    */
+   if (stats && stats->stadistinct > 0.0)
+       return stats->stadistinct;
  
     /*
-    * Values are variable-length fields, so cannot access as struct
-    * fields. Must do it the hard way with SysCacheGetAttr.
+    * Otherwise we need to get the relation size.
      */
-   if (commonval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stacommonval,
-                                         &isnull);
+   reltup = SearchSysCache(RELOID,
+                           ObjectIdGetDatum(relid),
+                           0, 0, 0);
+   if (!HeapTupleIsValid(reltup))
+       elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
  
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stacommonval is null");
-           *commonval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *commonval = FunctionCall3(&inputproc,
-                                      CStringGetDatum(strval),
-                                      ObjectIdGetDatum(typelem),
-                                      Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
  
-   if (loval)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_staloval,
-                                         &isnull);
+   ReleaseSysCache(reltup);
  
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: staloval is null");
-           *loval = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *loval = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
-   }
+   if (ntuples <= 0.0)
+       return 100.0;           /* no data available; return a default */
  
-   if (hival)
-   {
-       Datum       val = SysCacheGetAttr(STATRELID, tuple,
-                                         Anum_pg_statistic_stahival,
-                                         &isnull);
+   /*
+    * If VACUUM ANALYZE determined a scaled estimate, use it.
+    */
+   if (stats && stats->stadistinct < 0.0)
+       return - stats->stadistinct * ntuples;
  
-       if (isnull)
-       {
-           elog(DEBUG, "getattstatistics: stahival is null");
-           *hival = PointerGetDatum(NULL);
-       }
-       else
-       {
-           char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                  val));
-
-           *hival = FunctionCall3(&inputproc,
-                                  CStringGetDatum(strval),
-                                  ObjectIdGetDatum(typelem),
-                                  Int32GetDatum(typmod));
-           pfree(strval);
-       }
+   /*
+    * VACUUM ANALYZE does not compute stats for system attributes,
+    * but some of them can reasonably be assumed unique anyway.
+    */
+   switch (attnum)
+   {
+       case ObjectIdAttributeNumber:
+       case SelfItemPointerAttributeNumber:
+           return ntuples;
+       case TableOidAttributeNumber:
+           return 1.0;
     }
  
-   ReleaseSysCache(tuple);
+   /*
+    * Estimate ndistinct = ntuples if the table is small, else 100.
+    */
+   if (ntuples < 100.0)
+       return ntuples;
  
-   return true;
+   return 100.0;
  }
  
  /*-------------------------------------------------------------------------
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 82d55866215aac34724aa44deb029feea9d94a76..3995de5d7a1325085c901b0d2427cbbd775170ee 100644 (file)
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
   *
   * NOTES
   *   Eventually, the index information should go through here, too.
@@ -18,7 +18,10 @@
  #include "access/tupmacs.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
  #include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/syscache.h"
  
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
         return -1;
  }
  
-/*
- * get_attdispersion
- *
- *   Retrieve the dispersion statistic for an attribute,
- *   or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value.  This value may vary
- * depending on context.  (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
-   HeapTuple   atp;
-   Form_pg_attribute att_tup;
-   double      dispersion;
-   Oid         atttypid;
-   int32       ntuples;
-
-   atp = SearchSysCache(ATTNUM,
-                        ObjectIdGetDatum(relid),
-                        Int16GetDatum(attnum),
-                        0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
-            relid, attnum);
-       return min_estimate;
-   }
-
-   att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
-   dispersion = att_tup->attdispersion;
-   atttypid = att_tup->atttypid;
-
-   ReleaseSysCache(atp);
-
-   if (dispersion > 0.0)
-       return dispersion;      /* we have a specific estimate from VACUUM */
-
-   /*
-    * Special-case boolean columns: the dispersion of a boolean is highly
-    * unlikely to be anywhere near 1/numtuples, instead it's probably
-    * more like 0.5.
-    *
-    * Are there any other cases we should wire in special estimates for?
-    */
-   if (atttypid == BOOLOID)
-       return 0.5;
-
-   /*
-    * Dispersion is either 0 (no data available) or -1 (dispersion is
-    * 1/numtuples).  Either way, we need the relation size.
-    */
-
-   atp = SearchSysCache(RELOID,
-                        ObjectIdGetDatum(relid),
-                        0, 0, 0);
-   if (!HeapTupleIsValid(atp))
-   {
-       /* this should not happen */
-       elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
-       return min_estimate;
-   }
-
-   ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-   ReleaseSysCache(atp);
-
-   if (ntuples == 0)
-       return min_estimate;    /* no data available */
-
-   if (dispersion < 0.0)       /* VACUUM thinks there are no duplicates */
-       return 1.0 / (double) ntuples;
-
-   /*
-    * VACUUM ANALYZE does not compute dispersion for system attributes,
-    * but some of them can reasonably be assumed unique anyway.
-    */
-   if (attnum == ObjectIdAttributeNumber ||
-       attnum == SelfItemPointerAttributeNumber)
-       return 1.0 / (double) ntuples;
-   if (attnum == TableOidAttributeNumber)
-       return 1.0;
-
-   /*
-    * VACUUM ANALYZE has not been run for this table. Produce an estimate
-    * of 1/numtuples.  This may produce unreasonably small estimates for
-    * large tables, so limit the estimate to no less than min_estimate.
-    */
-   dispersion = 1.0 / (double) ntuples;
-   if (dispersion < min_estimate)
-       dispersion = min_estimate;
-
-   return dispersion;
-}
-
  /*             ---------- INDEX CACHE ----------                        */
  
  /*     watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
  }
  
  #endif
+
+/*             ---------- STATISTICS CACHE ----------                   */
+
+/*
+ * get_attstatsslot
+ *
+ *     Extract the contents of a "slot" of a pg_statistic tuple.
+ *     Returns TRUE if requested slot type was found, else FALSE.
+ *     Reports elog(ERROR) if a requested array of that slot is null or bogus.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache, since most callers
+ * want several values from the entry and should not repeat the lookup.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype: type OID of attribute.
+ * atttypmod: typmod of attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if values isn't NULL, stavalues are extracted (both set).
+ * numbers, nnumbers: if numbers isn't NULL, stanumbers are extracted (both set).
+ *
+ * If assigned, values and numbers are set to point to palloc'd arrays.
+ * If the attribute type is pass-by-reference, the values referenced by
+ * the values array are themselves palloc'd.  The palloc'd stuff can be
+ * freed by calling free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+                Oid atttype, int32 atttypmod,
+                int reqkind, Oid reqop,
+                Datum **values, int *nvalues,
+                float4 **numbers, int *nnumbers)
+{
+   Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+   int         i,
+               j;
+   Datum       val;
+   bool        isnull;
+   ArrayType  *statarray;
+   int         narrayelem;
+   HeapTuple   typeTuple;
+   FmgrInfo    inputproc;
+   Oid         typelem;
+
+   for (i = 0; i < STATISTIC_NUM_SLOTS; i++)   /* look for first matching slot */
+   {                           /* (stakindN/staopN are indexed as arrays) */
+       if ((&stats->stakind1)[i] == reqkind &&
+           (reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+           break;
+   }
+   if (i >= STATISTIC_NUM_SLOTS)
+       return false;           /* not there */
+
+   if (values)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stavalues1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stavalues is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * Do initial examination of the array.  This produces a list
+        * of text Datums --- ie, pointers into the text array value.
+        */
+       deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+       narrayelem = *nvalues;
+       /*
+        * We now need to replace each text Datum by its internal equivalent.
+        *
+        * Get the type input proc and typelem for the column datatype.
+        */
+       typeTuple = SearchSysCache(TYPEOID,
+                                  ObjectIdGetDatum(atttype),
+                                  0, 0, 0);
+       if (!HeapTupleIsValid(typeTuple))
+           elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+                atttype);
+       fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+       typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+       ReleaseSysCache(typeTuple);
+       /*
+        * Do the conversions (textout, then the column type's input proc).
+        * The palloc'd array of Datums is reused in place.
+        */
+       for (j = 0; j < narrayelem; j++)
+       {
+           char       *strval;
+
+           strval = DatumGetCString(DirectFunctionCall1(textout,
+                                                        (*values)[j]));
+           (*values)[j] = FunctionCall3(&inputproc,
+                                        CStringGetDatum(strval),
+                                        ObjectIdGetDatum(typelem),
+                                        Int32GetDatum(atttypmod));
+           pfree(strval);
+       }
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   if (numbers)
+   {
+       val = SysCacheGetAttr(STATRELATT, statstuple,
+                             Anum_pg_statistic_stanumbers1 + i,
+                             &isnull);
+       if (isnull)
+           elog(ERROR, "get_attstatsslot: stanumbers is null");
+       statarray = DatumGetArrayTypeP(val);
+       /*
+        * We expect the array to be a 1-D float4 array; verify that.
+        * We don't need to use deconstruct_array() since the array
+        * data is just going to look like a C array of float4 values.
+        */
+       narrayelem = ARR_DIMS(statarray)[0];
+       if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+           ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+           elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+       *numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+       memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+       *nnumbers = narrayelem;
+       /*
+        * Free statarray if it's a detoasted copy.
+        */
+       if ((Pointer) statarray != DatumGetPointer(val))
+           pfree(statarray);
+   }
+
+   return true;
+}
+/* free_attstatsslot: release get_attstatsslot results; NULL arrays are skipped. */
+void
+free_attstatsslot(Oid atttype,
+                 Datum *values, int nvalues,
+                 float4 *numbers, int nnumbers)
+{
+   if (values)
+   {
+       if (! get_typbyval(atttype))
+       {
+           int     i;
+           /* type is not pass-by-value: pfree the pointer held in each Datum */
+           for (i = 0; i < nvalues; i++)
+               pfree(DatumGetPointer(values[i]));
+       }
+       pfree(values);          /* then the Datum array itself */
+   }
+   if (numbers)
+       pfree(numbers);         /* nnumbers is not needed to free it */
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index 75ef3179202695a3fb7a5336b7bc4f3e24d3f3f5..4e35b3fb35ba67aa78d337e6bdb39149c6256f8c 100644 (file)
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
   *
   * NOTES
   *   These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
             0,
             0
     }},
-   {StatisticRelationName,     /* STATRELID */
+   {StatisticRelationName,     /* STATRELATT */
         StatisticRelidAttnumIndex,
         2,
         {
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index d27bfb29668711e985f1ba29bd1285ab77201bf2..5a77c47c20085f0d24ae5b8edb6ef2ca70acdc27 100644 (file)
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -78,7 +78,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -87,7 +87,11 @@
  
  #include "access/heapam.h"
  #include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
  #include "miscadmin.h"
+#include "utils/fmgroids.h"
  #include "utils/logtape.h"
  #include "utils/lsyscache.h"
  #include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
     TupleDesc   tupDesc;
     int         nKeys;
     ScanKey     scanKeys;
+   SortFunctionKind *sortFnKinds;
  
     /*
      * These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
     Oid         datumType;
     Oid         sortOperator;
     FmgrInfo    sortOpFn;       /* cached lookup data for sortOperator */
+   SortFunctionKind sortFnKind;
     /* we need typelen and byval in order to know how to copy the Datums. */
     int         datumTypeLen;
     bool        datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
  
  Tuplesortstate *
  tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
+                    int nkeys,
+                    Oid *sortOperators, AttrNumber *attNums,
                      bool randomAccess)
  {
     Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   int         i;
  
-   AssertArg(nkeys >= 1);
-   AssertArg(keys[0].sk_attno != 0);
-   AssertArg(keys[0].sk_procedure != 0);
+   AssertArg(nkeys > 0);
  
     state->comparetup = comparetup_heap;
     state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
  
     state->tupDesc = tupDesc;
     state->nKeys = nkeys;
-   state->scanKeys = keys;
+   state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+   MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+   state->sortFnKinds = (SortFunctionKind *)
+       palloc(nkeys * sizeof(SortFunctionKind));
+   MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+   for (i = 0; i < nkeys; i++)
+   {
+       RegProcedure sortFunction;
+
+       AssertArg(sortOperators[i] != 0);
+       AssertArg(attNums[i] != 0);
+
+       /* select a function that implements the sort operator */
+       SelectSortFunction(sortOperators[i], &sortFunction,
+                          &state->sortFnKinds[i]);
+
+       ScanKeyEntryInitialize(&state->scanKeys[i],
+                              0x0,
+                              attNums[i],
+                              sortFunction,
+                              (Datum) 0);
+   }
  
     return state;
  }
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
                       bool randomAccess)
  {
     Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   RegProcedure sortFunction;
     int16       typlen;
     bool        typbyval;
  
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
  
     state->datumType = datumType;
     state->sortOperator = sortOperator;
-   /* lookup the function that implements the sort operator */
-   fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+   /* select a function that implements the sort operator */
+   SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+   /* and look up the function */
+   fmgr_info(sortFunction, &state->sortOpFn);
+
     /* lookup necessary attributes of the datum type */
     get_typlenbyval(datumType, &typlen, &typbyval);
     state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
     }
     if (state->memtupindex)
         pfree(state->memtupindex);
+
+   /* this stuff might better belong in a variant-specific shutdown routine */
+   if (state->scanKeys)
+       pfree(state->scanKeys);
+   if (state->sortFnKinds)
+       pfree(state->sortFnKinds);
+
     pfree(state);
  }
  
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
     for (nkey = 0; nkey < state->nKeys; nkey++)
     {
         ScanKey     scanKey = state->scanKeys + nkey;
+       SortFunctionKind fnKind = state->sortFnKinds[nkey];
         AttrNumber  attno = scanKey->sk_attno;
         Datum       lattr,
                     rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
         }
         else if (isnull2)
             return -1;
-       else if (scanKey->sk_flags & SK_COMMUTE)
-       {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return -1;      /* a < b after commute */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return 1;       /* a > b after commute */
-       }
         else
         {
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          lattr, rattr)))
-               return -1;      /* a < b */
-           if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                          rattr, lattr)))
-               return 1;       /* a > b */
+           int32       compare;
+
+           if (fnKind == SORTFUNC_LT)
+           {
+               if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                              lattr, rattr)))
+                   compare = -1;   /* a < b */
+               else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                                   rattr, lattr)))
+                   compare = 1;    /* a > b */
+               else
+                   compare = 0;
+           }
+           else
+           {
+               /* sort function is CMP or REVCMP */
+               compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+                                                     lattr, rattr));
+               if (fnKind == SORTFUNC_REVCMP)
+                   compare = -compare;
+           }
+
+           if (compare != 0)
+           {
+               if (scanKey->sk_flags & SK_COMMUTE)
+                   compare = -compare;
+               return compare;
+           }
         }
     }
  
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
         }
         else
         {
+           /* the comparison function is always of CMP type */
             compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
-                                               attrDatum1, attrDatum2));
+                                                 attrDatum1,
+                                                 attrDatum2));
         }
  
         if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
     }
     else if (rtup->isNull)
         return -1;
-   else
+   else if (state->sortFnKind == SORTFUNC_LT)
     {
         if (DatumGetBool(FunctionCall2(&state->sortOpFn,
                                        ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
             return 1;           /* a > b */
         return 0;
     }
+   else
+   {
+       /* sort function is CMP or REVCMP */
+       int32   compare;
+
+       compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+                                             ltup->val, rtup->val));
+       if (state->sortFnKind == SORTFUNC_REVCMP)
+           compare = -compare;
+       return compare;
+   }
  }
  
  static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
         return (unsigned int) tuplelen;
     }
  }
+
+
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.  The straightforward
+ * method is to use the operator's implementation proc --- ie, "<"
+ * comparison.  However, that way often requires two calls of the function
+ * per comparison.  If we can find a btree three-way comparator function
+ * associated with the operator, we can use it to do the comparisons
+ * more efficiently.  We also support the possibility that the operator
+ * is ">" (descending sort), in which case we have to reverse the output
+ * of the btree comparator.
+ *
+ * Inputs:
+ *   sortOperator - OID of the ordering operator to be implemented
+ * Outputs:
+ *   *sortFunction - procedure the caller should invoke: either the
+ *       BTORDER_PROC support function of the btree opclass found in
+ *       pg_amop, or else get_opcode(sortOperator)
+ *   *kind - how that procedure is to be used: SORTFUNC_CMP when a
+ *       comparator was found via a BTLessStrategyNumber entry,
+ *       SORTFUNC_REVCMP when found via a BTGreaterStrategyNumber entry,
+ *       SORTFUNC_LT when falling back to the operator's own proc
+ *
+ * Reports elog(ERROR) if no comparator is found and get_opcode() yields
+ * no valid procedure for the operator either.
+ *
+ * Both catalogs are opened with AccessShareLock and read with keyed heap
+ * scans under SnapshotNow; each is closed again before moving on.
+ *
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+                  RegProcedure *sortFunction,
+                  SortFunctionKind *kind)
+{
+   Relation    relation;
+   HeapScanDesc scan;
+   ScanKeyData skey[3];        /* pg_amop scan uses 2 keys, pg_amproc scan 3 */
+   HeapTuple   tuple;
+   Oid         opclass = InvalidOid;   /* stays invalid if no btree entry matches */
+
+   /*
+    * Scan pg_amop to see if the target operator is registered as the
+    * "<" or ">" operator of any btree opclass.  It's possible that it
+    * might be registered both ways (eg, if someone were to build a
+    * "reverse sort" opclass for some reason); prefer the "<" case if so.
+    * If the operator is registered the same way in multiple opclasses,
+    * assume we can use the associated comparator function from any one.
+    *
+    * The scan keys restrict the scan to rows with amopid = BTREE_AM_OID
+    * and amopopr = sortOperator; the strategy number is checked by hand
+    * in the loop below.
+    */
+   relation = heap_openr(AccessMethodOperatorRelationName,
+                         AccessShareLock);
+
+   ScanKeyEntryInitialize(&skey[0], 0,
+                          Anum_pg_amop_amopid,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(BTREE_AM_OID));
+
+   ScanKeyEntryInitialize(&skey[1], 0,
+                          Anum_pg_amop_amopopr,
+                          F_OIDEQ,
+                          ObjectIdGetDatum(sortOperator));
+
+   scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+   while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+   {
+       Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+       if (aform->amopstrategy == BTLessStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_CMP;   /* provisional; reset below if no comparator */
+           break;              /* done looking: a "<" entry is the preferred match */
+       }
+       else if (aform->amopstrategy == BTGreaterStrategyNumber)
+       {
+           opclass = aform->amopclaid;
+           *kind = SORTFUNC_REVCMP;    /* provisional, as above */
+           /* keep scanning in hopes of finding a BTLess entry */
+       }
+       /* entries with any other strategy number are ignored */
+   }
+
+   heap_endscan(scan);
+   heap_close(relation, AccessShareLock);
+
+   if (OidIsValid(opclass))
+   {
+       /*
+        * Found a suitable opclass, get its comparator support function:
+        * the pg_amproc row with amid = BTREE_AM_OID, amopclaid = opclass
+        * and amprocnum = BTORDER_PROC.
+        */
+       relation = heap_openr(AccessMethodProcedureRelationName,
+                             AccessShareLock);
+
+       ScanKeyEntryInitialize(&skey[0], 0,
+                              Anum_pg_amproc_amid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(BTREE_AM_OID));
+
+       ScanKeyEntryInitialize(&skey[1], 0,
+                              Anum_pg_amproc_amopclaid,
+                              F_OIDEQ,
+                              ObjectIdGetDatum(opclass));
+
+       ScanKeyEntryInitialize(&skey[2], 0,
+                              Anum_pg_amproc_amprocnum,
+                              F_INT2EQ,
+                              Int16GetDatum(BTORDER_PROC));
+
+       scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+       *sortFunction = InvalidOid; /* so a failed lookup is detectable below */
+
+       /* only the first matching row is examined */
+       if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+       {
+           Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+           *sortFunction = aform->amproc;
+       }
+
+       heap_endscan(scan);
+       heap_close(relation, AccessShareLock);
+
+       if (RegProcedureIsValid(*sortFunction))
+           return;             /* *kind was already set by the pg_amop scan */
+   }
+
+   /*
+    * Can't find a comparator, so use the operator as-is.  This is reached
+    * both when no btree opclass lists the operator and when the opclass
+    * found has no valid BTORDER_PROC entry; any provisional *kind value
+    * is overwritten here.
+    */
+
+   *kind = SORTFUNC_LT;
+   *sortFunction = get_opcode(sortOperator);
+   if (!RegProcedureIsValid(*sortFunction))
+       elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+            sortOperator);
+}
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h

index 759ab3d39e2494de4fa021c8070ac7e5da62d283..6e38529204dabaab44c078c0af05a6687fd0d966 100644 (file)
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,15 +6,13 @@
   *
   * Copyright (c) 2000, PostgreSQL Development Team
   *
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #ifndef TUPTOASTER_H
  #define TUPTOASTER_H
  
-#ifdef TUPLE_TOASTER_ACTIVE
-
  #include "access/heapam.h"
  #include "access/htup.h"
  #include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
   */
  extern Datum toast_compress_datum(Datum value);
  
-#endif  /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
  
  
  #endif  /* TUPTOASTER_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 963b11c1d386ed6df175ad0e2e92cfe2929af774..832f91fb09f172d5ffc3d31aba10fccd5431c783 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 200101061
+#define CATALOG_VERSION_NO 200105051
  
  #endif
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h

index a7248f6c6dc4fb438d27b29fe250c446534ad228..7ab04b05fb25b1dd765830e90ec1b717c6e2814e 100644 (file)
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -7,13 +7,14 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #ifndef HEAP_H
  #define HEAP_H
  
+#include "catalog/pg_attribute.h"
  #include "utils/rel.h"
  
  typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
                           List *rawColDefaults,
                           List *rawConstraints);
  
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
  #endif  /* HEAP_H */
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h

index 1dac0bb1c31596e1b31e5fc2d82f20835ed7879b..07aaad61c798bc295723dfe80cded8dbc848d6c9 100644 (file)
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
                Datum *datum,
                char *nullv);
  
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
  extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
  extern void setRelhasindex(Oid relid, bool hasindex);
  
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 41a580a37779abae1d46c2b8422b8ece0fbebc2b..cc155cf1bbb314f4cb54a41c23a3a2ed5e1fd5d8 100644 (file)
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
  xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
  xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
  */
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
  DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
  DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
  DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));
diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h

index 58724e94dc966cef18b5345521cafa985a4dbf1e..6e11aa6d530707371c7b5b0f5af4e4174c4919f5 100644 (file)
--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
   *
   * NOTES
   *   the genbki.sh script reads this file and generates .bki
@@ -36,15 +36,14 @@
   *     typedef struct FormData_pg_attribute
   *
   *     If you change the following, make sure you change the structs for
- *     system attributes in heap.c and index.c also.
+ *     system attributes in catalog/heap.c also.
   * ----------------
   */
  CATALOG(pg_attribute) BOOTSTRAP
  {
     Oid         attrelid;       /* OID of relation containing this
                                  * attribute */
-   NameData    attname;
-   Oid         atttypid;
+   NameData    attname;        /* name of attribute */
  
     /*
      * atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
      * attalign attributes of this instance, so they had better match or
      * Postgres will fail.
      */
-
-   float4      attdispersion;
+   Oid         atttypid;
  
     /*
-    * attdispersion is the dispersion statistic of the column (0.0 to
-    * 1.0), or zero if the statistic has not been calculated, or -1.0 if
-    * VACUUM found that the column contains no duplicate entries (in
-    * which case the dispersion should be taken as 1.0/numberOfRows for
-    * the current table size).  The -1.0 hack is useful because the
-    * number of rows may be updated more often than attdispersion is. We
-    * assume that the column will retain its no-duplicate-entry property.
-    * (Perhaps this should be driven off the existence of a UNIQUE index
-    * for the column, instead of being a statistical guess?)
+    * attstattarget is the target number of statistics datapoints to collect
+    * during VACUUM ANALYZE of this column.  A zero here indicates that we
+    * do not wish to collect any stats about this column.
      */
-
-   int2        attlen;
+   int4        attstattarget;
  
     /*
      * attlen is a copy of the typlen field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
      */
-
-   int2        attnum;
+   int2        attlen;
  
     /*
      * attnum is the "attribute number" for the attribute:  A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
      *
      * Note that (attnum - 1) is often used as the index to an array.
      */
+   int2        attnum;
  
-   int4        attnelems;      /* number of dimensions, if an array type */
-
-   int4        attcacheoff;
+   /*
+    * attndims is the declared number of dimensions, if an array type,
+    * otherwise zero.
+    */
+   int4        attndims;
  
     /*
      * fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
      * tuple descriptor, we may then update attcacheoff in the copies.
      * This speeds up the attribute walking process.
      */
-
-   int4        atttypmod;
+   int4        attcacheoff;
  
     /*
      * atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
      * argument. The value will generally be -1 for types that do not need
      * typmod.
      */
-
-   bool        attbyval;
+   int4        atttypmod;
  
     /*
      * attbyval is a copy of the typbyval field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
      */
-
-   char        attstorage;
+   bool        attbyval;
  
     /*----------
      * attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
      * but only as a last resort ('e' and 'x' fields are moved first).
      *----------
      */
+   char        attstorage;
  
+   /* This flag indicates that the attribute is really a set */
     bool        attisset;
-   char        attalign;
  
     /*
      * attalign is a copy of the typalign field from pg_type for this
-    * attribute.  See atttypid above.  See struct Form_pg_type for
-    * definition.
+    * attribute.  See atttypid comments above.
      */
-
-   bool        attnotnull;
+   char        attalign;
  
     /* This flag represents the "NOT NULL" constraint */
-   bool        atthasdef;
+   bool        attnotnull;
  
     /* Has DEFAULT value or not */
+   bool        atthasdef;
  } FormData_pg_attribute;
  
  /*
   * someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
   */
  #define ATTRIBUTE_TUPLE_SIZE \
-   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+   (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
  
  /* ----------------
   *     Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
  #define Anum_pg_attribute_attrelid     1
  #define Anum_pg_attribute_attname      2
  #define Anum_pg_attribute_atttypid     3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
  #define Anum_pg_attribute_attlen       5
  #define Anum_pg_attribute_attnum       6
-#define Anum_pg_attribute_attnelems        7
+#define Anum_pg_attribute_attndims     7
  #define Anum_pg_attribute_attcacheoff  8
  #define Anum_pg_attribute_atttypmod        9
  #define Anum_pg_attribute_attbyval     10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
     (attribute)->attnotnull = false; \
     (attribute)->atthasdef = false;
  #endif  /* _DROP_COLUMN_HACK__ */
+
  /* ----------------
   *     SCHEMA_ macros for declaring hardcoded tuple descriptors.
   *     these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
   * ----------------
   */
  #define Schema_pg_type \
-{ 1247, {"typname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, '\0' , 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"},     23, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"},           21, 0,   2,  3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"},    21, 0,   2,  4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"},     16, 0,   1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"},      18, 0,   1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"},  16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"},     18, 0,   1,  8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"},     26, 0,   4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"},      26, 0,   4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"},     24, 0,   4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"},    24, 0,   4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"},    24, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"},      24, 0,   4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"},     18, 0,   1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"},    18, 0,  1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, false    , 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
  DATA(insert OID = 0 ( 1247 typowner            23 0  4   2 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1247 typlen          21 0  2   3 0 -1 -1 t p f s f f));
  DATA(insert OID = 0 ( 1247 typprtlen       21 0  2   4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
   * ----------------
   */
  #define Schema_pg_proc \
-{ 1255, {"proname"},           19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"},           19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"},          23, 0,  4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"},           26, 0,  4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"},          16, 0,  1,  4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"},      16, 0,  1,  5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"},     16, 0,  1,  6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"},       16, 0,  1,  7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"},          21, 0,  2,  8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"},         16, 0,  1,  9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"},            26, 0,  4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"},       30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"},       23, 0,  4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"},        23, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"},        23, 0,  4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"},        23, 0,  4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"},                25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"},                17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
  DATA(insert OID = 0 ( 1255 proowner            23 0  4   2 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1255 prolang         26 0  4   3 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1255 proisinh            16 0  1   4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
   *     pg_shadow
   * ----------------
   */
-DATA(insert OID = 0 ( 1260 usename         19  0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid            23  0   4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename         19  DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid            23  DEFAULT_ATTSTATTARGET   4   2 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1260 usecreatedb     16  0   1   3 0 -1 -1 t p f c f f));
  DATA(insert OID = 0 ( 1260 usetrace            16  0   1   4 0 -1 -1 t p f c f f));
  DATA(insert OID = 0 ( 1260 usesuper            16  0   1   5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
   *     pg_group
   * ----------------
   */
-DATA(insert OID = 0 ( 1261 groname         19 0 NAMEDATALEN  1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid            23 0  4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid            23 DEFAULT_ATTSTATTARGET  4   2 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1261 grolist       1007 0 -1   3 0 -1 -1 f x f i f f));
  DATA(insert OID = 0 ( 1261 ctid                27 0  6  -1 0 -1 -1 f p f i f f));
  DATA(insert OID = 0 ( 1261 oid             26 0  4  -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
   * ----------------
   */
  #define Schema_pg_attribute \
-{ 1249, {"attrelid"},    26, 0,    4,  1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"},     19, 0, NAMEDATALEN,   2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0, 4,  4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"},   23, 0,    4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid            26 0  4   1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname         19 0 NAMEDATALEN  2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"},    26, DEFAULT_ATTSTATTARGET,    4,  1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"},     19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,   2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"},    26, 0,    4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0,  4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"},          21, 0,    2,  5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"},          21, 0,    2,  6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"},    23, 0,    4,  7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"},  23, 0,   4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"},   23, 0,    4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"},    16, 0,    1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"},   18, 0,   1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"},    16, 0,    1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"},    18, 0,    1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid            26 DEFAULT_ATTSTATTARGET  4   1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN  2 0 -1 -1 f p f i f f));
  DATA(insert OID = 0 ( 1249 atttypid            26 0  4   3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion   700 0  4   4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget   23 0  4   4 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1249 attlen          21 0  2   5 0 -1 -1 t p f s f f));
  DATA(insert OID = 0 ( 1249 attnum          21 0  2   6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims            23 0  4   7 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1249 attcacheoff     23 0  4   8 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1249 atttypmod       23 0  4   9 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1249 attbyval            16 0  1  10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid           26 0  4  -7 0 -1 -1 t p f i f f));
   * ----------------
   */
  #define Schema_pg_class \
-{ 1259, {"relname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"},    23, 0,   4,  7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname         19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"},      26, 0,   4,  2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"},     23, 0,   4,  3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"},        26, 0,   4,  4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"},   26, 0,  4,  5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"},     23, 0,   4,  6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"},    700, 0,  4,  7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0,  4,  8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0,  4,  9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"},   16, 0,  1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"},   16, 0,  1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"},      18, 0,   1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"},     21, 0,   2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"},    21, 0,   2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"},   21, 0,  2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"},     21, 0,   2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"},     21, 0,   2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"},      21, 0,   2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"},    16, 0,  1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"},   16, 0,  1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0,  1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"},         1034, 0,  -1, 22, 0, -1, -1,   false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname         19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
  DATA(insert OID = 0 ( 1259 reltype         26 0  4   2 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1259 relowner            23 0  4   3 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1259 relam           26 0  4   4 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1259 relfilenode     26 0  4   5 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1259 relpages            23 0  4   6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples       23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples      700 0  4   7 0 -1 -1 f p f i f f));
  DATA(insert OID = 0 ( 1259 reltoastrelid   26 0  4   8 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1259 reltoastidxid   26 0  4   9 0 -1 -1 t p f i f f));
  DATA(insert OID = 0 ( 1259 relhasindex     16 0  1  10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid         26 0  4  -7 0 -1 -1 t p f i f f));
   * ----------------
   */
  #define Schema_pg_variable \
-{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
  
  DATA(insert OID = 0 ( 1264 varfoo          26 0  4   1 0 -1 -1 t p f i f f));
  
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo           26 0  4   1 0 -1 -1 t p f i f f));
   * ----------------
   */
  #define Schema_pg_log \
-{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
  
  DATA(insert OID = 0 ( 1269 logfoo          26 0  4   1 0 -1 -1 t p f i f f));
  
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo           26 0  4   1 0 -1 -1 t p f i f f));
   * ----------------
   */
  #define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
  
  DATA(insert OID = 0 ( 376 xactlockfoo      26 0  4   1 0 -1 -1 t p f i f f));
  
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h

index 81e75e14b6a7dc7372f9dcd6808f824944f5f028..86de88cc9b662fe5c65f43301e2a28a247bf69ee 100644 (file)
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
   *
   * NOTES
   *   the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
     Oid         relam;
     Oid         relfilenode;
     int4        relpages;
-   int4        reltuples;
+   float4      reltuples;
     Oid         reltoastrelid;
     Oid         reltoastidxid;
     bool        relhasindex;
diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h

index 2f39bea3245e1772984b1b3b4fca0dbb36f41c1d..8d6a6b37c16ac513468f052508aadf91a034ff85 100644 (file)
--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
   *
   * NOTES
   *   the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
     /* These fields form the unique key for the entry: */
     Oid         starelid;       /* relation containing attribute */
     int2        staattnum;      /* attribute (column) stats are for */
-   Oid         staop;          /* '<' comparison op used for lo/hi vals */
+
+   /* the fraction of the column's entries that are NULL: */
+   float4      stanullfrac;
  
     /*
-    * Note: the current VACUUM code will never produce more than one
-    * entry per column, but in theory there could be multiple entries if
-    * a datatype has more than one useful ordering operator.  Also, the
-    * current code will not write an entry unless it found at least one
-    * non-NULL value in the column; so the remaining fields will never be
-    * NULL.
+    * stawidth is the average width in bytes of non-null entries.  For
+    * fixed-width datatypes this is of course the same as the typlen, but
+    * for varlena types it is more useful.  Note that this is the average
+    * width of the data as actually stored, post-TOASTing (eg, for a
+    * moved-out-of-line value, only the size of the pointer object is
+    * counted).  This is the appropriate definition for the primary use of
+    * the statistic, which is to estimate sizes of in-memory hash tables of
+    * tuples.
+    */
+   int4        stawidth;
+
+   /* ----------------
+    * stadistinct indicates the (approximate) number of distinct non-null
+    * data values in the column.  The interpretation is:
+    *      0       unknown or not computed
+    *      > 0     actual number of distinct values
+    *      < 0     negative of multiplier for number of rows
+    * The special negative case allows us to cope with columns that are
+    * unique (stadistinct = -1) or nearly so (for example, a column in
+    * which values appear about twice on the average could be represented
+    * by stadistinct = -0.5).  Because the number-of-rows statistic in
+    * pg_class may be updated more frequently than pg_statistic is, it's
+    * important to be able to describe such situations as a multiple of
+    * the number of rows, rather than a fixed number of distinct values.
+    * But in other cases a fixed number is correct (eg, a boolean column).
+    * ----------------
+    */
+   float4      stadistinct;
+
+   /* ----------------
+    * To allow keeping statistics on different kinds of datatypes,
+    * we do not hard-wire any particular meaning for the remaining
+    * statistical fields.  Instead, we provide several "slots" in which
+    * statistical data can be placed.  Each slot includes:
+    *      kind            integer code identifying kind of data
+    *      op              OID of associated operator, if needed
+    *      numbers         float4 array (for statistical values)
+    *      values          text array (for representations of data values)
+    * The kind and operator fields are never NULL; they are zeroes in an
+    * unused slot.  The numbers and values fields are NULL in an unused
+    * slot, and might also be NULL in a used slot if the slot kind has
+    * no need for one or the other.
+    * ----------------
      */
  
+   int2        stakind1;
+   int2        stakind2;
+   int2        stakind3;
+   int2        stakind4;
+
+   Oid         staop1;
+   Oid         staop2;
+   Oid         staop3;
+   Oid         staop4;
+
     /*
-    * These fields contain the stats about the column indicated by the
-    * key
+    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+    * (NULL). They cannot be accessed as C struct entries; you have to use
+    * the full field access machinery (heap_getattr) for them.  We declare
+    * them here for the catalog machinery.
      */
-   float4      stanullfrac;    /* the fraction of the entries that are
-                                * NULL */
-   float4      stacommonfrac;  /* the fraction that are the most common
-                                * val */
+
+   float4      stanumbers1[1];
+   float4      stanumbers2[1];
+   float4      stanumbers3[1];
+   float4      stanumbers4[1];
  
     /*
-    * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
-    * accessed as C struct entries; you have to use the full field access
-    * machinery (heap_getattr) for them.
-    *
-    * All three of these are text representations of data values of the
-    * column's data type.  To re-create the actual Datum, do
-    * datatypein(textout(givenvalue)).
+    * Values in these text arrays are external representations of values
+    * of the column's data type.  To re-create the actual Datum, do
+    * datatypein(textout(arrayelement)).
      */
-   text        stacommonval;   /* most common non-null value in column */
-   text        staloval;       /* smallest non-null value in column */
-   text        stahival;       /* largest non-null value in column */
+   text        stavalues1[1];
+   text        stavalues2[1];
+   text        stavalues3[1];
+   text        stavalues4[1];
  } FormData_pg_statistic;
  
+#define STATISTIC_NUM_SLOTS  4
+
  /* ----------------
   *     Form_pg_statistic corresponds to a pointer to a tuple with
   *     the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
   *     compiler constants for pg_statistic
   * ----------------
   */
-#define Natts_pg_statistic             8
+#define Natts_pg_statistic             21
  #define Anum_pg_statistic_starelid     1
  #define Anum_pg_statistic_staattnum        2
-#define Anum_pg_statistic_staop            3
-#define Anum_pg_statistic_stanullfrac  4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval 6
-#define Anum_pg_statistic_staloval     7
-#define Anum_pg_statistic_stahival     8
+#define Anum_pg_statistic_stanullfrac  3
+#define Anum_pg_statistic_stawidth     4
+#define Anum_pg_statistic_stadistinct  5
+#define Anum_pg_statistic_stakind1     6
+#define Anum_pg_statistic_stakind2     7
+#define Anum_pg_statistic_stakind3     8
+#define Anum_pg_statistic_stakind4     9
+#define Anum_pg_statistic_staop1       10
+#define Anum_pg_statistic_staop2       11
+#define Anum_pg_statistic_staop3       12
+#define Anum_pg_statistic_staop4       13
+#define Anum_pg_statistic_stanumbers1  14
+#define Anum_pg_statistic_stanumbers2  15
+#define Anum_pg_statistic_stanumbers3  16
+#define Anum_pg_statistic_stanumbers4  17
+#define Anum_pg_statistic_stavalues1   18
+#define Anum_pg_statistic_stavalues2   19
+#define Anum_pg_statistic_stavalues3   20
+#define Anum_pg_statistic_stavalues4   21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation.  Additional "kinds" will probably appear in the
+ * future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot.  Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not.  stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count).  The values
+ * shall be ordered in decreasing frequency.  Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector.  Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV  1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data.  staop is
+ * the OID of the "<" operator that describes the sort ordering.  (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.)  stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population.  The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance).  This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values.  In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM  2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop.  (As with the histogram, more
+ * than one entry could theoretically appear.)  stavalues is not used and
+ * should be NULL.  stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions.  The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION  3
  
  #endif  /* PG_STATISTIC_H */
diff --git a/src/include/commands/command.h b/src/include/commands/command.h

index 8b108451d2accff7969f55e6972ad389551829a1..7eb1a4fab846aeff33b3f5cca4f60b9c4c3b5fb5 100644 (file)
--- a/src/include/commands/command.h
+++ b/src/include/commands/command.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
  extern void AlterTableAddColumn(const char *relationName,
                     bool inh, ColumnDef *colDef);
  
-extern void AlterTableAlterColumn(const char *relationName,
-                     bool inh, const char *colName,
-                     Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+                                        bool inh, const char *colName,
+                                        Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+                                           bool inh, const char *colName,
+                                           Node *statsTarget);
  
  extern void AlterTableDropColumn(const char *relationName,
                      bool inh, const char *colName,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index d82d22fcdfcbe3482ed5dbf1b66bf52b607767c3..87bb0007aa067dcbfbe15d31cccfbe00f61df460 100644 (file)
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -1,129 +1,27 @@
  /*-------------------------------------------------------------------------
   *
   * vacuum.h
- *   header file for postgres vacuum cleaner
+ *   header file for postgres vacuum cleaner and statistics analyzer
   *
   *
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #ifndef VACUUM_H
  #define VACUUM_H
  
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
  
  
-typedef struct VAttListData
-{
-   int         val_dummy;
-   struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
-   BlockNumber blkno;          /* BlockNumber of this Page */
-   Size        free;           /* FreeSpace on this Page */
-   uint16      offsets_used;   /* Number of OffNums used by vacuum */
-   uint16      offsets_free;   /* Number of OffNums free or to be free */
-   OffsetNumber offsets[1];    /* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
-   int         empty_end_pages;/* Number of "empty" end-pages */
-   int         num_pages;      /* Number of pages in pagedesc */
-   int         num_allocated_pages;    /* Number of allocated pages in
-                                        * pagedesc */
-   VacPage    *pagedesc;       /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
-   Form_pg_attribute attr;
-   Datum       best,
-               guess1,
-               guess2,
-               max,
-               min;
-   int         best_len,
-               guess1_len,
-               guess2_len,
-               max_len,
-               min_len;
-   long        best_cnt,
-               guess1_cnt,
-               guess1_hits,
-               guess2_hits,
-               null_cnt,
-               nonnull_cnt,
-               max_cnt,
-               min_cnt;
-   FmgrInfo    f_cmpeq,
-               f_cmplt,
-               f_cmpgt;
-   Oid         op_cmplt;
-   regproc     outfunc;
-   Oid         typelem;
-   bool        initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
-   Oid         vrl_relid;
-   struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
-   ItemPointerData new_tid;
-   ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
-   ItemPointerData tid;        /* tuple ID */
-   VacPage     vacpage;        /* where to move */
-   bool        cleanVpd;       /* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
-   Oid         relid;
-   int         num_tuples;
-   int         num_pages;
-   Size        min_tlen;
-   Size        max_tlen;
-   bool        hasindex;
-   int         num_vtlinks;
-   VTupleLink  vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE 1000000000      /* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                               bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
  
  #endif  /* VACUUM_H */
diff --git a/src/include/config.h.in b/src/include/config.h.in

index 0d989dbbb3155bfaa218fce2d6181c45921191de..01593a4ce963a05484b025e5206f27d8b2bd952b 100644 (file)
--- a/src/include/config.h.in
+++ b/src/include/config.h.in
@@ -8,7 +8,7 @@
   * or in config.h afterwards.  Of course, if you edit config.h, then your
   * changes will be overwritten the next time you run configure.
   *
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
   */
  
  #ifndef CONFIG_H
@@ -156,6 +156,11 @@
  #define INDEX_MAX_KEYS     16
  #define FUNC_MAX_ARGS      INDEX_MAX_KEYS
  
+/*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET  10
+
  /*
   * Define this to make libpgtcl's "pg_result -assign" command process C-style
   * backslash sequences in returned tuple data and convert Postgres array
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 2cf9378cf116426106be2cba0bb29d970e561c09..0967bef24ba9437360c5142ffc6f770107c9aa5a 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -628,7 +628,6 @@ typedef struct GroupState
   *  SortState information
   *
   *     sort_Done       indicates whether sort has been performed yet
- *     sort_Keys       scan key structures describing the sort keys
   *     tuplesortstate  private state of tuplesort.c
   * ----------------
   */
@@ -636,7 +635,6 @@ typedef struct SortState
  {
     CommonScanState csstate;    /* its first field is NodeTag */
     bool        sort_Done;
-   ScanKey     sort_Keys;
     void       *tuplesortstate;
  } SortState;
  
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 1614d787bcb8d0ceac119c020b51ae18ffebd013..63b1b1046a8e71675ed81102c38134886a45f0bc 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
     NodeTag     type;
     char        subtype;        /*------------
                                  *  A = add column
-                                *  T = alter column
+                                *  T = alter column default
+                                *  S = alter column statistics
                                  *  D = drop column
                                  *  C = add constraint
                                  *  X = drop constraint
-                                *  E = add toast table,
+                                *  E = create toast table
                                  *  U = change owner
                                  *------------
                                  */
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
  } ClusterStmt;
  
  /* ----------------------
- *     Vacuum Statement
+ *     Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
   * ----------------------
   */
  typedef struct VacuumStmt
  {
     NodeTag     type;
-   bool        verbose;        /* print status info */
-   bool        analyze;        /* analyze data */
-   char       *vacrel;         /* table to vacuum */
-   List       *va_spec;        /* columns to analyse */
+   bool        vacuum;         /* do VACUUM step */
+   bool        analyze;        /* do ANALYZE step */
+   bool        verbose;        /* print progress info */
+   char       *vacrel;         /* name of single table to process, or NULL */
+   List       *va_cols;        /* list of column names, or NIL for all */
  } VacuumStmt;
  
  /* ----------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 3ae8e09f57a30468fdece0f7fe9098a3ca05653f..9e69ed60992a7b7307fcc79150eccd7a6f62f963 100644 (file)
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
   * reskey and reskeyop are the execution-time representation of sorting.
   * reskey must be zero in any non-sort-key item.  The reskey of sort key
   * targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID.  reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
   *
   * Both reskey and reskeyop are typically zero during parse/plan stages.
   * The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
     Index       ressortgroupref;
     /* nonzero if referenced by a sort/group clause */
     Index       reskey;         /* order of key in a sort (for those > 0) */
-   Oid         reskeyop;       /* sort operator's regproc Oid */
+   Oid         reskeyop;       /* sort operator's Oid */
     bool        resjunk;        /* set to true to eliminate the attribute
                                  * from final target list */
  } Resdom;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index f643ef879689640186250b344d4734f80aa6dc49..c76d9b4af7136f23fdc022f53127925129760519 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
     Oid         hashjoinoperator;       /* copy of clause operator */
  
     /* cache space for hashclause processing; -1 if not yet set */
-   Selectivity left_dispersion;/* dispersion of left side */
-   Selectivity right_dispersion;       /* dispersion of right side */
+   Selectivity left_bucketsize;        /* avg bucketsize of left side */
+   Selectivity right_bucketsize;       /* avg bucketsize of right side */
  } RestrictInfo;
  
  /*
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 5caa576f0c0be002c3e1bc88a7ff75746f5c45b4..cbf6df063a3cc4ae782cab805acaaf80b9d2025f 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
                List *restrictlist,
                List *outersortkeys, List *innersortkeys);
  extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-             List *restrictlist, Selectivity innerdispersion);
+             List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
  extern Cost cost_qual_eval(List *quals);
  extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
  extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 5b71eded86fcac8f21a5732ef81d8906fd9263a3..0839feb4b2fe5c0d137a7705469acb3814779181 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
                      Path *inner_path,
                      List *restrict_clauses,
                      List *hashclauses,
-                    Selectivity innerdispersion);
+                    Selectivity innerbucketsize);
  
  /*
   * prototypes for relnode.c
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index f1c4aff1c804172da17b24a438551c0b631c98c0..6b35deed2867649e350da0c081a983eb0dec5821 100644 (file)
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
  extern Oid get_atttype(Oid relid, AttrNumber attnum);
  extern bool get_attisset(Oid relid, char *attname);
  extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
-                 double min_estimate);
  extern RegProcedure get_opcode(Oid opno);
  extern char *get_opname(Oid opno);
  extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
  extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
  extern char get_typstorage(Oid typid);
  extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+                            Oid atttype, int32 atttypmod,
+                            int reqkind, Oid reqop,
+                            Datum **values, int *nvalues,
+                            float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+                             Datum *values, int nvalues,
+                             float4 *numbers, int nnumbers);
  
  #define TypeIsToastable(typid) (get_typstorage(typid) != 'p')
  
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index 8d4e2ae62c419658f44ec3f1adb9853a658ea2c6..342f7bf8a566b73e4f8393553ccb332ed067ed06 100644 (file)
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -9,7 +9,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,7 +53,7 @@
  #define RULEOID            22
  #define SHADOWNAME     23
  #define SHADOWSYSID        24
-#define STATRELID      25
+#define STATRELATT     25
  #define TYPENAME       26
  #define TYPEOID            27
  
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 7f273776c36a26cc1e6b688b4a530f74a7c108f2..001761796e2492781d98aec7c8b311b4538e251a 100644 (file)
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -13,7 +13,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
   */
  
  extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
-                    int nkeys, ScanKey keys,
-                    bool randomAccess);
+                     int nkeys,
+                     Oid *sortOperators, AttrNumber *attNums,
+                     bool randomAccess);
  extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                       bool enforceUnique,
                       bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
  extern void tuplesort_markpos(Tuplesortstate *state);
  extern void tuplesort_restorepos(Tuplesortstate *state);
  
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+   SORTFUNC_LT,                /* raw "<" operator */
+   SORTFUNC_CMP,               /* -1 / 0 / 1 three-way comparator */
+   SORTFUNC_REVCMP             /* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+                              RegProcedure *sortFunction,
+                              SortFunctionKind *kind);
+
  #endif  /* TUPLESORT_H */
diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c

index 5614a34b0fef7390ba8ec0a4184fea10da5e7d69..c03880f497d0d62526a94157175fede654376f28 100644 (file)
--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
     {"some", SOME},
     {"start", START},
     {"statement", STATEMENT},
+   {"statistics", STATISTICS},
     {"stdin", STDIN},
     {"stdout", STDOUT},
     {"substring", SUBSTRING},
diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y

index 345efb6576e2ddd8ff944993799a816b12bd34c8..91708bd91fae24f446576cacaea6ccbc1028163d 100644 (file)
--- a/src/interfaces/ecpg/preproc/preproc.y
+++ b/src/interfaces/ecpg/preproc/preproc.y
@@ -134,7 +134,7 @@ make_name(void)
  
  %union {
     double                  dval;
-        int                     ival;
+   int                     ival;
     char *                  str;
     struct when             action;
     struct index        index;
@@ -224,7 +224,7 @@ make_name(void)
         NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
         OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
         RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
-       SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+       SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
         TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
         VALID, VERBOSE, VERSION
  
@@ -285,7 +285,7 @@ make_name(void)
  %type      file_name AexprConst ParamNo c_expr ConstTypename
  %type     in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
  %type     opt_indirection expr_list extract_list extract_arg
-%type     position_list substr_list substr_from alter_column_action
+%type     position_list substr_list substr_from alter_column_default
  %type     trim_list in_expr substr_for attr attrs drop_behavior
  %type     Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
  %type     opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
  %type     row_expr row_descriptor row_list ConstDatetime opt_chain
  %type     SelectStmt into_clause OptTemp ConstraintAttributeSpec
  %type     opt_table opt_all sort_clause sortby_list ConstraintAttr 
-%type     sortby OptUseOp opt_inh_star relation_name_list name_list
+%type     sortby OptUseOp relation_name_list name_list
  %type     group_clause having_clause from_clause opt_distinct
  %type     join_outer where_clause relation_expr sub_type opt_arg
  %type     opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
  %type      NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
  %type      copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
  %type      opt_with_copy FetchStmt direction fetch_how_many from_in
-%type      ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type      opt_analyze opt_va_list va_list ExplainStmt index_params
+%type      ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type      analyze_keyword opt_name_list ExplainStmt index_params
  %type      index_list func_index index_elem opt_class access_method_clause
  %type      index_opt_unique IndexStmt func_return ConstInterval
  %type      func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
  %type     opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
  %type     case_expr when_clause_list case_default case_arg when_clause
  %type      select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type      select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type      select_offset_value ReindexStmt join_type opt_boolean
  %type     join_qual update_list AlterSchemaStmt joined_table
  %type     opt_level opt_lock lock_type users_in_new_group_clause
  %type      OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt:  AlterSchemaStmt          { output_statement($1, 0, NULL, connection); }
         | CreatedbStmt      { output_statement($1, 0, NULL, connection); }
         | DropdbStmt        { output_statement($1, 0, NULL, connection); }
         | VacuumStmt        { output_statement($1, 0, NULL, connection); }
+       | AnalyzeStmt       { output_statement($1, 0, NULL, connection); }
         | VariableSetStmt   { output_statement($1, 0, NULL, connection); }
         | VariableShowStmt  { output_statement($1, 0, NULL, connection); }
         | VariableResetStmt { output_statement($1, 0, NULL, connection); }
@@ -908,40 +909,41 @@ CheckPointStmt: CHECKPOINT     { $$= make_str("checkpoint"); }
  
  
  /*****************************************************************************
- *
- *     QUERY :
   *
   * ALTER TABLE variations
   *
   *****************************************************************************/
  
  AlterTableStmt:
-/* ALTER TABLE  ADD [COLUMN]  */
-        ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+        ALTER TABLE relation_expr ADD opt_column columnDef
+       {
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+       }
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+   | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
         {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
         }
-/* ALTER TABLE  ALTER [COLUMN]  {SET DEFAULT |DROP
-DEFAULT} */
-   | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
-       alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+   | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
         {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+           $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
         }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-   | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
+   | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
         {
-           $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+           $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
         }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+   | ALTER TABLE relation_expr ADD TableConstraint
         {
-           $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+           $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
         }
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
-   | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+   | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
         {
-           $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+           $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
         }
  /* ALTER TABLE  OWNER TO UserId */     
     | ALTER TABLE relation_name OWNER TO UserId   
@@ -950,7 +952,7 @@ DEFAULT} */
         }
         ;
  
-alter_column_action:
+alter_column_default:
          SET DEFAULT a_expr { $$ = cat2_str(make_str("set default"), $3); }
          | DROP DEFAULT          { $$ = make_str("drop default"); }
          ;
@@ -1234,10 +1236,6 @@ key_reference:  NO ACTION    { $$ = make_str("no action"); }
         | SET NULL_P    { $$ = make_str("set null"); }
         ;
  
-opt_only: ONLY     { $$ = make_str("only"); }
-   | /*EMPTY*/ { $$ = EMPTY; }
-   ;
-
  OptInherit:  INHERITS '(' relation_name_list ')'                { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
                  | /*EMPTY*/                    { $$ = EMPTY; }
                  ;      
@@ -2013,10 +2011,9 @@ opt_force:      FORCE        { $$ = make_str("force"); }
   *
   *****************************************************************************/
  
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                 {
-                   $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+                   $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
                 }
         ;
  
@@ -2250,38 +2247,44 @@ ClusterStmt:  CLUSTER index_name ON relation_name
   *
   *     QUERY:
   *             vacuum
+ *             analyze
   *
   *****************************************************************************/
  
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
+               {
+                   $$ = cat_str(2, make_str("vacuum"), $2);
+               }
+       | VACUUM opt_verbose relation_name
                 {
                     $$ = cat_str(3, make_str("vacuum"), $2, $3);
                 }
-       | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+       | VACUUM opt_verbose AnalyzeStmt
                 {
-                   if ( strlen($5) > 0 && strlen($4) == 0 )
-                       mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
-                   $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+                   $$ = cat_str(3, make_str("vacuum"), $2, $3);
                 }
         ;
  
-opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+               {
+                   $$ = cat_str(2, $1, $2);
+               }
+       | analyze_keyword opt_verbose relation_name opt_name_list
+               {
+                   $$ = cat_str(4, $1, $2, $3, $4);
+               }
         ;
  
-opt_analyze:  ANALYZE                  { $$ = make_str("analyze"); }
-       | ANALYSE               { $$ = make_str("analyse"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
+analyze_keyword:  ANALYZE                  { $$ = make_str("analyze"); }
+       | ANALYSE                           { $$ = make_str("analyse"); }
         ;
  
-opt_va_list:  '(' va_list ')'              { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose:  VERBOSE                  { $$ = make_str("verbose"); }
         | /*EMPTY*/             { $$ = EMPTY; }
         ;
  
-va_list:  name
-               { $$=$1; }
-       | va_list ',' name
-               { $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list:  '(' name_list ')'      { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+       | /*EMPTY*/             { $$ = EMPTY; }
         ;
  
  
@@ -2383,9 +2386,9 @@ columnElem:  ColId opt_indirection
   *
   *****************************************************************************/
  
-DeleteStmt:  DELETE FROM opt_only name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                 {
-                   $$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+                   $$ = cat_str(3, make_str("delete from"), $3, $4);
                 }
         ;
  
@@ -2416,12 +2419,12 @@ opt_lmode:      SHARE                           { $$ = make_str("share"); }
   *
   *****************************************************************************/
  
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
               SET update_target_list
               from_clause
               where_clause
                 {
-                   $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+                   $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
                 }
         ;
  
@@ -2667,10 +2670,6 @@ select_offset_value:     PosIntConst {
   * ...however, recursive addattr and rename supported.  make special
   * cases for these.
   */
-opt_inh_star:  '*'                 { $$ = make_str("*"); }
-       | /*EMPTY*/             { $$ = EMPTY; }
-       ;
-
  relation_name_list:  name_list { $$ = $1; };
  
  name_list:  name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause                { $$ = $1; }
         | /* EMPTY */               { $$ = EMPTY; }
                  ;
  
-update_list:  OF va_list
+update_list:  OF name_list
                {
             $$ = cat2_str(make_str("of"), $2);
           }
@@ -5028,6 +5027,7 @@ TokenId:  ABSOLUTE            { $$ = make_str("absolute"); }
     | SHARE             { $$ = make_str("share"); }
     | START             { $$ = make_str("start"); }
     | STATEMENT         { $$ = make_str("statement"); }
+   | STATISTICS        { $$ = make_str("statistics"); }
     | STDIN                         { $$ = make_str("stdin"); }
     | STDOUT                        { $$ = make_str("stdout"); }
     | SYSID                         { $$ = make_str("sysid"); }
diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out

index a2b0ad9e3e722827117e707ae7033a210771e9b4..46bc60f6955d60e4a52170d4b7281048b146d97d 100644 (file)
--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -353,12 +353,28 @@ WHERE pg_statistic.starelid != 0 AND
  -----+----------
  (0 rows)
  
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
  FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop 
------+-------
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3 
+-----+--------
  (0 rows)
  
  SELECT oid, pg_trigger.tgrelid 
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 9d4ff1b39856b3d468938ef709578649fe4d84ce..1b094a6e3bfe2f58a8e7b108c0088131a36feb35 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
            (p2.pronargs = 1 AND p1.aggbasetype = 0)));
    oid  | aggname | oid |   proname   
  -------+---------+-----+-------------
- 16997 | max     | 768 | int4larger
- 17011 | min     | 769 | int4smaller
+ 17010 | max     | 768 | int4larger
+ 17024 | min     | 769 | int4smaller
  (2 rows)
  
  -- Cross-check finalfn (if present) against its entry in pg_proc.
diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql

index b7ea1f63eaa8268d3583a670e9f3985619be0453..88727a6c76ec6922fc12f4456fba2dc650570f0a 100644 (file)
--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -177,10 +177,18 @@ SELECT    oid, pg_statistic.starelid
  FROM   pg_statistic 
  WHERE  pg_statistic.starelid != 0 AND 
     NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
  FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE  pg_statistic.staop1 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+   NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
  SELECT oid, pg_trigger.tgrelid 
  FROM   pg_trigger 
  WHERE  pg_trigger.tgrelid != 0 AND
author	Tom Lane
	Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)
committer	Tom Lane
	Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)
doc/src/sgml/catalogs.sgml		patch \| blob \| blame \| history
doc/src/sgml/indices.sgml		patch \| blob \| blame \| history
doc/src/sgml/ref/allfiles.sgml		patch \| blob \| blame \| history
doc/src/sgml/ref/alter_table.sgml		patch \| blob \| blame \| history
doc/src/sgml/ref/analyze.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/ref/vacuum.sgml		patch \| blob \| blame \| history
doc/src/sgml/reference.sgml		patch \| blob \| blame \| history
doc/src/sgml/xoper.sgml		patch \| blob \| blame \| history
src/backend/access/common/tupdesc.c		patch \| blob \| blame \| history
src/backend/access/gist/gist.c		patch \| blob \| blame \| history
src/backend/access/hash/hash.c		patch \| blob \| blame \| history
src/backend/access/heap/tuptoaster.c		patch \| blob \| blame \| history
src/backend/access/nbtree/nbtree.c		patch \| blob \| blame \| history
src/backend/access/rtree/rtree.c		patch \| blob \| blame \| history
src/backend/catalog/genbki.sh		patch \| blob \| blame \| history
src/backend/catalog/heap.c		patch \| blob \| blame \| history
src/backend/catalog/index.c		patch \| blob \| blame \| history
src/backend/commands/analyze.c		patch \| blob \| blame \| history
src/backend/commands/command.c		patch \| blob \| blame \| history
src/backend/commands/vacuum.c		patch \| blob \| blame \| history
src/backend/executor/nodeSort.c		patch \| blob \| blame \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/readfuncs.c		patch \| blob \| blame \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| blame \| history
src/backend/optimizer/path/joinpath.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/initsplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| blame \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| blame \| history
src/backend/optimizer/util/plancat.c		patch \| blob \| blame \| history
src/backend/parser/analyze.c		patch \| blob \| blame \| history
src/backend/parser/gram.y		patch \| blob \| blame \| history
src/backend/parser/keywords.c		patch \| blob \| blame \| history
src/backend/parser/parse_relation.c		patch \| blob \| blame \| history
src/backend/tcop/utility.c		patch \| blob \| blame \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| blame \| history
src/backend/utils/cache/lsyscache.c		patch \| blob \| blame \| history
src/backend/utils/cache/syscache.c		patch \| blob \| blame \| history
src/backend/utils/sort/tuplesort.c		patch \| blob \| blame \| history
src/include/access/tuptoaster.h		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/heap.h		patch \| blob \| blame \| history
src/include/catalog/index.h		patch \| blob \| blame \| history
src/include/catalog/indexing.h		patch \| blob \| blame \| history
src/include/catalog/pg_attribute.h		patch \| blob \| blame \| history
src/include/catalog/pg_class.h		patch \| blob \| blame \| history
src/include/catalog/pg_statistic.h		patch \| blob \| blame \| history
src/include/commands/command.h		patch \| blob \| blame \| history
src/include/commands/vacuum.h		patch \| blob \| blame \| history
src/include/config.h.in		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history
src/include/nodes/parsenodes.h		patch \| blob \| blame \| history
src/include/nodes/primnodes.h		patch \| blob \| blame \| history
src/include/nodes/relation.h		patch \| blob \| blame \| history
src/include/optimizer/cost.h		patch \| blob \| blame \| history
src/include/optimizer/pathnode.h		patch \| blob \| blame \| history
src/include/utils/lsyscache.h		patch \| blob \| blame \| history
src/include/utils/syscache.h		patch \| blob \| blame \| history
src/include/utils/tuplesort.h		patch \| blob \| blame \| history
src/interfaces/ecpg/preproc/keywords.c		patch \| blob \| blame \| history
src/interfaces/ecpg/preproc/preproc.y		patch \| blob \| blame \| history
src/test/regress/expected/oidjoins.out		patch \| blob \| blame \| history
src/test/regress/expected/opr_sanity.out		patch \| blob \| blame \| history
src/test/regress/sql/oidjoins.sql		patch \| blob \| blame \| history