vacuumdb: Add option for analyzing only relations missing stats.
authorNathan Bossart
Tue, 18 Mar 2025 21:32:56 +0000 (16:32 -0500)
committerNathan Bossart
Tue, 18 Mar 2025 21:32:56 +0000 (16:32 -0500)
This commit adds a new --missing-stats-only option that can be used
with --analyze-only or --analyze-in-stages.  When this option is
specified, vacuumdb will analyze a relation if it lacks any
statistics for a column, expression index, or extended statistics
object.  This new option is primarily intended for use after
pg_upgrade (since it can now retain most optimizer statistics), but
it might be useful in other situations, too.

Author: Corey Huinker 
Co-authored-by: Nathan Bossart
Reviewed-by: John Naylor
Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan

doc/src/sgml/ref/vacuumdb.sgml
src/bin/scripts/t/100_vacuumdb.pl
src/bin/scripts/vacuumdb.c
src/test/perl/PostgreSQL/Test/Cluster.pm

index 66fccb30a2d26013abb9750be8a56185c10ebe1d..d102f9d48406c18512197989d643db6b9df8920b 100644 (file)
@@ -277,6 +277,22 @@ PostgreSQL documentation
       
      
 
+     
+      
+      
+       
+        Only analyze relations that are missing statistics for a column, index
+        expression, or extended statistics object.  This option prevents
+        vacuumdb from deleting existing statistics
+        so that the query optimizer's choices do not become transiently worse.
+       
+       
+        This option can only be used in conjunction with
+         or .
+       
+      
+     
+
      
       
       
index 2d174df9aaee4b40077ca82b5019418d1a8c26f1..bb56b35349208df79bec9f3bd883ea532680cf07 100644 (file)
@@ -237,4 +237,64 @@ $node->command_fails_like(
    qr/cannot vacuum all databases and a specific one at the same time/,
    'cannot use option --all and a dbname as argument at the same time');
 
+$node->safe_psql('postgres',
+   'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;');
+$node->issues_sql_like(
+   [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with missing stats');
+$node->issues_sql_unlike(
+   [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with no missing stats');
+
+$node->safe_psql('postgres',
+   'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));');
+$node->issues_sql_like(
+   [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with missing index expression stats');
+$node->issues_sql_unlike(
+   [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with no missing index expression stats');
+
+$node->safe_psql('postgres',
+   'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;');
+$node->issues_sql_like(
+   [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with missing extended stats');
+$node->issues_sql_unlike(
+   [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with no missing extended stats');
+
+$node->safe_psql('postgres',
+   "CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n"
+   . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n"
+   . "ANALYZE regression_vacuumdb_child;\n");
+$node->issues_sql_like(
+   [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with missing inherited stats');
+$node->issues_sql_unlike(
+   [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with no missing inherited stats');
+
+$node->safe_psql('postgres',
+   "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n"
+   . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n"
+   . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n"
+   . "ANALYZE regression_vacuumdb_part1;\n");
+$node->issues_sql_like(
+   [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with missing partition stats');
+$node->issues_sql_unlike(
+   [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ],
+   qr/statement:\ ANALYZE/sx,
+   '--missing-stats-only with no missing partition stats');
+
 done_testing();
index e28f82a0ebaf449cbf5f4014a3b6c40f0af3224f..935e6da3c179a898e34da983a9079baf105b54b6 100644 (file)
@@ -47,6 +47,7 @@ typedef struct vacuumingOptions
    bool        process_toast;
    bool        skip_database_stats;
    char       *buffer_usage_limit;
+   bool        missing_stats_only;
 } vacuumingOptions;
 
 /* object filter options */
@@ -134,6 +135,7 @@ main(int argc, char *argv[])
        {"no-process-toast", no_argument, NULL, 11},
        {"no-process-main", no_argument, NULL, 12},
        {"buffer-usage-limit", required_argument, NULL, 13},
+       {"missing-stats-only", no_argument, NULL, 14},
        {NULL, 0, NULL, 0}
    };
 
@@ -281,6 +283,9 @@ main(int argc, char *argv[])
            case 13:
                vacopts.buffer_usage_limit = escape_quotes(optarg);
                break;
+           case 14:
+               vacopts.missing_stats_only = true;
+               break;
            default:
                /* getopt_long already emitted a complaint */
                pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -366,6 +371,14 @@ main(int argc, char *argv[])
        pg_fatal("cannot use the \"%s\" option with the \"%s\" option",
                 "buffer-usage-limit", "full");
 
+   /*
+    * Prohibit --missing-stats-only without --analyze-only or
+    * --analyze-in-stages.
+    */
+   if (vacopts.missing_stats_only && !vacopts.analyze_only)
+       pg_fatal("cannot use the \"%s\" option without \"%s\" or \"%s\"",
+                "missing-stats-only", "analyze-only", "analyze-in-stages");
+
    /* fill cparams except for dbname, which is set below */
    cparams.pghost = host;
    cparams.pgport = port;
@@ -406,12 +419,14 @@ main(int argc, char *argv[])
        if (analyze_in_stages)
        {
            int         stage;
+           SimpleStringList *found_objs = NULL;
 
            for (stage = 0; stage < ANALYZE_NUM_STAGES; stage++)
            {
                vacuum_one_database(&cparams, &vacopts,
                                    stage,
-                                   &objects, NULL,
+                                   &objects,
+                                   vacopts.missing_stats_only ? &found_objs : NULL,
                                    concurrentCons,
                                    progname, echo, quiet);
            }
@@ -614,6 +629,13 @@ vacuum_one_database(ConnParams *cparams,
                 "--buffer-usage-limit", "16");
    }
 
+   if (vacopts->missing_stats_only && PQserverVersion(conn) < 150000)
+   {
+       PQfinish(conn);
+       pg_fatal("cannot use the \"%s\" option on server versions older than PostgreSQL %s",
+                "--missing-stats-only", "15");
+   }
+
    /* skip_database_stats is used automatically if server supports it */
    vacopts->skip_database_stats = (PQserverVersion(conn) >= 160000);
 
@@ -838,6 +860,9 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
                         " FROM pg_catalog.pg_class c\n"
                         " JOIN pg_catalog.pg_namespace ns"
                         " ON c.relnamespace OPERATOR(pg_catalog.=) ns.oid\n"
+                        " CROSS JOIN LATERAL (SELECT c.relkind IN ("
+                        CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
+                        CppAsString2(RELKIND_PARTITIONED_INDEX) ")) as p (inherited)\n"
                         " LEFT JOIN pg_catalog.pg_class t"
                         " ON c.reltoastrelid OPERATOR(pg_catalog.=) t.oid\n");
 
@@ -921,6 +946,84 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
                          vacopts->min_mxid_age);
    }
 
+   if (vacopts->missing_stats_only)
+   {
+       appendPQExpBufferStr(&catalog_query, " AND (\n");
+
+       /* regular stats */
+       appendPQExpBufferStr(&catalog_query,
+                            " EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
+                            " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n"
+                            " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                            " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
+                            " AND NOT a.attisdropped\n"
+                            " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                            " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
+                            " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
+                            " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
+                            " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n");
+
+       /* extended stats */
+       appendPQExpBufferStr(&catalog_query,
+                            " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n"
+                            " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n"
+                            " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                            " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                            " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n"
+                            " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n"
+                            " AND d.stxdinherit OPERATOR(pg_catalog.=) p.inherited))\n");
+
+       /* expression indexes */
+       appendPQExpBufferStr(&catalog_query,
+                            " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
+                            " JOIN pg_catalog.pg_index i"
+                            " ON i.indexrelid OPERATOR(pg_catalog.=) a.attrelid\n"
+                            " WHERE i.indrelid OPERATOR(pg_catalog.=) c.oid\n"
+                            " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                            " AND i.indkey[a.attnum OPERATOR(pg_catalog.-) 1::pg_catalog.int2]"
+                            " OPERATOR(pg_catalog.=) 0::pg_catalog.int2\n"
+                            " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
+                            " AND NOT a.attisdropped\n"
+                            " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                            " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
+                            " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
+                            " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
+                            " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n");
+
+       /* inheritance and regular stats */
+       appendPQExpBufferStr(&catalog_query,
+                            " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
+                            " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n"
+                            " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                            " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
+                            " AND NOT a.attisdropped\n"
+                            " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                            " AND c.relhassubclass\n"
+                            " AND NOT p.inherited\n"
+                            " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"
+                            " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n"
+                            " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
+                            " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
+                            " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
+                            " AND s.stainherit))\n");
+
+       /* inheritance and extended stats */
+       appendPQExpBufferStr(&catalog_query,
+                            " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n"
+                            " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n"
+                            " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
+                            " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+                            " AND c.relhassubclass\n"
+                            " AND NOT p.inherited\n"
+                            " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"
+                            " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n"
+                            " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n"
+                            " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n"
+                            " AND d.stxdinherit))\n");
+
+       appendPQExpBufferStr(&catalog_query, " )\n");
+   }
+
    /*
     * Execute the catalog query.  We use the default search_path for this
     * query for consistency with table lookups done elsewhere by the user.
@@ -983,6 +1086,11 @@ vacuum_all_databases(ConnParams *cparams,
 
    if (analyze_in_stages)
    {
+       SimpleStringList **found_objs = NULL;
+
+       if (vacopts->missing_stats_only)
+           found_objs = palloc0(PQntuples(result) * sizeof(SimpleStringList *));
+
        /*
         * When analyzing all databases in stages, we analyze them all in the
         * fastest stage first, so that initial statistics become available
@@ -999,7 +1107,8 @@ vacuum_all_databases(ConnParams *cparams,
 
                vacuum_one_database(cparams, vacopts,
                                    stage,
-                                   objects, NULL,
+                                   objects,
+                                   vacopts->missing_stats_only ? &found_objs[i] : NULL,
                                    concurrentCons,
                                    progname, echo, quiet);
            }
@@ -1239,6 +1348,7 @@ help(const char *progname)
    printf(_("  -j, --jobs=NUM                  use this many concurrent connections to vacuum\n"));
    printf(_("      --min-mxid-age=MXID_AGE     minimum multixact ID age of tables to vacuum\n"));
    printf(_("      --min-xid-age=XID_AGE       minimum transaction ID age of tables to vacuum\n"));
+   printf(_("      --missing-stats-only        only analyze relations with missing statistics\n"));
    printf(_("      --no-index-cleanup          don't remove index entries that point to dead tuples\n"));
    printf(_("      --no-process-main           skip the main relation\n"));
    printf(_("      --no-process-toast          skip the TOAST table associated with the table to vacuum\n"));
index bab3f3d2dbef87829da8fdcb4a658ee500a787fe..05bd94609d4fb1205eba6ed7936f9c7e648ef4af 100644 (file)
@@ -2849,6 +2849,33 @@ sub issues_sql_like
 
 =pod
 
+=item $node->issues_sql_unlike(cmd, unexpected_sql, test_name)
+
+Run a command on the node, then verify that $unexpected_sql does not appear in
+the server log file.
+
+=cut
+
+sub issues_sql_unlike
+{
+   local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+   my ($self, $cmd, $unexpected_sql, $test_name) = @_;
+
+   local %ENV = $self->_get_env();
+
+   my $log_location = -s $self->logfile;
+
+   my $result = PostgreSQL::Test::Utils::run_log($cmd);
+   ok($result, "@$cmd exit code 0");
+   my $log =
+     PostgreSQL::Test::Utils::slurp_file($self->logfile, $log_location);
+   unlike($log, $unexpected_sql, "$test_name: SQL not found in server log");
+   return;
+}
+
+=pod
+
 =item $node->log_content()
 
 Returns the contents of log of the node