Expose track_iotiming data via the statistics collector.
authorRobert Haas
Thu, 5 Apr 2012 15:37:31 +0000 (11:37 -0400)
committerRobert Haas
Thu, 5 Apr 2012 15:40:24 +0000 (11:40 -0400)
Ants Aasma's original patch to add timing information for buffer I/O
requests exposed this data at the relation level, which was judged too
costly.  I've here exposed it at the database level instead.

doc/src/sgml/config.sgml
doc/src/sgml/monitoring.sgml
src/backend/catalog/system_views.sql
src/backend/postmaster/pgstat.c
src/backend/storage/buffer/bufmgr.c
src/backend/utils/adt/pgstatfuncs.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/include/pgstat.h
src/test/regress/expected/rules.out

index 0a5d519214da042a6ad53a2e92e77b42221e9513..81392997f8848466c3c61fc0e611e406ae1ae3e9 100644 (file)
@@ -4295,8 +4295,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
         default, because it will repeatedly query the operating system for
         the current time, which may cause significant overhead on some
         platforms.  You can use the  tool to
-        measure the overhead of timing on your system.  Only superusers can
-        change this setting.
+        measure the overhead of timing on your system.  Timing information is
+        displayed in , in the output of
+         when the BUFFERS option is
+        used, and by .  Only superusers can
+        change this setting.  
        
       
      
index 840e54aef8a55232bf0dc94c6429b88c7537f00c..013dd335a4a73317a75c0d98785f29c9d4e38801 100644 (file)
@@ -144,6 +144,11 @@ postgres: user database host 
    statistics are collected about table and index accesses.
   
 
+  
+   The parameter  enables monitoring
+   of block read and write times.
+  
+
   
    The parameter  enables tracking of
    usage of user-defined functions.
@@ -961,6 +966,20 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
       This value can also be returned by directly calling
       the pg_stat_get_db_deadlocks function.
     
+    
+     block_read_time
+     bigint
+     Time spent by backends reading data file blocks, in milliseconds.
+      The same value can be returned in microseconds by directly calling
+      the pg_stat_get_db_block_time_read function.
+    
+    
+     block_write_time
+     bigint
+     Time spent by backends writing data file blocks, in milliseconds.
+      The same value can be returned in microseconds by directly calling
+      the pg_stat_get_db_block_time_write function.
+    
     
      stats_reset
      timestamptz
index ab594eba9bcc114778a42f7ab9d3a590c6e2155d..9daebdf54784fe70dac350e0fb92bc0ffaa93161 100644 (file)
@@ -604,6 +604,8 @@ CREATE VIEW pg_stat_database AS
             pg_stat_get_db_temp_files(D.oid) AS temp_files,
             pg_stat_get_db_temp_bytes(D.oid) AS temp_bytes,
             pg_stat_get_db_deadlocks(D.oid) AS deadlocks,
+            pg_stat_get_db_block_time_read(D.oid) / 1000 AS block_read_time,
+            pg_stat_get_db_block_time_write(D.oid) / 1000 AS block_write_time,
             pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
     FROM pg_database D;
 
index a53fc52f4701f1eeeae80988529c9c6cc1d961a1..a7cb036cac01f61e9679875fd39c027d928d028d 100644 (file)
@@ -197,6 +197,8 @@ static PgStat_SubXactStatus *pgStatXactStack = NULL;
 
 static int pgStatXactCommit = 0;
 static int pgStatXactRollback = 0;
+PgStat_Counter pgStatBlockTimeRead = 0;
+PgStat_Counter pgStatBlockTimeWrite = 0;
 
 /* Record that's written to 2PC state file when pgstat state is persisted */
 typedef struct TwoPhasePgStatRecord
@@ -782,13 +784,15 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
        return;
 
    /*
-    * Report accumulated xact commit/rollback whenever we send a normal
-    * tabstat message
+    * Report accumulated xact commit/rollback and I/O timings whenever we send
+    * a normal tabstat message
     */
    if (OidIsValid(tsmsg->m_databaseid))
    {
        tsmsg->m_xact_commit = pgStatXactCommit;
        tsmsg->m_xact_rollback = pgStatXactRollback;
+       tsmsg->m_block_time_read = pgStatBlockTimeRead;
+       tsmsg->m_block_time_write = pgStatBlockTimeWrite;
        pgStatXactCommit = 0;
        pgStatXactRollback = 0;
    }
@@ -796,6 +800,8 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
    {
        tsmsg->m_xact_commit = 0;
        tsmsg->m_xact_rollback = 0;
+       tsmsg->m_block_time_read = 0;
+       tsmsg->m_block_time_write = 0;
    }
 
    n = tsmsg->m_nentries;
@@ -3352,6 +3358,8 @@ pgstat_get_db_entry(Oid databaseid, bool create)
        result->n_temp_files = 0;
        result->n_temp_bytes = 0;
        result->n_deadlocks = 0;
+       result->n_block_time_read = 0;
+       result->n_block_time_write = 0;
 
        result->stat_reset_timestamp = GetCurrentTimestamp();
 
@@ -4070,6 +4078,8 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
     */
    dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
    dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
+   dbentry->n_block_time_read += msg->m_block_time_read;
+   dbentry->n_block_time_write += msg->m_block_time_write;
 
    /*
     * Process all table entries in the message.
@@ -4266,6 +4276,8 @@ pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
    dbentry->n_temp_bytes = 0;
    dbentry->n_temp_files = 0;
    dbentry->n_deadlocks = 0;
+   dbentry->n_block_time_read = 0;
+   dbentry->n_block_time_write = 0;
 
    dbentry->stat_reset_timestamp = GetCurrentTimestamp();
 
index 4de6a7212ccce2482006b1340ad1de21882fe677..613d7544c6356e0bdcf9978ac4cc41d97cef2262 100644 (file)
@@ -450,6 +450,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
            {
                INSTR_TIME_SET_CURRENT(io_time);
                INSTR_TIME_SUBTRACT(io_time, io_start);
+               pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
                INSTR_TIME_ADD(pgBufferUsage.time_read, io_time);
            }
 
@@ -1888,7 +1889,8 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 {
    XLogRecPtr  recptr;
    ErrorContextCallback errcontext;
-   instr_time io_start, io_end;
+   instr_time  io_start,
+               io_time;
 
    /*
     * Acquire the buffer's io_in_progress lock.  If StartBufferIO returns
@@ -1947,8 +1949,10 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 
    if (track_iotiming)
    {
-       INSTR_TIME_SET_CURRENT(io_end);
-       INSTR_TIME_ACCUM_DIFF(pgBufferUsage.time_write, io_end, io_start);
+       INSTR_TIME_SET_CURRENT(io_time);
+       INSTR_TIME_SUBTRACT(io_time, io_start);
+       pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
+       INSTR_TIME_ADD(pgBufferUsage.time_write, io_time);
    }
 
    pgBufferUsage.shared_blks_written++;
index 68b25274e38195c83486d4f1f9492f7da720bf81..20ee13db6cf545623570bc56d2b1976edaccae23 100644 (file)
@@ -82,6 +82,8 @@ extern Datum pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_stat_reset_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_temp_files(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_temp_bytes(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_db_block_time_read(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_db_block_time_write(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS);
@@ -1357,6 +1359,36 @@ pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS)
    PG_RETURN_INT64(result);
 }
 
+Datum
+pg_stat_get_db_block_time_read(PG_FUNCTION_ARGS)
+{
+   Oid         dbid = PG_GETARG_OID(0);
+   int64       result;
+   PgStat_StatDBEntry *dbentry;
+
+   if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+       result = 0;
+   else
+       result = (int64) (dbentry->n_block_time_read);
+
+   PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_block_time_write(PG_FUNCTION_ARGS)
+{
+   Oid         dbid = PG_GETARG_OID(0);
+   int64       result;
+   PgStat_StatDBEntry *dbentry;
+
+   if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+       result = 0;
+   else
+       result = (int64) (dbentry->n_block_time_write);
+
+   PG_RETURN_INT64(result);
+}
+
 Datum
 pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
 {
index 5d896bd11ae019aff70867379e16d06fddbe7d7d..2c21a958f55cfbf1548aae9d3df916d53f729986 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201203191
+#define CATALOG_VERSION_NO 201204051
 
 #endif
index 49b0754a2ca540d7c513ce4a706c1d2b155af2a5..597b9f5fe6a476bf1633fdafceed6617f0b58e5e 100644 (file)
@@ -2662,6 +2662,10 @@ DATA(insert OID = 3150 (  pg_stat_get_db_temp_files PGNSP PGUID 12 1 0 0 0 f f f
 DESCR("statistics: number of temporary files written");
 DATA(insert OID = 3151 (  pg_stat_get_db_temp_bytes PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_temp_bytes _null_ _null_ _null_ ));
 DESCR("statistics: number of bytes in temporary files written");
+DATA(insert OID = 2844 (  pg_stat_get_db_block_time_read   PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_block_time_read _null_ _null_ _null_ ));
+DESCR("statistics: block read time in microseconds");
+DATA(insert OID = 2845 (  pg_stat_get_db_block_time_write  PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_block_time_write _null_ _null_ _null_ ));
+DESCR("statistics: block write time in microseconds");
 DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ ));
 DESCR("statistics: number of timed checkpoints started by the bgwriter");
 DATA(insert OID = 2770 ( pg_stat_get_bgwriter_requested_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_requested_checkpoints _null_ _null_ _null_ ));
index 1281bd8b69f2ef433bcd2b1f87a6e63f1d09dd07..b3618f28d01b8a08c82cf2d91d139e5a39f0c646 100644 (file)
@@ -233,6 +233,8 @@ typedef struct PgStat_MsgTabstat
    int         m_nentries;
    int         m_xact_commit;
    int         m_xact_rollback;
+   PgStat_Counter    m_block_time_read;
+   PgStat_Counter    m_block_time_write;
    PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
 } PgStat_MsgTabstat;
 
@@ -536,6 +538,8 @@ typedef struct PgStat_StatDBEntry
    PgStat_Counter n_temp_files;
    PgStat_Counter n_temp_bytes;
    PgStat_Counter n_deadlocks;
+   PgStat_Counter n_block_time_read;       /* times in microseconds */
+   PgStat_Counter n_block_time_write;
 
    TimestampTz stat_reset_timestamp;
 
@@ -721,6 +725,12 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * Updated by pgstat_count_time_* macros.
+ */
+extern PgStat_Counter pgStatBlockTimeRead;
+extern PgStat_Counter pgStatBlockTimeWrite;
+
 /* ----------
  * Functions called from postmaster
  * ----------
@@ -816,6 +826,10 @@ extern void pgstat_initstats(Relation rel);
        if ((rel)->pgstat_info != NULL)                             \
            (rel)->pgstat_info->t_counts.t_blocks_hit++;            \
    } while (0)
+#define pgstat_count_buffer_read_time(n)                           \
+   pgStatBlockTimeRead += (n);
+#define pgstat_count_buffer_write_time(n)                          \
+   pgStatBlockTimeWrite += (n);
 
 extern void pgstat_count_heap_insert(Relation rel, int n);
 extern void pgstat_count_heap_update(Relation rel, bool hot);
index aaf0cca02609191dbd20550860217bad7ab91b71..67ebb951ebf363f53a1b440114ea8755f0e30c08 100644 (file)
@@ -1296,7 +1296,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  pg_stat_all_indexes             | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
  pg_stat_all_tables              | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze, pg_stat_get_vacuum_count(c.oid) AS vacuum_count, pg_stat_get_autovacuum_count(c.oid) AS autovacuum_count, pg_stat_get_analyze_count(c.oid) AS analyze_count, pg_stat_get_autoanalyze_count(c.oid) AS autoanalyze_count FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
  pg_stat_bgwriter                | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync, pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
- pg_stat_database                | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted, pg_stat_get_db_conflict_all(d.oid) AS conflicts, pg_stat_get_db_temp_files(d.oid) AS temp_files, pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes, pg_stat_get_db_deadlocks(d.oid) AS deadlocks, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset FROM pg_database d;
+ pg_stat_database                | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted, pg_stat_get_db_conflict_all(d.oid) AS conflicts, pg_stat_get_db_temp_files(d.oid) AS temp_files, pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes, pg_stat_get_db_deadlocks(d.oid) AS deadlocks, (pg_stat_get_db_block_time_read(d.oid) / 1000) AS block_read_time, (pg_stat_get_db_block_time_write(d.oid) / 1000) AS block_write_time, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset FROM pg_database d;
  pg_stat_database_conflicts      | SELECT d.oid AS datid, d.datname, pg_stat_get_db_conflict_tablespace(d.oid) AS confl_tablespace, pg_stat_get_db_conflict_lock(d.oid) AS confl_lock, pg_stat_get_db_conflict_snapshot(d.oid) AS confl_snapshot, pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin, pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock FROM pg_database d;
  pg_stat_replication             | SELECT s.pid, s.usesysid, u.rolname AS usename, s.application_name, s.client_addr, s.client_hostname, s.client_port, s.backend_start, w.state, w.sent_location, w.write_location, w.flush_location, w.replay_location, w.sync_priority, w.sync_state FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, waiting, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port), pg_authid u, pg_stat_get_wal_senders() w(pid, state, sent_location, write_location, flush_location, replay_location, sync_priority, sync_state) WHERE ((s.usesysid = u.oid) AND (s.pid = w.pid));
  pg_stat_sys_indexes             | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text));