New GUC, track_iotiming, to track I/O timings.
authorRobert Haas
Tue, 27 Mar 2012 18:52:37 +0000 (14:52 -0400)
committerRobert Haas
Tue, 27 Mar 2012 18:55:02 +0000 (14:55 -0400)
Currently, the only way to see the numbers this gathers is via
EXPLAIN (ANALYZE, BUFFERS), but the plan is to add visibility through
the stats collector and pg_stat_statements in subsequent patches.

Ants Aasma, reviewed by Greg Smith, with some further changes by me.

doc/src/sgml/config.sgml
src/backend/commands/explain.c
src/backend/executor/instrument.c
src/backend/storage/buffer/bufmgr.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/executor/instrument.h
src/include/storage/bufmgr.h

index 3e178759a29e11b3346b7796b32f853aae9b4323..9baaa3fb36acd9c6a4ff50a7074a5b7491034dca 100644 (file)
@@ -4284,6 +4284,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       
      
 
+     
+      track_iotiming (boolean)
+      
+       track_iotiming configuration parameter
+      
+      
+       
+        Enables timing of database I/O calls.  This parameter is off by
+        default, because it will repeatedly query the operating system for
+        the current time, which may cause significant overhead on some
+        platforms.  Only superusers can change this setting.
+       
+      
+     
+
      
       track_functions (enum)
       
index a14cae144205a86956289e2f74c78350a8960553..81d63f87f10a3cedd335af7f9889d5157ae806fb 100644 (file)
@@ -1236,6 +1236,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
                                     usage->local_blks_written > 0);
            bool        has_temp = (usage->temp_blks_read > 0 ||
                                    usage->temp_blks_written > 0);
+           bool        has_timing = (!INSTR_TIME_IS_ZERO(usage->time_read) ||
+                                     !INSTR_TIME_IS_ZERO(usage->time_write));
 
            /* Show only positive counter values. */
            if (has_shared || has_local || has_temp)
@@ -1291,6 +1293,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
                }
                appendStringInfoChar(es->str, '\n');
            }
+
+           /* As above, show only positive counter values. */
+           if (has_timing)
+           {
+               appendStringInfoSpaces(es->str, es->indent * 2);
+               appendStringInfoString(es->str, "I/O Timings:");
+               if (!INSTR_TIME_IS_ZERO(usage->time_read))
+                   appendStringInfo(es->str, " read=%0.2f",
+                                INSTR_TIME_GET_MILLISEC(usage->time_read));
+               if (!INSTR_TIME_IS_ZERO(usage->time_write))
+                   appendStringInfo(es->str, " write=%0.2f",
+                                INSTR_TIME_GET_MILLISEC(usage->time_write));
+               appendStringInfoChar(es->str, '\n');
+           }
        }
        else
        {
@@ -1304,6 +1320,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
            ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es);
            ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es);
            ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es);
+           ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->time_read), 3, es);
+           ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->time_write), 3, es);
        }
    }
 
index 6e9f450d6886524ec23972cfdfee6e63b909dfeb..92e56d13ae6b501014e54a3112152ce25bdd7e57 100644 (file)
@@ -145,4 +145,6 @@ BufferUsageAccumDiff(BufferUsage *dst,
    dst->local_blks_written += add->local_blks_written - sub->local_blks_written;
    dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read;
    dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written;
+   INSTR_TIME_ACCUM_DIFF(dst->time_read, add->time_read, sub->time_read);
+   INSTR_TIME_ACCUM_DIFF(dst->time_write, add->time_write, sub->time_write);
 }
index 3924a51c0c6d31e1b7612ee3cf78ca04639e58aa..4de6a7212ccce2482006b1340ad1de21882fe677 100644 (file)
@@ -67,6 +67,7 @@
 bool       zero_damaged_pages = false;
 int            bgwriter_lru_maxpages = 100;
 double     bgwriter_lru_multiplier = 2.0;
+bool       track_iotiming = false;
 
 /*
  * How many buffers PrefetchBuffer callers should try to stay ahead of their
@@ -437,8 +438,21 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
            MemSet((char *) bufBlock, 0, BLCKSZ);
        else
        {
+           instr_time  io_start,
+                       io_time;
+
+           if (track_iotiming)
+               INSTR_TIME_SET_CURRENT(io_start);
+
            smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
 
+           if (track_iotiming)
+           {
+               INSTR_TIME_SET_CURRENT(io_time);
+               INSTR_TIME_SUBTRACT(io_time, io_start);
+               INSTR_TIME_ADD(pgBufferUsage.time_read, io_time);
+           }
+
            /* check for garbage data */
            if (!PageHeaderIsValid((PageHeader) bufBlock))
            {
@@ -1874,6 +1888,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 {
    XLogRecPtr  recptr;
    ErrorContextCallback errcontext;
+   instr_time io_start, io_end;
 
    /*
     * Acquire the buffer's io_in_progress lock.  If StartBufferIO returns
@@ -1921,12 +1936,21 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
    buf->flags &= ~BM_JUST_DIRTIED;
    UnlockBufHdr(buf);
 
+   if (track_iotiming)
+       INSTR_TIME_SET_CURRENT(io_start);
+
    smgrwrite(reln,
              buf->tag.forkNum,
              buf->tag.blockNum,
              (char *) BufHdrGetBlock(buf),
              false);
 
+   if (track_iotiming)
+   {
+       INSTR_TIME_SET_CURRENT(io_end);
+       INSTR_TIME_ACCUM_DIFF(pgBufferUsage.time_write, io_end, io_start);
+   }
+
    pgBufferUsage.shared_blks_written++;
 
    /*
index 3d2fe3ebc87c864155bcd69cd51856b4e42ddda7..8ea391a8e6cc1154751d46eda18231ab4230370f 100644 (file)
@@ -1018,6 +1018,15 @@ static struct config_bool ConfigureNamesBool[] =
        true,
        NULL, NULL, NULL
    },
+   {
+       {"track_iotiming", PGC_SUSET, STATS_COLLECTOR,
+           gettext_noop("Collects timing information for database IO activity."),
+           NULL
+       },
+       &track_iotiming,
+       false,
+       NULL, NULL, NULL
+   },
 
    {
        {"update_process_title", PGC_SUSET, STATS_COLLECTOR,
index 96da086b0f4c8f8e6fcd9a934021bd7a170105bd..cbae3bfb2286fe8feff8a658273857e0f82c01a8 100644 (file)
 
 #track_activities = on
 #track_counts = on
+#track_iotiming = off
 #track_functions = none            # none, pl, all
 #track_activity_query_size = 1024  # (change requires restart)
 #update_process_title = on
index 066f684f330ca942958d94ab59ad4bb4aecbf604..d29ab9b9f341a7c1dabbe007f0584d92cb5229ad 100644 (file)
@@ -28,6 +28,8 @@ typedef struct BufferUsage
    long        local_blks_written;     /* # of local disk blocks written */
    long        temp_blks_read; /* # of temp blocks read */
    long        temp_blks_written;      /* # of temp blocks written */
+   instr_time  time_read;              /* time spent reading */
+   instr_time  time_write;             /* time spent writing */
 } BufferUsage;
 
 /* Flag bits included in InstrAlloc's instrument_options bitmask */
index de1bbd01d839db3cdbf9dab2b466a67225d4cc8c..d72bad9f56143b36650a80fc1e16fcecd3d63fbd 100644 (file)
@@ -48,6 +48,7 @@ extern PGDLLIMPORT int NBuffers;
 extern bool zero_damaged_pages;
 extern int bgwriter_lru_maxpages;
 extern double bgwriter_lru_multiplier;
+extern bool track_iotiming;
 extern int target_prefetch_pages;
 
 /* in buf_init.c */