Add contrib/pg_stat_statements for server-wide tracking of statement execution
authorTom Lane
Sun, 4 Jan 2009 22:19:59 +0000 (22:19 +0000)
committerTom Lane
Sun, 4 Jan 2009 22:19:59 +0000 (22:19 +0000)
statistics.

Takahiro Itagaki

contrib/Makefile
contrib/README
contrib/pg_stat_statements/Makefile [new file with mode: 0644]
contrib/pg_stat_statements/pg_stat_statements.c [new file with mode: 0644]
contrib/pg_stat_statements/pg_stat_statements.sql.in [new file with mode: 0644]
contrib/pg_stat_statements/uninstall_pg_stat_statements.sql [new file with mode: 0644]
doc/src/sgml/contrib.sgml
doc/src/sgml/filelist.sgml
doc/src/sgml/pgstatstatements.sgml [new file with mode: 0644]
src/backend/postmaster/postmaster.c
src/include/pgstat.h

index bbd43e1f661eb3bbf987088b68fa476a1cc27c6b..4b1d2ae95d39c6951e0fbcb5e764121ba4db1904 100644 (file)
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.85 2008/11/19 02:59:28 tgl Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.86 2009/01/04 22:19:59 tgl Exp $
 
 subdir = contrib
 top_builddir = ..
@@ -27,6 +27,7 @@ WANTED_DIRS = \
        pg_buffercache  \
        pg_freespacemap \
        pg_standby  \
+       pg_stat_statements \
        pg_trgm     \
        pgbench     \
        pgcrypto    \
index 060853fa216272770d291a994dd1ece966982b88..7d258d4b2fc43a945b3fab3e335393195d71732a 100644 (file)
@@ -112,6 +112,10 @@ pg_standby -
    Sample archive_command for warm standby operation
    by Simon Riggs 
 
+pg_stat_statements -
+   Track statement execution times across a whole database cluster
+   by Takahiro Itagaki 
+
 pg_trgm -
    Functions for determining the similarity of text based on trigram
    matching.
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
new file mode 100644 (file)
index 0000000..ce335a6
--- /dev/null
@@ -0,0 +1,17 @@
+# $PostgreSQL: pgsql/contrib/pg_stat_statements/Makefile,v 1.1 2009/01/04 22:19:59 tgl Exp $
+
+MODULE_big = pg_stat_statements
+DATA_built = pg_stat_statements.sql
+DATA = uninstall_pg_stat_statements.sql
+OBJS = pg_stat_statements.o
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_stat_statements
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
new file mode 100644 (file)
index 0000000..df14d05
--- /dev/null
@@ -0,0 +1,904 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_stat_statements.c
+ *     Track statement execution times across a whole database cluster.
+ *
+ * Note about locking issues: to create or delete an entry in the shared
+ * hashtable, one must hold pgss->lock exclusively.  Modifying any field
+ * in an entry except the counters requires the same.  To look up an entry,
+ * one must hold the lock shared.  To read or update the counters within
+ * an entry, one must hold the lock shared or exclusive (so the entry doesn't
+ * disappear!) and also take the entry's mutex spinlock.
+ *
+ *
+ * Copyright (c) 2008-2009, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *   $PostgreSQL: pgsql/contrib/pg_stat_statements/pg_stat_statements.c,v 1.1 2009/01/04 22:19:59 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include 
+
+#include "access/hash.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "executor/instrument.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/spin.h"
+#include "utils/builtins.h"
+#include "utils/hsearch.h"
+#include "utils/guc.h"
+
+
+PG_MODULE_MAGIC;
+
+/* Location of stats file */
+#define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
+
+/* This constant defines the magic number in the stats file header */
+static const uint32 PGSS_FILE_HEADER = 0x20081202;
+
+/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
+#define USAGE_EXEC(duration)   (1.0)
+#define USAGE_INIT             (1.0)   /* including initial planning */
+#define USAGE_DECREASE_FACTOR  (0.99)  /* decreased every entry_dealloc */
+#define USAGE_DEALLOC_PERCENT  5       /* free this % of entries at once */
+
+/*
+ * Hashtable key that defines the identity of a hashtable entry.  The
+ * hash comparators do not assume that the query string is null-terminated;
+ * this lets us search for an mbcliplen'd string without copying it first.
+ *
+ * Presently, the query encoding is fully determined by the source database
+ * and so we don't really need it to be in the key.  But that might not always
+ * be true. Anyway it's notationally convenient to pass it as part of the key.
+ */
+typedef struct pgssHashKey
+{
+   Oid         userid;         /* user OID */
+   Oid         dbid;           /* database OID */
+   int         encoding;       /* query encoding */
+   int         query_len;      /* # of valid bytes in query string */
+   const char *query_ptr;      /* query string proper */
+} pgssHashKey;
+
+/*
+ * The actual stats counters kept within pgssEntry.
+ */
+typedef struct Counters
+{
+   int64       calls;          /* # of times executed */
+   double      total_time;     /* total execution time in seconds */
+   int64       rows;           /* total # of retrieved or affected rows */
+   double      usage;          /* usage factor */
+} Counters;
+
+/*
+ * Statistics per statement
+ *
+ * NB: see the file read/write code before changing field order here.
+ */
+typedef struct pgssEntry
+{
+   pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
+   Counters    counters;       /* the statistics for this query */
+   slock_t     mutex;          /* protects the counters only */
+   char        query[1];       /* VARIABLE LENGTH ARRAY - MUST BE LAST */
+   /* Note: the allocated length of query[] is actually pgss->query_size */
+} pgssEntry;
+
+/*
+ * Global shared state
+ */
+typedef struct pgssSharedState
+{
+   LWLockId    lock;           /* protects hashtable search/modification */
+   int         query_size;     /* max query length in bytes */
+} pgssSharedState;
+
+/*---- Local variables ----*/
+
+/* Current nesting depth of ExecutorRun calls */
+static int                     nested_level = 0;
+/* Saved hook values in case of unload */
+static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+static ExecutorStart_hook_type prev_ExecutorStart = NULL;
+static ExecutorRun_hook_type   prev_ExecutorRun = NULL;
+static ExecutorEnd_hook_type   prev_ExecutorEnd = NULL;
+/* Links to shared memory state */
+static pgssSharedState        *pgss = NULL;
+static HTAB                       *pgss_hash = NULL;
+
+/*---- GUC variables ----*/
+
+typedef enum
+{
+   PGSS_TRACK_NONE,            /* track no statements */
+   PGSS_TRACK_TOP,             /* only top level statements */
+   PGSS_TRACK_ALL,             /* all statements, including nested ones */
+} PGSSTrackLevel;
+
+static const struct config_enum_entry track_options[] = {
+   {"none", PGSS_TRACK_NONE, false},
+   {"top", PGSS_TRACK_TOP, false},
+   {"all", PGSS_TRACK_ALL, false},
+   {NULL, 0, false}
+};
+
+static int pgss_max;           /* max # statements to track */
+static int pgss_track;         /* tracking level */
+static bool pgss_save;         /* whether to save stats across shutdown */
+
+
+#define pgss_enabled() \
+   (pgss_track == PGSS_TRACK_ALL || \
+   (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
+
+/*---- Function declarations ----*/
+
+void   _PG_init(void);
+void   _PG_fini(void);
+
+Datum  pg_stat_statements_reset(PG_FUNCTION_ARGS);
+Datum  pg_stat_statements(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
+PG_FUNCTION_INFO_V1(pg_stat_statements);
+
+static void pgss_shmem_startup(void);
+static void pgss_shmem_shutdown(int code, Datum arg);
+static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
+static void pgss_ExecutorRun(QueryDesc *queryDesc,
+                            ScanDirection direction,
+                            long count);
+static void pgss_ExecutorEnd(QueryDesc *queryDesc);
+static uint32 pgss_hash_fn(const void *key, Size keysize);
+static int pgss_match_fn(const void *key1, const void *key2, Size keysize);
+static void pgss_store(const char *query,
+                      const Instrumentation *instr, uint32 rows);
+static Size    pgss_memsize(void);
+static pgssEntry *entry_alloc(pgssHashKey *key);
+static void entry_dealloc(void);
+static void entry_reset(void);
+
+
+/*
+ * Module load callback
+ */
+void
+_PG_init(void)
+{
+   /*
+    * In order to create our shared memory area, we have to be loaded via
+    * shared_preload_libraries.  If not, fall out without hooking into
+    * any of the main system.  (We don't throw error here because it seems
+    * useful to allow the pg_stat_statements functions to be created even
+    * when the module isn't active.  The functions must protect themselves
+    * against being called then, however.)
+    */
+   if (!process_shared_preload_libraries_in_progress)
+       return;
+
+   /*
+    * Define (or redefine) custom GUC variables.
+    */
+   DefineCustomIntVariable("pg_stat_statements.max",
+                           "Sets the maximum number of statements tracked by pg_stat_statements.",
+                           NULL,
+                           &pgss_max,
+                           1000,
+                           100,
+                           INT_MAX,
+                           PGC_POSTMASTER,
+                           0,
+                           NULL,
+                           NULL);
+
+   DefineCustomEnumVariable("pg_stat_statements.track",
+                            "Selects which statements are tracked by pg_stat_statements.",
+                            NULL,
+                            &pgss_track,
+                            PGSS_TRACK_TOP,
+                            track_options,
+                            PGC_SUSET,
+                            0,
+                            NULL,
+                            NULL);
+
+   DefineCustomBoolVariable("pg_stat_statements.save",
+                            "Save pg_stat_statements statistics across server shutdowns.",
+                            NULL,
+                            &pgss_save,
+                            true,
+                            PGC_SIGHUP,
+                            0,
+                            NULL,
+                            NULL);
+
+   /*
+    * Request additional shared resources.  (These are no-ops if we're not in
+    * the postmaster process.)  We'll allocate or attach to the shared
+    * resources in pgss_shmem_startup().
+    */
+   RequestAddinShmemSpace(pgss_memsize());
+   RequestAddinLWLocks(1);
+
+   /*
+    * Install hooks.
+    */
+   prev_shmem_startup_hook = shmem_startup_hook;
+   shmem_startup_hook = pgss_shmem_startup;
+   prev_ExecutorStart = ExecutorStart_hook;
+   ExecutorStart_hook = pgss_ExecutorStart;
+   prev_ExecutorRun = ExecutorRun_hook;
+   ExecutorRun_hook = pgss_ExecutorRun;
+   prev_ExecutorEnd = ExecutorEnd_hook;
+   ExecutorEnd_hook = pgss_ExecutorEnd;
+}
+
+/*
+ * Module unload callback
+ */
+void
+_PG_fini(void)
+{
+   /* Uninstall hooks. */
+   ExecutorStart_hook = prev_ExecutorStart;
+   ExecutorRun_hook = prev_ExecutorRun;
+   ExecutorEnd_hook = prev_ExecutorEnd;
+   shmem_startup_hook = prev_shmem_startup_hook;
+}
+
+/*
+ * shmem_startup hook: allocate or attach to shared memory,
+ * then load any pre-existing statistics from file.
+ */
+static void
+pgss_shmem_startup(void)
+{
+    bool       found;
+   HASHCTL     info;
+   FILE       *file;
+   uint32      header;
+   int32       num;
+   int32       i;
+   int         query_size;
+   int         buffer_size;
+   char       *buffer = NULL;
+
+   if (prev_shmem_startup_hook)
+       prev_shmem_startup_hook();
+
+   /* reset in case this is a restart within the postmaster */
+   pgss = NULL;
+   pgss_hash = NULL;
+
+   /*
+    * Create or attach to the shared memory state, including hash table
+    */
+   LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+
+   pgss = ShmemInitStruct("pg_stat_statements",
+                          sizeof(pgssSharedState),
+                          &found);
+   if (!pgss)
+       elog(ERROR, "out of shared memory");
+
+   if (!found)
+    {
+       /* First time through ... */
+       pgss->lock = LWLockAssign();
+       pgss->query_size = pgstat_track_activity_query_size;
+   }
+
+   /* Be sure everyone agrees on the hash table entry size */
+   query_size = pgss->query_size;
+
+   memset(&info, 0, sizeof(info));
+   info.keysize = sizeof(pgssHashKey);
+   info.entrysize = offsetof(pgssEntry, query) + query_size;
+   info.hash = pgss_hash_fn;
+   info.match = pgss_match_fn;
+   pgss_hash = ShmemInitHash("pg_stat_statements hash",
+                             pgss_max, pgss_max,
+                             &info,
+                             HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
+   if (!pgss_hash)
+       elog(ERROR, "out of shared memory");
+
+   LWLockRelease(AddinShmemInitLock);
+
+   /*
+    * If we're in the postmaster (or a standalone backend...), set up a
+    * shmem exit hook to dump the statistics to disk.
+    */
+   if (!IsUnderPostmaster)
+       on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
+
+   /*
+    * Attempt to load old statistics from the dump file.
+    *
+    * Note: we don't bother with locks here, because there should be no
+    * other processes running when this is called.
+    */
+   if (!pgss_save)
+       return;
+
+   file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
+   if (file == NULL)
+   {
+       if (errno == ENOENT)
+           return;             /* ignore not-found error */
+       goto error;
+   }
+
+   buffer_size = query_size;
+   buffer = (char *) palloc(buffer_size);
+
+   if (fread(&header, sizeof(uint32), 1, file) != 1 ||
+       header != PGSS_FILE_HEADER ||
+       fread(&num, sizeof(int32), 1, file) != 1)
+       goto error;
+
+   for (i = 0; i < num; i++)
+   {
+       pgssEntry   temp;
+       pgssEntry   *entry;
+
+       if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
+           goto error;
+
+       /* Encoding is the only field we can easily sanity-check */
+       if (!PG_VALID_BE_ENCODING(temp.key.encoding))
+           goto error;
+
+       /* Previous incarnation might have had a larger query_size */
+       if (temp.key.query_len >= buffer_size)
+       {
+           buffer = (char *) repalloc(buffer, temp.key.query_len + 1);
+           buffer_size = temp.key.query_len + 1;
+       }
+
+       if (fread(buffer, 1, temp.key.query_len, file) != temp.key.query_len)
+           goto error;
+       buffer[temp.key.query_len] = '\0';
+
+       /* Clip to available length if needed */
+       if (temp.key.query_len >= query_size)
+           temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding,
+                                                      buffer,
+                                                      temp.key.query_len,
+                                                      query_size - 1);
+       temp.key.query_ptr = buffer;
+
+       /* make the hashtable entry (discards old entries if too many) */
+       entry = entry_alloc(&temp.key);
+
+       /* copy in the actual stats */
+       entry->counters = temp.counters;
+   }
+
+   pfree(buffer);
+   FreeFile(file);
+   return;
+
+error:
+   ereport(LOG,
+           (errcode_for_file_access(),
+            errmsg("could not read pg_stat_statement file \"%s\": %m",
+                   PGSS_DUMP_FILE)));
+   if (buffer)
+       pfree(buffer);
+   if (file)
+       FreeFile(file);
+   /* If possible, throw away the bogus file; ignore any error */
+   unlink(PGSS_DUMP_FILE);
+}
+
+/*
+ * shmem_shutdown hook: Dump statistics into file.
+ *
+ * Note: we don't bother with acquiring lock, because there should be no
+ * other processes running when this is called.
+ */
+static void
+pgss_shmem_shutdown(int code, Datum arg)
+{
+   FILE               *file;
+   HASH_SEQ_STATUS     hash_seq;
+   int32               num_entries;
+   pgssEntry          *entry;
+
+   /* Don't try to dump during a crash. */
+   if (code)
+       return;
+
+   /* Safety check ... shouldn't get here unless shmem is set up. */
+   if (!pgss || !pgss_hash)
+       return;
+
+   /* Don't dump if told not to. */
+   if (!pgss_save)
+       return;
+
+   file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_W);
+   if (file == NULL)
+       goto error;
+
+   if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
+       goto error;
+   num_entries = hash_get_num_entries(pgss_hash);
+   if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
+       goto error;
+
+   hash_seq_init(&hash_seq, pgss_hash);
+   while ((entry = hash_seq_search(&hash_seq)) != NULL)
+   {
+       int     len = entry->key.query_len;
+
+       if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
+           fwrite(entry->query, 1, len, file) != len)
+           goto error;
+   }
+
+   if (FreeFile(file))
+   {
+       file = NULL;
+       goto error;
+   }
+
+   return;
+
+error:
+   ereport(LOG,
+           (errcode_for_file_access(),
+            errmsg("could not write pg_stat_statement file \"%s\": %m",
+                   PGSS_DUMP_FILE)));
+   if (file)
+       FreeFile(file);
+   unlink(PGSS_DUMP_FILE);
+}
+
+/*
+ * ExecutorStart hook: start up tracking if needed
+ */
+static void
+pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
+{
+   if (prev_ExecutorStart)
+       prev_ExecutorStart(queryDesc, eflags);
+   else
+       standard_ExecutorStart(queryDesc, eflags);
+
+   if (pgss_enabled())
+   {
+       /*
+        * Set up to track total elapsed time in ExecutorRun.  Make sure
+        * the space is allocated in the per-query context so it will go
+        * away at ExecutorEnd.
+        */
+       if (queryDesc->totaltime == NULL)
+       {
+           MemoryContext oldcxt;
+
+           oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+           queryDesc->totaltime = InstrAlloc(1);
+           MemoryContextSwitchTo(oldcxt);
+       }
+   }
+}
+
+/*
+ * ExecutorRun hook: all we need do is track nesting depth
+ */
+static void
+pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
+{
+   nested_level++;
+   PG_TRY();
+   {
+       if (prev_ExecutorRun)
+           prev_ExecutorRun(queryDesc, direction, count);
+       else
+           standard_ExecutorRun(queryDesc, direction, count);
+       nested_level--;
+   }
+   PG_CATCH();
+   {
+       nested_level--;
+       PG_RE_THROW();
+   }
+   PG_END_TRY();
+}
+
+/*
+ * ExecutorEnd hook: store results if needed
+ */
+static void
+pgss_ExecutorEnd(QueryDesc *queryDesc)
+{
+   if (queryDesc->totaltime && pgss_enabled())
+   {
+       /*
+        * Make sure stats accumulation is done.  (Note: it's okay if
+        * several levels of hook all do this.)
+        */
+       InstrEndLoop(queryDesc->totaltime);
+
+       pgss_store(queryDesc->sourceText,
+                  queryDesc->totaltime,
+                  queryDesc->estate->es_processed);
+   }
+
+   if (prev_ExecutorEnd)
+       prev_ExecutorEnd(queryDesc);
+   else
+       standard_ExecutorEnd(queryDesc);
+}
+
+/*
+ * Calculate hash value for a key
+ */
+static uint32
+pgss_hash_fn(const void *key, Size keysize)
+{
+   const pgssHashKey *k = (const pgssHashKey *) key;
+
+   /* we don't bother to include encoding in the hash */
+   return hash_uint32((uint32) k->userid) ^
+       hash_uint32((uint32) k->dbid) ^
+       DatumGetUInt32(hash_any((const unsigned char *) k->query_ptr,
+                               k->query_len));
+}
+
+/*
+ * Compare two keys - zero means match
+ */
+static int
+pgss_match_fn(const void *key1, const void *key2, Size keysize)
+{
+   const pgssHashKey *k1 = (const pgssHashKey *) key1;
+   const pgssHashKey *k2 = (const pgssHashKey *) key2;
+
+   if (k1->userid == k2->userid &&
+       k1->dbid == k2->dbid &&
+       k1->encoding == k2->encoding &&
+       k1->query_len == k2->query_len &&
+       memcmp(k1->query_ptr, k2->query_ptr, k1->query_len) == 0)
+       return 0;
+   else
+       return 1;
+}
+
+/*
+ * Store some statistics for a statement.
+ */
+static void
+pgss_store(const char *query, const Instrumentation *instr, uint32 rows)
+{
+   pgssHashKey key;
+   double      usage;
+   pgssEntry   *entry;
+
+   Assert(query != NULL);
+
+   /* Safety check... */
+   if (!pgss || !pgss_hash)
+       return;
+
+   /* Set up key for hashtable search */
+   key.userid = GetUserId();
+   key.dbid = MyDatabaseId;
+   key.encoding = GetDatabaseEncoding();
+   key.query_len = strlen(query);
+   if (key.query_len >= pgss->query_size)
+       key.query_len = pg_encoding_mbcliplen(key.encoding,
+                                             query,
+                                             key.query_len,
+                                             pgss->query_size - 1);
+   key.query_ptr = query;
+
+   usage = USAGE_EXEC(duration);
+
+   /* Lookup the hash table entry with shared lock. */
+   LWLockAcquire(pgss->lock, LW_SHARED);
+
+   entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
+   if (!entry)
+   {
+       /* Must acquire exclusive lock to add a new entry. */
+       LWLockRelease(pgss->lock);
+       LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
+       entry = entry_alloc(&key);
+   }
+
+   /* Grab the spinlock while updating the counters. */
+   {
+       volatile pgssEntry *e = (volatile pgssEntry *) entry;
+
+       SpinLockAcquire(&e->mutex);
+       e->counters.calls += 1;
+       e->counters.total_time += instr->total;
+       e->counters.rows += rows;
+       e->counters.usage += usage;
+       SpinLockRelease(&e->mutex);
+   }
+
+   LWLockRelease(pgss->lock);
+}
+
+/*
+ * Reset all statement statistics.
+ */
+Datum
+pg_stat_statements_reset(PG_FUNCTION_ARGS)
+{
+   if (!pgss || !pgss_hash)
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
+   entry_reset();
+   PG_RETURN_VOID();
+}
+
+#define PG_STAT_STATEMENTS_COLS        6
+
+/*
+ * Retrieve statement statistics.
+ */
+Datum
+pg_stat_statements(PG_FUNCTION_ARGS)
+{
+   ReturnSetInfo      *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+   TupleDesc           tupdesc;
+   Tuplestorestate    *tupstore;
+   MemoryContext       per_query_ctx;
+   MemoryContext       oldcontext;
+   Oid                 userid = GetUserId();
+   bool                is_superuser = superuser();
+   HASH_SEQ_STATUS     hash_seq;
+   pgssEntry          *entry;
+
+   if (!pgss || !pgss_hash)
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
+
+   /* check to see if caller supports us returning a tuplestore */
+   if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("set-valued function called in context that cannot accept a set")));
+   if (!(rsinfo->allowedModes & SFRM_Materialize))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("materialize mode required, but it is not " \
+                       "allowed in this context")));
+
+   per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+   oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+   tupdesc = CreateTemplateTupleDesc(PG_STAT_STATEMENTS_COLS, false);
+   TupleDescInitEntry(tupdesc, (AttrNumber) 1, "userid",
+                      OIDOID, -1, 0);
+   TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid",
+                      OIDOID, -1, 0);
+   TupleDescInitEntry(tupdesc, (AttrNumber) 3, "query",
+                      TEXTOID, -1, 0);
+   TupleDescInitEntry(tupdesc, (AttrNumber) 4, "calls",
+                      INT8OID, -1, 0);
+   TupleDescInitEntry(tupdesc, (AttrNumber) 5, "total_time",
+                      FLOAT8OID, -1, 0);
+   TupleDescInitEntry(tupdesc, (AttrNumber) 6, "rows",
+                      INT8OID, -1, 0);
+
+   tupstore = tuplestore_begin_heap(true, false, work_mem);
+   rsinfo->returnMode = SFRM_Materialize;
+   rsinfo->setResult = tupstore;
+   rsinfo->setDesc = tupdesc;
+
+   LWLockAcquire(pgss->lock, LW_SHARED);
+
+   hash_seq_init(&hash_seq, pgss_hash);
+   while ((entry = hash_seq_search(&hash_seq)) != NULL)
+   {
+       Datum       values[PG_STAT_STATEMENTS_COLS];
+       bool        nulls[PG_STAT_STATEMENTS_COLS];
+       int         i = 0;
+       Counters    tmp;
+
+       /* generate junk in short-term context */
+       MemoryContextSwitchTo(oldcontext);
+
+       memset(values, 0, sizeof(values));
+       memset(nulls, 0, sizeof(nulls));
+
+       values[i++] = ObjectIdGetDatum(entry->key.userid);
+       values[i++] = ObjectIdGetDatum(entry->key.dbid);
+
+       if (is_superuser || entry->key.userid == userid)
+       {
+           char   *qstr;
+
+           qstr = (char *)
+               pg_do_encoding_conversion((unsigned char *) entry->query,
+                                         entry->key.query_len,
+                                         entry->key.encoding,
+                                         GetDatabaseEncoding());
+           values[i++] = CStringGetTextDatum(qstr);
+           if (qstr != entry->query)
+               pfree(qstr);
+       }
+       else
+           values[i++] = CStringGetTextDatum("");
+
+       /* copy counters to a local variable to keep locking time short */
+       {
+           volatile pgssEntry *e = (volatile pgssEntry *) entry;
+
+           SpinLockAcquire(&e->mutex);
+           tmp = e->counters;
+           SpinLockRelease(&e->mutex);
+       }
+
+       values[i++] = Int64GetDatumFast(tmp.calls);
+       values[i++] = Float8GetDatumFast(tmp.total_time);
+       values[i++] = Int64GetDatumFast(tmp.rows);
+
+       Assert(i == PG_STAT_STATEMENTS_COLS);
+
+       /* switch to appropriate context while storing the tuple */
+       MemoryContextSwitchTo(per_query_ctx);
+       tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+   }
+
+   LWLockRelease(pgss->lock);
+
+   /* clean up and return the tuplestore */
+   tuplestore_donestoring(tupstore);
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return (Datum) 0;
+}
+
+/*
+ * Estimate shared memory space needed.
+ */
+static Size
+pgss_memsize(void)
+{
+   Size    size;
+   Size    entrysize;
+
+   size = MAXALIGN(sizeof(pgssSharedState));
+   entrysize = offsetof(pgssEntry, query) + pgstat_track_activity_query_size;
+   size = add_size(size, hash_estimate_size(pgss_max, entrysize));
+
+   return size;
+}
+
+/*
+ * Allocate a new hashtable entry.
+ * caller must hold an exclusive lock on pgss->lock
+ *
+ * Note: despite needing exclusive lock, it's not an error for the target
+ * entry to already exist.  This is because pgss_store releases and
+ * reacquires lock after failing to find a match; so someone else could
+ * have made the entry while we waited to get exclusive lock.
+ */
+static pgssEntry *
+entry_alloc(pgssHashKey *key)
+{
+   pgssEntry  *entry;
+   bool    found;
+
+   /* Caller must have clipped query properly */
+   Assert(key->query_len < pgss->query_size);
+
+   /* Make space if needed */
+   while (hash_get_num_entries(pgss_hash) >= pgss_max)
+       entry_dealloc();
+
+   /* Find or create an entry with desired hash code */
+   entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
+
+   if (!found)
+   {
+       /* New entry, initialize it */
+
+       /* dynahash tried to copy the key for us, but must fix query_ptr */
+       entry->key.query_ptr = entry->query;
+       /* reset the statistics */
+       memset(&entry->counters, 0, sizeof(Counters));
+       entry->counters.usage = USAGE_INIT;
+       /* re-initialize the mutex each time ... we assume no one using it */
+       SpinLockInit(&entry->mutex);
+       /* ... and don't forget the query text */
+       memcpy(entry->query, key->query_ptr, key->query_len);
+       entry->query[key->query_len] = '\0';
+   }
+
+   return entry;
+}
+
+/*
+ * qsort comparator for sorting into increasing usage order
+ */
+static int
+entry_cmp(const void *lhs, const void *rhs)
+{
+   double  l_usage = (*(const pgssEntry **)lhs)->counters.usage;
+   double  r_usage = (*(const pgssEntry **)rhs)->counters.usage;
+
+   if (l_usage < r_usage)
+       return -1;
+   else if (l_usage > r_usage)
+       return +1;
+   else
+       return 0;
+}
+
+/*
+ * Deallocate least used entries.
+ * Caller must hold an exclusive lock on pgss->lock.
+ */
+static void
+entry_dealloc(void)
+{
+   HASH_SEQ_STATUS     hash_seq;
+   pgssEntry         **entries;
+   pgssEntry          *entry;
+   int                 nvictims;
+   int                 i;
+
+   /* Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them. */
+
+   entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
+
+   i = 0;
+   hash_seq_init(&hash_seq, pgss_hash);
+   while ((entry = hash_seq_search(&hash_seq)) != NULL)
+   {
+       entries[i++] = entry;
+       entry->counters.usage *= USAGE_DECREASE_FACTOR;
+   }
+
+   qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
+   nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
+   nvictims = Min(nvictims, i);
+
+   for (i = 0; i < nvictims; i++)
+   {
+       hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
+   }
+
+   pfree(entries);
+}
+
+/*
+ * Release all entries.
+ */
+static void
+entry_reset(void)
+{
+   HASH_SEQ_STATUS     hash_seq;
+   pgssEntry          *entry;
+
+   LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
+
+   hash_seq_init(&hash_seq, pgss_hash);
+   while ((entry = hash_seq_search(&hash_seq)) != NULL)
+   {
+       hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
+   }
+
+   LWLockRelease(pgss->lock);
+}
diff --git a/contrib/pg_stat_statements/pg_stat_statements.sql.in b/contrib/pg_stat_statements/pg_stat_statements.sql.in
new file mode 100644 (file)
index 0000000..7655136
--- /dev/null
@@ -0,0 +1,31 @@
+/* $PostgreSQL: pgsql/contrib/pg_stat_statements/pg_stat_statements.sql.in,v 1.1 2009/01/04 22:19:59 tgl Exp $ */
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+-- Register functions.
+CREATE FUNCTION pg_stat_statements_reset()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+CREATE FUNCTION pg_stat_statements(
+    OUT userid oid,
+    OUT dbid oid,
+    OUT query text,
+    OUT calls int8,
+    OUT total_time float8,
+    OUT rows int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+-- Register a view on the function for ease of use.
+CREATE VIEW pg_stat_statements AS
+  SELECT * FROM pg_stat_statements();
+
+GRANT SELECT ON pg_stat_statements TO PUBLIC;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
diff --git a/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql b/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql
new file mode 100644 (file)
index 0000000..31fd0af
--- /dev/null
@@ -0,0 +1,8 @@
+/* $PostgreSQL: pgsql/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql,v 1.1 2009/01/04 22:19:59 tgl Exp $ */
+
+-- Adjust this setting to control where the objects get dropped.
+SET search_path = public;
+
+DROP VIEW pg_stat_statements;
+DROP FUNCTION pg_stat_statements();
+DROP FUNCTION pg_stat_statements_reset();
index ecc5a0b23e3c8d77985beac990a9597489c0caaa..89fb5314fe61b3bd11e59dd19a5c80fdef87a3fb 100644 (file)
@@ -1,4 +1,4 @@
-
+
 
 
  Additional Supplied Modules
@@ -103,6 +103,7 @@ psql -d dbname -f SHAREDIR/contrib/module.sql
  &pgfreespacemap;
  &pgrowlocks;
  &pgstandby;
+ &pgstatstatements;
  &pgstattuple;
  &pgtrgm;
  &seg;
index ea1c7c274fa7cc44f01fe37fe2222880ab4ff30a..273d5a09799c011c2e036c8a0bc8b2e221ee4f2f 100644 (file)
@@ -1,4 +1,4 @@
-
+
 
 
 
 
 
 
+
 
 
 
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
new file mode 100644 (file)
index 0000000..93cda9f
--- /dev/null
@@ -0,0 +1,265 @@
+
+
+
pg_stat_statements
+
+  pg_stat_statements
+
+  The pg_stat_statements module provides a means for
+  tracking execution statistics of all SQL statements executed by a server.
+
+  The module must be loaded by adding pg_stat_statements to
+   in
+  postgresql.conf, because it requires additional shared memory.
+  This means that a server restart is needed to add or remove the module.
+
+  The <structname>pg_stat_statements</structname> view
+
+  
+   The statistics gathered by the module are made available via a system view
+   named pg_stat_statements.  This view contains one row for
+   each distinct query text, database ID, and user ID (up to the maximum
+   number of distinct statements that the module can track).  The columns
+   of the view are:
+  
+
+  
+   <structname>pg_stat_statements</> columns
+
+   
+    
+     
+      Name
+      Type
+      References
+      Description
+     
+    
+    
+     
+      userid
+      oid
+      pg_authid.oid
+      OID of user who executed the statement
+     
+
+     
+      dbid
+      oid
+      pg_database.oid
+      OID of database in which the statement was executed
+     
+
+    
+      query
+      text
+      
+      Text of the statement (up to  bytes)
+     
+
+     
+      calls
+      bigint
+      
+      Number of times executed
+     
+
+     
+      total_time
+      double precision
+      
+      Total time spent in the statement, in seconds
+     
+
+     
+      rows
+      bigint
+      
+      Total number of rows retrieved or affected by the statement
+     
+
+    
+   
+  
+
+  
+   This view, and the function pg_stat_statements_reset,
+   are available only in databases they have been specifically installed into
+   by running the pg_stat_statements.sql install script.
+   However, statistics are tracked across all databases of the server
+   whenever the pg_stat_statements module is loaded
+   into the server, regardless of presence of the view.
+  
+
+  
+   For security reasons, non-superusers are not allowed to see the text of
+   queries executed by other users.  They can see the statistics, however,
+   if the view has been installed in their database.
+  
+
+  
+   Note that statements are considered the same if they have the same text,
+   regardless of the values of any out-of-line parameters used in the
+   statement.  Using out-of-line parameters will help to group statements
+   together and may make the statistics more useful.
+  
+
+  Functions
+
+  
+   
+    
+     pg_stat_statements_reset() returns void
+    
+
+    
+     
+      pg_stat_statements_reset discards all statistics
+      gathered so far by pg_stat_statements.
+      By default, this function can only be executed by superusers.
+     
+    
+   
+
+  
+
+  Configuration parameters
+
+  
+   
+    
+     pg_stat_statements.max (integer)
+    
+
+    
+     
+      pg_stat_statements.max is the maximum number of
+      statements tracked by the module (i.e., the maximum number of rows
+      in the pg_stat_statements view).  If more distinct
+      statements than that are observed, information about the least-executed
+      statements is discarded.
+      The default value is 1000.
+      This parameter can only be set at server start.
+     
+    
+   
+
+   
+    
+     pg_stat_statements.track (enum)
+    
+
+    
+     
+      pg_stat_statements.track controls which statements
+      are counted by the module.
+      Specify top to track top-level statements (those issued
+      directly by clients), all to also track nested statements
+      (such as statements invoked within functions), or none to
+      disable.
+      The default value is top.
+      Only superusers can change this setting.
+     
+    
+   
+
+   
+    
+     pg_stat_statements.save (boolean)
+    
+
+    
+     
+      pg_stat_statements.save specifies whether to
+      save statement statistics across server shutdowns.
+      If it is off then statistics are not saved at
+      shutdown nor reloaded at server start.
+      The default value is on.
+      This parameter can only be set in the postgresql.conf
+      file or on the server command line.
+     
+    
+   
+  
+
+  
+   The module requires additional shared memory amounting to about
+   pg_stat_statements.max *
+    bytes.  Note that this
+   memory is consumed whenever the module is loaded, even if
+   pg_stat_statements.track is set to none.
+  
+
+  
+   In order to set any of these parameters in your
+   postgresql.conf file,
+   you will need to add pg_stat_statements to
+   .  Typical usage might be:
+  
+
+  
+# postgresql.conf
+shared_preload_libraries = 'pg_stat_statements'
+
+custom_variable_classes = 'pg_stat_statements'
+pg_stat_statements.max = 10000
+pg_stat_statements.track = all
+  
+
+  Sample output
+
+  
+$ pgbench -i bench
+
+postgres=# SELECT pg_stat_statements_reset();
+
+$ pgbench -c10 -t300 -M prepared bench
+
+postgres=# \x
+postgres=# SELECT * FROM pg_stat_statements ORDER BY total_time DESC LIMIT 3;
+-[ RECORD 1 ]------------------------------------------------------------
+userid     | 10
+dbid       | 63781
+query      | UPDATE branches SET bbalance = bbalance + $1 WHERE bid = $2;
+calls      | 3000
+total_time | 20.716706
+rows       | 3000
+-[ RECORD 2 ]------------------------------------------------------------
+userid     | 10
+dbid       | 63781
+query      | UPDATE tellers SET tbalance = tbalance + $1 WHERE tid = $2;
+calls      | 3000
+total_time | 17.1107649999999
+rows       | 3000
+-[ RECORD 3 ]------------------------------------------------------------
+userid     | 10
+dbid       | 63781
+query      | UPDATE accounts SET abalance = abalance + $1 WHERE aid = $2;
+calls      | 3000
+total_time | 0.645601
+rows       | 3000
+  
+
+  Author
+
+  
+   Takahiro Itagaki [email protected]
+  
+
+
index 22bc5b1f3f1080478876f10c7bbdd50b13c4ee34..0d0d23f53aea6c829c4dcebaa31d2f1eea9bdfe2 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.569 2009/01/03 17:08:38 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.570 2009/01/04 22:19:59 tgl Exp $
  *
  * NOTES
  *
@@ -2731,7 +2731,7 @@ PostmasterStateMachine(void)
        ereport(LOG,
                (errmsg("all server processes terminated; reinitializing")));
 
-       shmem_exit(0);
+       shmem_exit(1);
        reset_shared(PostPortNumber);
 
        StartupPID = StartupDataBase();
index 5bb4cc597af4b36abea0cbb92fddb3da389d12ab..169821b79e05201a1821dbbf4fff4be15dbc078f 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2001-2009, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.81 2009/01/01 17:23:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.82 2009/01/04 22:19:59 tgl Exp $
  * ----------
  */
 #ifndef PGSTAT_H
@@ -592,7 +592,7 @@ typedef struct PgStat_FunctionCallUsage
 extern bool pgstat_track_activities;
 extern bool pgstat_track_counts;
 extern int pgstat_track_functions;
-extern int pgstat_track_activity_query_size;
+extern PGDLLIMPORT int pgstat_track_activity_query_size;
 extern char *pgstat_stat_tmpname;
 extern char *pgstat_stat_filename;