ARC strategy backed out ... sorry
author Jan Wieck
Thu, 13 Nov 2003 05:34:58 +0000 (05:34 +0000)
committer Jan Wieck
Thu, 13 Nov 2003 05:34:58 +0000 (05:34 +0000)
Jan

src/backend/commands/vacuum.c
src/backend/storage/buffer/buf_init.c
src/backend/storage/buffer/buf_table.c
src/backend/storage/buffer/bufmgr.c
src/backend/storage/buffer/freelist.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/miscadmin.h
src/include/storage/buf_internals.h

index 47564a3002d384f04918932e1c1523ad2969663f..36062d4e307fc1b63941814e8fe3f131aedaf520 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.266 2003/11/13 00:40:00 wieck Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.267 2003/11/13 05:34:57 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,7 +33,6 @@
 #include "commands/vacuum.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
-#include "storage/buf_internals.h"
 #include "storage/freespace.h"
 #include "storage/sinval.h"
 #include "storage/smgr.h"
@@ -311,16 +310,8 @@ vacuum(VacuumStmt *vacstmt)
            else
                old_context = MemoryContextSwitchTo(anl_context);
 
-           /*
-            * Tell the buffer replacement strategy that vacuum is
-            * causing the IO
-            */
-           StrategyHintVacuum(true);
-
            analyze_rel(relid, vacstmt);
 
-           StrategyHintVacuum(false);
-
            if (vacstmt->vacuum)
                CommitTransactionCommand();
            else
@@ -758,12 +749,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
    SetQuerySnapshot();         /* might be needed for functions in
                                 * indexes */
 
-   /*
-    * Tell the cache replacement strategy that vacuum is causing
-    * all following IO
-    */
-   StrategyHintVacuum(true);
-
    /*
     * Check for user-requested abort.  Note we want this to be inside a
     * transaction, so xact.c doesn't issue useless WARNING.
@@ -778,7 +763,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
                              ObjectIdGetDatum(relid),
                              0, 0, 0))
    {
-       StrategyHintVacuum(false);
        CommitTransactionCommand();
        return true;            /* okay 'cause no data there */
    }
@@ -812,7 +796,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
                (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
                        RelationGetRelationName(onerel))));
        relation_close(onerel, lmode);
-       StrategyHintVacuum(false);
        CommitTransactionCommand();
        return false;
    }
@@ -827,7 +810,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
                (errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
                        RelationGetRelationName(onerel))));
        relation_close(onerel, lmode);
-       StrategyHintVacuum(false);
        CommitTransactionCommand();
        return false;
    }
@@ -842,7 +824,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
    if (isOtherTempNamespace(RelationGetNamespace(onerel)))
    {
        relation_close(onerel, lmode);
-       StrategyHintVacuum(false);
        CommitTransactionCommand();
        return true;            /* assume no long-lived data in temp
                                 * tables */
@@ -882,7 +863,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
    /*
     * Complete the transaction and free all temporary memory used.
     */
-   StrategyHintVacuum(false);
    CommitTransactionCommand();
 
    /*
index 616338c60c6980c91ee2f10d7d453414baa94d28..79683b725c2a118c3382954773ebc6c70eb04049 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.55 2003/11/13 00:40:01 wieck Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.56 2003/11/13 05:34:58 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -48,6 +48,9 @@ long     *CurTraceBuf;
 int            ShowPinTrace = 0;
 
 int            Data_Descriptors;
+int            Free_List_Descriptor;
+int            Lookup_List_Descriptor;
+int            Num_Descriptors;
 
 BufferDesc *BufferDescriptors;
 Block     *BufferBlockPointers;
@@ -130,6 +133,9 @@ InitBufferPool(void)
    int         i;
 
    Data_Descriptors = NBuffers;
+   Free_List_Descriptor = Data_Descriptors;
+   Lookup_List_Descriptor = Data_Descriptors + 1;
+   Num_Descriptors = Data_Descriptors + 1;
 
    /*
     * It's probably not really necessary to grab the lock --- if there's
@@ -150,7 +156,7 @@ InitBufferPool(void)
 
    BufferDescriptors = (BufferDesc *)
        ShmemInitStruct("Buffer Descriptors",
-                     Data_Descriptors * sizeof(BufferDesc), &foundDescs);
+                     Num_Descriptors * sizeof(BufferDesc), &foundDescs);
 
    BufferBlocks = (char *)
        ShmemInitStruct("Buffer Blocks",
@@ -170,14 +176,16 @@ InitBufferPool(void)
        block = BufferBlocks;
 
        /*
-        * link the buffers into a single linked list. This will become the
-        * LiFo list of unused buffers returned by StragegyGetBuffer().
+        * link the buffers into a circular, doubly-linked list to
+        * initialize free list, and initialize the buffer headers. Still
+        * don't know anything about replacement strategy in this file.
         */
        for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
        {
            Assert(ShmemIsValid((unsigned long) block));
 
-           buf->bufNext = i + 1;
+           buf->freeNext = i + 1;
+           buf->freePrev = i - 1;
 
            CLEAR_BUFFERTAG(&(buf->tag));
            buf->buf_id = i;
@@ -191,12 +199,14 @@ InitBufferPool(void)
            buf->wait_backend_id = 0;
        }
 
-       /* Correct last entry */
-       BufferDescriptors[Data_Descriptors - 1].bufNext = -1;
+       /* close the circular queue */
+       BufferDescriptors[0].freePrev = Data_Descriptors - 1;
+       BufferDescriptors[Data_Descriptors - 1].freeNext = 0;
    }
 
    /* Init other shared buffer-management stuff */
-   StrategyInitialize(!foundDescs);
+   InitBufTable();
+   InitFreeList(!foundDescs);
 
    LWLockRelease(BufMgrLock);
 }
index a2318a29f36e0e97c3e8903d85baf89cbfe1d6d2..95db86c5348fb682fd8645289c2a910b6a1168b7 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.30 2003/11/13 00:40:01 wieck Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.31 2003/11/13 05:34:58 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,7 +38,7 @@ static HTAB *SharedBufHash;
  * Initialize shmem hash table for mapping buffers
  */
 void
-InitBufTable(int size)
+InitBufTable(void)
 {
    HASHCTL     info;
 
@@ -50,7 +50,7 @@ InitBufTable(int size)
    info.hash = tag_hash;
 
    SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
-                                 size, size,
+                                 NBuffers, NBuffers,
                                  &info,
                                  HASH_ELEM | HASH_FUNCTION);
 
@@ -58,63 +58,79 @@ InitBufTable(int size)
        elog(FATAL, "could not initialize shared buffer hash table");
 }
 
-/*
- * BufTableLookup
- */
-int
+BufferDesc *
 BufTableLookup(BufferTag *tagPtr)
 {
    BufferLookupEnt *result;
 
    if (tagPtr->blockNum == P_NEW)
-       return -1;
+       return NULL;
 
    result = (BufferLookupEnt *)
        hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
    if (!result)
-       return -1;
+       return NULL;
 
-   return result->id;
+   return &(BufferDescriptors[result->id]);
 }
 
 /*
  * BufTableDelete
  */
 bool
-BufTableInsert(BufferTag *tagPtr, Buffer buf_id)
+BufTableDelete(BufferDesc *buf)
 {
    BufferLookupEnt *result;
-   bool        found;
 
-   result = (BufferLookupEnt *)
-       hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found);
+   /*
+    * buffer not initialized or has been removed from table already.
+    * BM_DELETED keeps us from removing buffer twice.
+    */
+   if (buf->flags & BM_DELETED)
+       return TRUE;
 
-   if (!result)
-       ereport(ERROR,
-               (errcode(ERRCODE_OUT_OF_MEMORY),
-                errmsg("out of shared memory")));
+   buf->flags |= BM_DELETED;
 
-   if (found)                  /* found something else in the table? */
+   result = (BufferLookupEnt *)
+       hash_search(SharedBufHash, (void *) &(buf->tag), HASH_REMOVE, NULL);
+
+   if (!result)                /* shouldn't happen */
        elog(ERROR, "shared buffer hash table corrupted");
 
-   result->id = buf_id;
+   /*
+    * Clear the buffer's tag.  This doesn't matter for the hash table,
+    * since the buffer is already removed from it, but it ensures that
+    * sequential searches through the buffer table won't think the buffer
+    * is still valid for its old page.
+    */
+   buf->tag.rnode.relNode = InvalidOid;
+   buf->tag.rnode.tblNode = InvalidOid;
+
    return TRUE;
 }
 
-/*
- * BufTableDelete
- */
 bool
-BufTableDelete(BufferTag *tagPtr)
+BufTableInsert(BufferDesc *buf)
 {
    BufferLookupEnt *result;
+   bool        found;
+
+   /* cannot insert it twice */
+   Assert(buf->flags & BM_DELETED);
+   buf->flags &= ~(BM_DELETED);
 
    result = (BufferLookupEnt *)
-       hash_search(SharedBufHash, (void *) tagPtr, HASH_REMOVE, NULL);
+       hash_search(SharedBufHash, (void *) &(buf->tag), HASH_ENTER, &found);
 
-   if (!result)                /* shouldn't happen */
+   if (!result)
+       ereport(ERROR,
+               (errcode(ERRCODE_OUT_OF_MEMORY),
+                errmsg("out of shared memory")));
+
+   if (found)                  /* found something else in the table? */
        elog(ERROR, "shared buffer hash table corrupted");
 
+   result->id = buf->buf_id;
    return TRUE;
 }
 
index f10ff7e5b744c8527802e02eef0bd3ed1e1ad6dc..5613966616e6b240fc18d5c6702d81566b9e345f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.142 2003/11/13 00:40:01 wieck Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.143 2003/11/13 05:34:58 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -260,8 +260,12 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
    if (status == SM_FAIL)
    {
        /* IO Failed.  cleanup the data structures and go home */
-       StrategyInvalidateBuffer(bufHdr);
 
+       if (!BufTableDelete(bufHdr))
+       {
+           LWLockRelease(BufMgrLock);
+           elog(FATAL, "buffer table broken after I/O error");
+       }
        /* remember that BufferAlloc() pinned the buffer */
        UnpinBuffer(bufHdr);
 
@@ -314,7 +318,7 @@ BufferAlloc(Relation reln,
    INIT_BUFFERTAG(&newTag, reln, blockNum);
 
    /* see if the block is in the buffer pool already */
-   buf = StrategyBufferLookup(&newTag, false);
+   buf = BufTableLookup(&newTag);
    if (buf != NULL)
    {
        /*
@@ -375,7 +379,7 @@ BufferAlloc(Relation reln,
    inProgress = FALSE;
    for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
    {
-       buf = StrategyGetBuffer();
+       buf = GetFreeBuffer();
 
        /* GetFreeBuffer will abort if it can't find a free buffer */
        Assert(buf);
@@ -488,7 +492,7 @@ BufferAlloc(Relation reln,
             * we haven't gotten around to insert the new tag into the
             * buffer table. So we need to check here.      -ay 3/95
             */
-           buf2 = StrategyBufferLookup(&newTag, true);
+           buf2 = BufTableLookup(&newTag);
            if (buf2 != NULL)
            {
                /*
@@ -531,12 +535,29 @@ BufferAlloc(Relation reln,
     */
 
    /*
-    * Tell the buffer replacement strategy that we are replacing the
-    * buffer content. Then rename the buffer.
+    * Change the name of the buffer in the lookup table:
+    *
+    * Need to update the lookup table before the read starts. If someone
+    * comes along looking for the buffer while we are reading it in, we
+    * don't want them to allocate a new buffer.  For the same reason, we
+    * didn't want to erase the buf table entry for the buffer we were
+    * writing back until now, either.
     */
-   StrategyReplaceBuffer(buf, reln, blockNum);
+
+   if (!BufTableDelete(buf))
+   {
+       LWLockRelease(BufMgrLock);
+       elog(FATAL, "buffer wasn't in the buffer hash table");
+   }
+
    INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
 
+   if (!BufTableInsert(buf))
+   {
+       LWLockRelease(BufMgrLock);
+       elog(FATAL, "buffer in buffer hash table twice");
+   }
+
    /*
     * Buffer contents are currently invalid.  Have to mark IO IN PROGRESS
     * so no one fiddles with them until the read completes.  If this
@@ -688,28 +709,13 @@ BufferSync(void)
    BufferDesc *bufHdr;
    ErrorContextCallback errcontext;
 
-   int         num_buffer_dirty;
-   int        *buffer_dirty;
-
    /* Setup error traceback support for ereport() */
    errcontext.callback = buffer_write_error_callback;
    errcontext.arg = NULL;
    errcontext.previous = error_context_stack;
    error_context_stack = &errcontext;
 
-   /*
-    * Get a list of all currently dirty buffers and how many there are.
-    * We do not flush buffers that get dirtied after we started. They
-    * have to wait until the next checkpoint.
-    */
-   buffer_dirty = (int *)palloc(NBuffers * sizeof(int));
-   num_buffer_dirty = 0;
-
-   LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-   num_buffer_dirty = StrategyDirtyBufferList(buffer_dirty, NBuffers);
-   LWLockRelease(BufMgrLock);
-
-   for (i = 0; i < num_buffer_dirty; i++)
+   for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
    {
        Buffer      buffer;
        int         status;
@@ -717,11 +723,10 @@ BufferSync(void)
        XLogRecPtr  recptr;
        Relation    reln;
 
-       LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-
-       bufHdr = &BufferDescriptors[buffer_dirty[i]];
        errcontext.arg = bufHdr;
 
+       LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+
        if (!(bufHdr->flags & BM_VALID))
        {
            LWLockRelease(BufMgrLock);
@@ -850,8 +855,6 @@ BufferSync(void)
            RelationDecrementReferenceCount(reln);
    }
 
-   pfree(buffer_dirty);
-
    /* Pop the error context stack */
    error_context_stack = errcontext.previous;
 }
@@ -956,9 +959,9 @@ AtEOXact_Buffers(bool isCommit)
 
            if (isCommit)
                elog(WARNING,
-               "buffer refcount leak: [%03d] (bufNext=%d, "
+               "buffer refcount leak: [%03d] (freeNext=%d, freePrev=%d, "
                  "rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)",
-                    i, buf->bufNext,
+                    i, buf->freeNext, buf->freePrev,
                     buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
                     buf->tag.blockNum, buf->flags,
                     buf->refcount, PrivateRefCount[i]);
@@ -1226,7 +1229,7 @@ recheck:
            /*
             * And mark the buffer as no longer occupied by this rel.
             */
-           StrategyInvalidateBuffer(bufHdr);
+           BufTableDelete(bufHdr);
        }
    }
 
@@ -1292,7 +1295,7 @@ recheck:
            /*
             * And mark the buffer as no longer occupied by this page.
             */
-           StrategyInvalidateBuffer(bufHdr);
+           BufTableDelete(bufHdr);
        }
    }
 
@@ -1540,7 +1543,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
                return -2;
            }
            if (bufHdr->tag.blockNum >= firstDelBlock)
-               StrategyInvalidateBuffer(bufHdr);
+               BufTableDelete(bufHdr);
        }
    }
 
index 9e340b47209e26a80ce1d2b22b1f047de495010c..ac8931752d105f83871a4c4c145d136238b57e54 100644 (file)
@@ -1,14 +1,15 @@
 /*-------------------------------------------------------------------------
  *
  * freelist.c
- *   routines for manipulating the buffer pool's replacement strategy.
+ *   routines for manipulating the buffer pool's replacement strategy
+ *   freelist.
  *
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.32 2003/11/13 00:40:01 wieck Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.33 2003/11/13 05:34:58 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
-#include "access/xact.h"
 
-#define STRAT_LIST_UNUSED  -1
-#define STRAT_LIST_B1      0
-#define STRAT_LIST_T1      1
-#define STRAT_LIST_T2      2
-#define STRAT_LIST_B2      3
-#define STRAT_NUM_LISTS        4
-
-#ifndef MAX
-#define MAX(a,b) (((a) > (b)) ? (a) : (b))
-#endif
-#ifndef MIN
-#define MIN(a,b) (((a) < (b)) ? (a) : (b))
-#endif
-
-/*
- * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
- */
-typedef struct bufstratcdb
-{
-   int             prev;       /* links in the queue */
-   int             next;
-   int             list;       /* current list */
-   BufferTag       buf_tag;    /* buffer key */
-   Buffer          buf_id;     /* currently assigned data buffer */
-   TransactionId   t1_xid;     /* the xid this entry went onto T1 */
-} BufferStrategyCDB;
-
-/*
- * The shared ARC control information.
- */
-typedef struct bufstratcontrol
-{
-
-   int     target_T1_size;             /* What T1 size are we aiming for */
-   int     listUnusedCDB;              /* All unused StrategyCDB */
-   int     listHead[STRAT_NUM_LISTS];  /* ARC lists B1, T1, T2 and B2 */
-   int     listTail[STRAT_NUM_LISTS];
-   int     listSize[STRAT_NUM_LISTS];
-   Buffer  listFreeBuffers;            /* List of unused buffers */
-
-   long    num_lookup;                 /* Some hit statistics */
-   long    num_hit[STRAT_NUM_LISTS];
-   time_t  stat_report;
-
-   BufferStrategyCDB   cdb[1];         /* The cache directory */
-} BufferStrategyControl;
-
-static BufferStrategyControl   *StrategyControl = NULL;
-static BufferStrategyCDB       *StrategyCDB = NULL;
-
-static int     strategy_cdb_found;
-static int     strategy_cdb_replace;
-static int     strategy_get_from;
-
-int                BufferStrategyStatInterval = 0;
-
-static bool                strategy_hint_vacuum;
-static TransactionId   strategy_vacuum_xid;
-
-
-#define T1_TARGET  StrategyControl->target_T1_size
-#define B1_LENGTH  StrategyControl->listSize[STRAT_LIST_B1]
-#define T1_LENGTH  StrategyControl->listSize[STRAT_LIST_T1]
-#define T2_LENGTH  StrategyControl->listSize[STRAT_LIST_T2]
-#define B2_LENGTH  StrategyControl->listSize[STRAT_LIST_B2]
 
+static BufferDesc *SharedFreeList;
 
 /*
- * Macro to remove a CDB from whichever list it currently is on
+ * State-checking macros
  */
-#define    STRAT_LIST_REMOVE(cdb) \
-{ \
-   AssertMacro((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS);     \
-   if ((cdb)->prev < 0)                                                \
-       StrategyControl->listHead[(cdb)->list] = (cdb)->next;           \
-   else                                                                \
-       StrategyCDB[(cdb)->prev].next = (cdb)->next;                    \
-   if ((cdb)->next < 0)                                                \
-       StrategyControl->listTail[(cdb)->list] = (cdb)->prev;           \
-   else                                                                \
-       StrategyCDB[(cdb)->next].prev = (cdb)->prev;                    \
-   StrategyControl->listSize[(cdb)->list]--;                           \
-   (cdb)->list = STRAT_LIST_UNUSED;                                    \
-}
 
-/*
- * Macro to add a CDB to the tail of a list (MRU position)
- */
-#define STRAT_MRU_INSERT(cdb,l) \
-{ \
-   AssertMacro((cdb)->list == STRAT_LIST_UNUSED);                      \
-   if (StrategyControl->listTail[(l)] < 0)                             \
-   {                                                                   \
-       (cdb)->prev = (cdb)->next = -1;                                 \
-       StrategyControl->listHead[(l)] =                                \
-           StrategyControl->listTail[(l)] =                            \
-           ((cdb) - StrategyCDB);                                      \
-   }                                                                   \
-   else                                                                \
-   {                                                                   \
-       (cdb)->next = -1;                                               \
-       (cdb)->prev = StrategyControl->listTail[(l)];                   \
-       StrategyCDB[StrategyControl->listTail[(l)]].next =              \
-           ((cdb) - StrategyCDB);                                      \
-       StrategyControl->listTail[(l)] =                                \
-           ((cdb) - StrategyCDB);                                      \
-   }                                                                   \
-   StrategyControl->listSize[(l)]++;                                   \
-   (cdb)->list = (l);                                                  \
-}
+#define IsInQueue(bf) \
+( \
+   AssertMacro((bf->freeNext != INVALID_DESCRIPTOR)), \
+   AssertMacro((bf->freePrev != INVALID_DESCRIPTOR)), \
+   AssertMacro((bf->flags & BM_FREE)) \
+)
 
-/*
- * Macro to add a CDB to the head of a list (LRU position)
- */
-#define STRAT_LRU_INSERT(cdb,l) \
-{ \
-   AssertMacro((cdb)->list == STRAT_LIST_UNUSED);                      \
-   if (StrategyControl->listHead[(l)] < 0)                             \
-   {                                                                   \
-       (cdb)->prev = (cdb)->next = -1;                                 \
-       StrategyControl->listHead[(l)] =                                \
-           StrategyControl->listTail[(l)] =                            \
-           ((cdb) - StrategyCDB);                                      \
-   }                                                                   \
-   else                                                                \
-   {                                                                   \
-       (cdb)->prev = -1;                                               \
-       (cdb)->next = StrategyControl->listHead[(l)];                   \
-       StrategyCDB[StrategyControl->listHead[(l)]].prev =              \
-           ((cdb) - StrategyCDB);                                      \
-       StrategyControl->listHead[(l)] =                                \
-           ((cdb) - StrategyCDB);                                      \
-   }                                                                   \
-   StrategyControl->listSize[(l)]++;                                   \
-   (cdb)->list = (l);                                                  \
-}
+#define IsNotInQueue(bf) \
+( \
+   AssertMacro((bf->freeNext == INVALID_DESCRIPTOR)), \
+   AssertMacro((bf->freePrev == INVALID_DESCRIPTOR)), \
+   AssertMacro(! (bf->flags & BM_FREE)) \
+)
 
 
 /*
- * StrategyBufferLookup
+ * AddBufferToFreelist
  *
- * Lookup a page request in the cache directory. A buffer is only
- * returned for a T1 or T2 cache hit. B1 and B2 hits are only
- * remembered here to later affect the behaviour.
+ * In theory, this is the only routine that needs to be changed
+ * if the buffer replacement strategy changes. Just change
+ * the manner in which buffers are added to the freelist queue.
+ * Currently, they are added on an LRU basis.
  */
-BufferDesc *
-StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
-{
-   BufferStrategyCDB  *cdb;
-   time_t              now;
-
-   if (BufferStrategyStatInterval > 0)
-   {
-       time(&now);
-       if (StrategyControl->stat_report + BufferStrategyStatInterval < now)
-       {
-           long    all_hit, b1_hit, t1_hit, t2_hit, b2_hit;
-           ErrorContextCallback    *errcxtold;
-
-           if (StrategyControl->num_lookup == 0)
-           {
-               all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0;
-           }
-           else
-           {
-               b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 /
-                         StrategyControl->num_lookup);
-               t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 /
-                         StrategyControl->num_lookup);
-               t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 /
-                         StrategyControl->num_lookup);
-               b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 /
-                         StrategyControl->num_lookup);
-               all_hit = b1_hit + t1_hit + t2_hit + b2_hit;
-           }
-
-           errcxtold = error_context_stack;
-           error_context_stack = NULL;
-           elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d",
-                   T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH);
-           elog(DEBUG1, "ARC total   =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%",
-                   all_hit, b1_hit, t1_hit, t2_hit, b2_hit);
-           error_context_stack = errcxtold;
-
-           StrategyControl->num_lookup = 0;
-           StrategyControl->num_hit[STRAT_LIST_B1] = 0;
-           StrategyControl->num_hit[STRAT_LIST_T1] = 0;
-           StrategyControl->num_hit[STRAT_LIST_T2] = 0;
-           StrategyControl->num_hit[STRAT_LIST_B2] = 0;
-           StrategyControl->stat_report = now;
-       }
-   }
-
-   /*
-    * Count lookups
-    */
-   StrategyControl->num_lookup++;
-
-   /*
-    * Lookup the block in the shared hash table
-    */
-   strategy_cdb_found = BufTableLookup(tagPtr);
-
-   /*
-    * Handle CDB lookup miss
-    */
-   if (strategy_cdb_found < 0)
-   {
-       if (!recheck)
-       {
-           /*
-            * This is an initial lookup and we have a complete
-            * cache miss (block found nowhere). This means we
-            * remember according to the current T1 size and the
-            * target T1 size from where we take a block if we
-            * need one later.
-            */
-           if (T1_LENGTH >= MAX(1, T1_TARGET))
-               strategy_get_from = STRAT_LIST_T1;
-           else
-               strategy_get_from = STRAT_LIST_T2;
-       }
-
-       /* report cache miss */
-       return NULL;
-   }
-
-   /*
-    * We found a CDB
-    */
-   cdb = &StrategyCDB[strategy_cdb_found];
-
-   /*
-    * Count hits
-    */
-   StrategyControl->num_hit[cdb->list]++;
-
-   /*
-    * If this is a T2 hit, we simply move the CDB to the
-    * T2 MRU position and return the found buffer.
-    */
-   if (cdb->list == STRAT_LIST_T2)
-   {
-       STRAT_LIST_REMOVE(cdb);
-       STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
-
-       return &BufferDescriptors[cdb->buf_id];
-   }
-
-   /*
-    * If this is a T1 hit, we move the buffer to the T2 MRU
-    * only if another transaction had read it into T1. This is
-    * required because any UPDATE or DELETE in PostgreSQL does
-    * multiple ReadBuffer(), first during the scan, later during
-    * the heap_update() or heap_delete().
-    */
-   if (cdb->list == STRAT_LIST_T1)
-   {
-       if (!TransactionIdIsCurrentTransactionId(cdb->t1_xid))
-       {
-           STRAT_LIST_REMOVE(cdb);
-           STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
-       }
-
-       return &BufferDescriptors[cdb->buf_id];
-   }
-
-   /*
-    * In the case of a recheck we don't care about B1 or B2 hits here.
-    * The bufmgr does this call only to make sure noone faulted in the
-    * block while we where busy flushing another. Now for this really
-    * to end up as a B1 or B2 cache hit, we must have been flushing for
-    * quite some time as the block not only must have been read, but
-    * also traveled through the queue and evicted from the T cache again
-    * already. 
-    */
-   if (recheck)
-       return NULL;
-
-   /*
-    * Adjust the target size of the T1 cache depending on if this is
-    * a B1 or B2 hit.
-    */
-   switch (cdb->list)
-   {
-       case STRAT_LIST_B1:
-           /*
-            * B1 hit means that the T1 cache is probably too
-            * small. Adjust the T1 target size and continue
-            * below.
-            */
-           T1_TARGET = MIN(T1_TARGET + MAX(B2_LENGTH / B1_LENGTH, 1),
-                           Data_Descriptors);
-           break;
-
-       case STRAT_LIST_B2:
-           /* 
-            * B2 hit means that the T2 cache is probably too
-            * small. Adjust the T1 target size and continue
-            * below.
- */
-           T1_TARGET = MAX(T1_TARGET - MAX(B1_LENGTH / B2_LENGTH, 1), 0);
-           break;
-
-       default:
-           elog(ERROR, "Buffer hash table corrupted - CDB on list %d found",
-                   cdb->list);
-   }
-
-   /*
-    * Decide where to take from if we will be out of
-    * free blocks later in StrategyGetBuffer().
-    */
-   if (T1_LENGTH >= MAX(1, T1_TARGET))
-       strategy_get_from = STRAT_LIST_T1;
-   else
-       strategy_get_from = STRAT_LIST_T2;
-
-   /*
-    * Even if we had seen the block in the past, it's data is
-    * not currently in memory ... cache miss to the bufmgr.
-    */
-   return NULL;
-}
-
-
-/*
- * StrategyGetBuffer
- *
- * Called by the bufmgr to get the next candidate buffer to use in
- * BufferAlloc(). The only hard requirement BufferAlloc() has is that
- * this buffer must not currently be pinned. 
- */
-BufferDesc *
-StrategyGetBuffer(void)
-{
-   int             cdb_id;
-   BufferDesc     *buf;
-
-   if (StrategyControl->listFreeBuffers < 0)
-   {
-       /* We don't have a free buffer, must take one from T1 or T2 */
-
-       if (strategy_get_from == STRAT_LIST_T1)
-       {
-           /*
-            * We should take the first unpinned buffer from T1.
-            */
-           cdb_id = StrategyControl->listHead[STRAT_LIST_T1];
-           while (cdb_id >= 0)
-           {
-               buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
-               if (buf->refcount == 0)
-               {
-                   strategy_cdb_replace = cdb_id;
-                   Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
-                   return buf;
-               }
-               cdb_id = StrategyCDB[cdb_id].next;
-           }
-
-           /*
-            * No unpinned T1 buffer found - pardon T2 cache.
-            */
-           cdb_id = StrategyControl->listHead[STRAT_LIST_T2];
-           while (cdb_id >= 0)
-           {
-               buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
-               if (buf->refcount == 0)
-               {
-                   strategy_cdb_replace = cdb_id;
-                   Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
-                   return buf;
-               }
-               cdb_id = StrategyCDB[cdb_id].next;
-           }
-
-           /*
-            * No unpinned buffers at all!!!
-            */
-           elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers");
-       }
-       else
-       {
-           /*
-            * We should take the first unpinned buffer from T2.
-            */
-           cdb_id = StrategyControl->listHead[STRAT_LIST_T2];
-           while (cdb_id >= 0)
-           {
-               buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
-               if (buf->refcount == 0)
-               {
-                   strategy_cdb_replace = cdb_id;
-                   Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
-                   return buf;
-               }
-               cdb_id = StrategyCDB[cdb_id].next;
-           }
-
-           /*
-            * No unpinned T2 buffer found - pardon T1 cache.
-            */
-           cdb_id = StrategyControl->listHead[STRAT_LIST_T1];
-           while (cdb_id >= 0)
-           {
-               buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
-               if (buf->refcount == 0)
-               {
-                   strategy_cdb_replace = cdb_id;
-                   Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
-                   return buf;
-               }
-               cdb_id = StrategyCDB[cdb_id].next;
-           }
-
-           /*
-            * No unpinned buffers at all!!!
-            */
-           elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers");
-       }
-   }
-   else
-   {
-       /* There is a completely free buffer available - take it */
-
-       /*
-        * Note: This code uses the side effect that a free buffer
-        * can never be pinned or dirty and therefore the call to
-        * StrategyReplaceBuffer() will happen without the bufmgr
-        * releasing the bufmgr-lock in the meantime. That means,
-        * that there will never be any reason to recheck. Otherwise
-        * we would leak shared buffers here!
-        */
-       strategy_cdb_replace = -1;
-       buf = &BufferDescriptors[StrategyControl->listFreeBuffers];
-
-       StrategyControl->listFreeBuffers = buf->bufNext;
-       buf->bufNext = -1;
-
-       /* Buffer of freelist cannot be pinned */
-       Assert(buf->refcount == 0);
-
-       return buf;
-   }
-
-   /* not reached */
-   return NULL;
-}
-
-
-/*
- * StrategyReplaceBuffer
- *
- * Called by the buffer manager to inform us that he possibly flushed
- *     a buffer and is now about to replace the content. Prior to this call,
- * the cache algorithm still reports the buffer as in the cache. After
- * this call we report the new block, even if IO might still need to
- * start.
- */
-void
-StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
-{
-   BufferStrategyCDB      *cdb_found;
-   BufferStrategyCDB      *cdb_replace;
-
-   if (strategy_cdb_found >= 0)
-   {
-       /* This was a ghost buffer cache hit (B1 or B2) */
-       cdb_found = &StrategyCDB[strategy_cdb_found];
-
-       /* Assert that the buffer remembered in cdb_found is the one */
-       /* the buffer manager is currently faulting in */
-       Assert(BUFFERTAG_EQUALS(&(cdb_found->buf_tag), rnode, blockNum));
-       
-       if (strategy_cdb_replace >= 0)
-       {
-           /* We are satisfying it with an evicted T buffer */
-           cdb_replace = &StrategyCDB[strategy_cdb_replace];
-
-           /* Assert that the buffer remembered in cdb_replace is */
-           /* the one the buffer manager has just evicted */
-           Assert(cdb_replace->list == STRAT_LIST_T1 || 
-                   cdb_replace->list == STRAT_LIST_T2);
-           Assert(cdb_replace->buf_id == buf->buf_id);
-           Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
-
-           /* If this was a T1 buffer faulted in by vacuum, just */
-           /* do not cause the CDB end up in the B1 list, so that */
-           /* the vacuum scan does not affect T1_target adjusting */
-           if (strategy_hint_vacuum)
-           {
-               BufTableDelete(&(cdb_replace->buf_tag));
-               STRAT_LIST_REMOVE(cdb_replace);
-               cdb_replace->buf_id = -1;
-               cdb_replace->next = StrategyControl->listUnusedCDB;
-               StrategyControl->listUnusedCDB = strategy_cdb_replace;
-           }
-           else
-           {
-               /* Under normal circumstances move the evicted */
-               /* T list entry to it's corresponding B list */
-               if (cdb_replace->list == STRAT_LIST_T1)
-               {
-                   STRAT_LIST_REMOVE(cdb_replace);
-                   STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
-               }
-               else
-               {
-                   STRAT_LIST_REMOVE(cdb_replace);
-                   STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
-               }
-           }
-           /* And clear it's block reference */
-           cdb_replace->buf_id = -1;
-       }
-       else
-       {
-           /* or we satisfy it with an unused buffer */
-       }
-
-       /* Now the found B CDB get's the buffer and is moved to T2 */
-       cdb_found->buf_id = buf->buf_id;
-       STRAT_LIST_REMOVE(cdb_found);
-       STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
-   }
-   else
-   {
-       /* This was a complete cache miss, so we need to create */
-       /* a new CDB. The goal is to keep T1len+B1len <= c */
-
-       if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= Data_Descriptors)
-       {
-           /* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */
-           cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
-
-           BufTableDelete(&(cdb_found->buf_tag));
-           STRAT_LIST_REMOVE(cdb_found);
-       }
-       else
-       {
-           /* Otherwise, we try to use a free one */
-           if (StrategyControl->listUnusedCDB >= 0)
-           {
-               cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB];
-               StrategyControl->listUnusedCDB = cdb_found->next;
-           }
-           else
-           {
-               /* If there isn't, we take B2-LRU ... except if */
-               /* T1len+B1len+T2len = c ... oh my */
-               if (B2_LENGTH > 0)
-                   cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B2]];
-               else
-                   cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
-
-               BufTableDelete(&(cdb_found->buf_tag));
-               STRAT_LIST_REMOVE(cdb_found);
-           }
-       }
-
-       /* Set the CDB's buf_tag and insert the hash key */
-       INIT_BUFFERTAG(&(cdb_found->buf_tag), rnode, blockNum);
-       BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));
-
-       if (strategy_cdb_replace >= 0)
-       {
-           /* The buffer was formerly in a T list, move it's CDB
-            * to the corresponding B list */
-           cdb_replace = &StrategyCDB[strategy_cdb_replace];
-
-           Assert(cdb_replace->list == STRAT_LIST_T1 || 
-                   cdb_replace->list == STRAT_LIST_T2);
-           Assert(cdb_replace->buf_id == buf->buf_id);
-           Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
-
-           if (cdb_replace->list == STRAT_LIST_T1)
-           {
-               STRAT_LIST_REMOVE(cdb_replace);
-               STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
-           }
-           else
-           {
-               STRAT_LIST_REMOVE(cdb_replace);
-               STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
-           }
-           /* And clear it's block reference */
-           cdb_replace->buf_id = -1;
-       }
-       else
-       {
-           /* or we satisfy it with an unused buffer */
-       }
-
-       /* Assign the buffer id to the new CDB */
-       cdb_found->buf_id = buf->buf_id;
-
-       /*
-        * Specialized VACUUM optimization. If this "complete cache miss"
-        * happened because vacuum needed the page, we want it later on
-        * to be placed at the LRU instead of the MRU position of T1.
-        */
-       if (strategy_hint_vacuum)
-       {
-           if (strategy_vacuum_xid != GetCurrentTransactionId())
-           {
-               strategy_hint_vacuum = false;
-               STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
-           }
-           else
-               STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
-           
-       }
-       else
-           STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
-
-       /*
-        * Remember the Xid when this buffer went onto T1 to avoid
-        * a single UPDATE promoting a newcomer straight into T2.
-        */
-       cdb_found->t1_xid = GetCurrentTransactionId();
-   }
-}
-
-
-/*
- * StrategyInvalidateBuffer
- *
- * Called by the buffer manager to inform us that a buffer content
- * is no longer valid. We simply throw away any eventual existing
- * buffer hash entry and move the CDB and buffer to the free lists.
- */
-void
-StrategyInvalidateBuffer(BufferDesc *buf)
-{
-   int                 cdb_id;
-   BufferStrategyCDB  *cdb;
-
-   cdb_id = BufTableLookup(&(buf->tag));
-
-   /* If we have the buffer somewhere in the directory, remove it
-    * and add the CDB to the list of unused CDB's. */
-   if (cdb_id >= 0)
-   {
-       cdb = &StrategyCDB[cdb_id];
-       BufTableDelete(&(cdb->buf_tag));
-       STRAT_LIST_REMOVE(cdb);
-       cdb->buf_id = -1;
-       cdb->next = StrategyControl->listUnusedCDB;
-       StrategyControl->listUnusedCDB = cdb_id;
-   }
-
-   /* Buffer is unreferenced now and should not contain any valid data
-    * so add it to the list of free buffers */
-   buf->bufNext = StrategyControl->listFreeBuffers;
-   StrategyControl->listFreeBuffers = buf->buf_id;
-}
-
-
-void
-StrategyHintVacuum(bool vacuum_active)
-{
-   strategy_hint_vacuum = vacuum_active;
-   strategy_vacuum_xid = GetCurrentTransactionId();
-}
-
-
-int
-StrategyDirtyBufferList(int *buffer_list, int max_buffers)
-{
-   int                 num_buffer_dirty = 0;
-   int                 cdb_id_t1;
-   int                 cdb_id_t2;
-   int                 buf_id;
-   BufferDesc         *buf;
-
-   /*
-    * Traverse the T1 and T2 list LRU to MRU in "parallel"
-    * and add all dirty buffers found in that order to the list.
-    * The ARC strategy keeps all used buffers including pinned ones
-    * in the T1 or T2 list. So we cannot loose any dirty buffers.
-    */
-   cdb_id_t1 = StrategyControl->listHead[STRAT_LIST_T1];
-   cdb_id_t2 = StrategyControl->listHead[STRAT_LIST_T2];
-
-   while ((cdb_id_t1 >= 0 || cdb_id_t2 >= 0) && 
-           num_buffer_dirty < max_buffers)
-   {
-       if (cdb_id_t1 >= 0)
-       {
-           buf_id = StrategyCDB[cdb_id_t1].buf_id;
-           buf = &BufferDescriptors[buf_id];
-
-           if (buf->flags & BM_VALID)
-           {
-               if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
-               {
-                   buffer_list[num_buffer_dirty++] = buf_id;
-               }
-           }
-
-           cdb_id_t1 = StrategyCDB[cdb_id_t1].next;
-       }
-
-       if (cdb_id_t2 >= 0)
-       {
-           buf_id = StrategyCDB[cdb_id_t2].buf_id;
-           buf = &BufferDescriptors[buf_id];
-
-           if (buf->flags & BM_VALID)
-           {
-               if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
-               {
-                   buffer_list[num_buffer_dirty++] = buf_id;
-               }
-           }
-
-           cdb_id_t2 = StrategyCDB[cdb_id_t2].next;
-       }
-   }
-
-   return num_buffer_dirty;
-}
-
-
-/*
- * StrategyInitialize -- initialize the buffer cache replacement
- *     strategy.
- *
- * Assume: All of the buffers are already building a linked list.
- *     Only called by postmaster and only during initialization.
- */
-void
-StrategyInitialize(bool init)
+static void
+AddBufferToFreelist(BufferDesc *bf)
 {
-   bool found;
-   int i;
-
-   /*
-    * Initialize the shared CDB lookup hashtable
-    */
-   InitBufTable(Data_Descriptors * 2);
-
-   /*
-    * Get or create the shared strategy control block and the CDB's
-    */
-   StrategyControl = (BufferStrategyControl *)
-           ShmemInitStruct("Buffer Strategy Status",
-                   sizeof(BufferStrategyControl) +
-                   sizeof(BufferStrategyCDB) * (Data_Descriptors * 2 - 1),
-                   &found);
-   StrategyCDB = &(StrategyControl->cdb[0]);
-
-   if (!found)
-   {
-       /*
-        * Only done once, usually in postmaster
-        */
-       Assert(init);
-
-       /*
-        * Grab the whole linked list of free buffers for our
-        * strategy
-        */
-       StrategyControl->listFreeBuffers = 0;
-
-       /*
-        * We start off with a target T1 list size of
-        * half the available cache blocks.
-        */
-       StrategyControl->target_T1_size = Data_Descriptors / 2;
-
-       /*
-        * Initialize B1, T1, T2 and B2 lists to be empty
-        */
-       for (i = 0; i < STRAT_NUM_LISTS; i++)
-       {
-           StrategyControl->listHead[i] = -1;
-           StrategyControl->listTail[i] = -1;
-           StrategyControl->listSize[i] = 0;
-           StrategyControl->num_hit[i] = 0;
-       }
-       StrategyControl->num_lookup  = 0;
-       StrategyControl->stat_report = 0;
-
-       /*
-        * All CDB's are linked as the listUnusedCDB
-        */
-       for (i = 0; i < Data_Descriptors * 2; i++)
-       {
-           StrategyCDB[i].next = i + 1;
-           StrategyCDB[i].list = STRAT_LIST_UNUSED;
-           CLEAR_BUFFERTAG(&(StrategyCDB[i].buf_tag));
-           StrategyCDB[i].buf_id = -1;
-       }
-       StrategyCDB[Data_Descriptors * 2 - 1].next = -1;
-       StrategyControl->listUnusedCDB = 0;
-   }
-   else
-   {
-       Assert(!init);
-   }
+#ifdef BMTRACE
+   _bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum,
+             BufferDescriptorGetBuffer(bf), BMT_DEALLOC);
+#endif   /* BMTRACE */
+   IsNotInQueue(bf);
+
+   /* append bf at the tail: its predecessor is the old tail, its successor the list head */
+   bf->freePrev = SharedFreeList->freePrev;
+   bf->freeNext = Free_List_Descriptor;
+
+   /* splice bf into the chain by making both neighbours point at it */
+   BufferDescriptors[bf->freeNext].freePrev = bf->buf_id;
+   BufferDescriptors[bf->freePrev].freeNext = bf->buf_id;
 }
 
-
 #undef PinBuffer
 
 /*
@@ -853,9 +95,18 @@ PinBuffer(BufferDesc *buf)
 
    if (buf->refcount == 0)
    {
+       IsInQueue(buf);
+
+       /* remove from freelist queue */
+       BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
+       BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
+       buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
+
        /* mark buffer as no longer free */
        buf->flags &= ~BM_FREE;
    }
+   else
+       IsNotInQueue(buf);
 
    if (PrivateRefCount[b] == 0)
        buf->refcount++;
@@ -893,6 +144,7 @@ UnpinBuffer(BufferDesc *buf)
 {
    int         b = BufferDescriptorGetBuffer(buf) - 1;
 
+   IsNotInQueue(buf);
    Assert(buf->refcount > 0);
    Assert(PrivateRefCount[b] > 0);
    PrivateRefCount[b]--;
@@ -902,6 +154,7 @@ UnpinBuffer(BufferDesc *buf)
    if (buf->refcount == 0)
    {
        /* buffer is now unpinned */
+       AddBufferToFreelist(buf);
        buf->flags |= BM_FREE;
    }
    else if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
@@ -934,6 +187,64 @@ refcount = %ld, file: %s, line: %d\n",
 }
 #endif
 
+/*
+ * GetFreeBuffer() -- unlink and return the buffer at the head of the freelist.
+ */
+BufferDesc *
+GetFreeBuffer(void)
+{
+   BufferDesc *buf;
+
+   if (Free_List_Descriptor == SharedFreeList->freeNext)
+   {
+       /* queue is empty. All buffers in the buffer pool are pinned. */
+       ereport(ERROR,
+               (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+                errmsg("out of free buffers")));
+       return NULL;
+   }
+   buf = &(BufferDescriptors[SharedFreeList->freeNext]);
+
+   /* remove from freelist queue */
+   BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
+   BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
+   buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
+
+   buf->flags &= ~(BM_FREE);
+
+   return buf;
+}
+
+/*
+ * InitFreeList -- initialize the dummy buffer descriptor used
+ *     as a freelist head.
+ *
+ * Assume: All of the buffers are already linked in a circular
+ *     queue.   Only called by postmaster and only during
+ *     initialization.
+ */
+void
+InitFreeList(bool init)
+{
+   SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]);
+
+   if (init)
+   {
+       /* we only do this once, normally in the postmaster */
+       SharedFreeList->data = INVALID_OFFSET;
+       SharedFreeList->flags = 0;
+       SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE);
+       SharedFreeList->buf_id = Free_List_Descriptor;
+
+       /* insert it into the circular queue right after descriptor 0 (an arbitrary spot) */
+       SharedFreeList->freeNext = BufferDescriptors[0].freeNext;
+       SharedFreeList->freePrev = 0;
+       BufferDescriptors[SharedFreeList->freeNext].freePrev =
+           BufferDescriptors[SharedFreeList->freePrev].freeNext =
+           Free_List_Descriptor;
+   }
+}
+
 
 /*
  * print out the free list and check for breaks.
index ac235ecc3a425f8368ef644ae207af6349c7f93b..3dcb3748dc9391548ffd96c2dec55cabcd29d293 100644 (file)
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut .
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.167 2003/11/13 00:40:01 wieck Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.168 2003/11/13 05:34:58 wieck Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -73,7 +73,6 @@ extern int    CheckPointTimeout;
 extern int CommitDelay;
 extern int CommitSiblings;
 extern char *preload_libraries_string;
-extern int BufferStrategyStatInterval;
 
 #ifdef HAVE_SYSLOG
 extern char *Syslog_facility;
@@ -1191,15 +1190,6 @@ static struct config_int ConfigureNamesInt[] =
        -1, -1, INT_MAX / 1000, NULL, NULL
    },
 
-   {
-       {"buffer_strategy_status_interval", PGC_POSTMASTER, RESOURCES_MEM,
-           gettext_noop("Interval to report buffer strategy status in seconds"),
-           NULL
-       },
-       &BufferStrategyStatInterval,
-       0, 0, 600, NULL, NULL
-   },
-
    /* End-of-list marker */
    {
        {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL
index 15c73e6692e5afae71580f099abf27f22adf9bf0..1ead89e130844f153f47147840e6c06368bbce0f 100644 (file)
@@ -58,7 +58,6 @@
 #shared_buffers = 1000     # min 16, at least max_connections*2, 8KB each
 #sort_mem = 1024       # min 64, size in KB
 #vacuum_mem = 8192     # min 1024, size in KB
-#buffer_strategy_status_interval = 0   # 0-600 seconds
 
 # - Free Space Map -
 
index 492f16cc115b3afc723970dbb91ae19d8a964bb9..d5b509fe511fc107fd2509974995a4142243a6be 100644 (file)
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: miscadmin.h,v 1.135 2003/11/13 00:40:01 wieck Exp $
+ * $Id: miscadmin.h,v 1.136 2003/11/13 05:34:58 wieck Exp $
  *
  * NOTES
  *   some of the information in this file should be moved to
@@ -96,13 +96,6 @@ extern void ProcessInterrupts(void);
        CritSectionCount--; \
    } while(0)
 
-#define PG_DELAY(_msec) \
-{ \
-   struct timeval delay; \
-   delay.tv_sec = (_msec) / 1000; \
-   delay.tv_usec = ((_msec) % 1000) * 1000; \
-   (void) select(0, NULL, NULL, NULL, &delay); \
-}
 
 /*****************************************************************************
  *   globals.h --                                                           *
index deafa0b8cec20f612b0dd064db66466307e1796d..7b7975133921234d4ba1be406ee40f0eda656154 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: buf_internals.h,v 1.62 2003/11/13 00:40:02 wieck Exp $
+ * $Id: buf_internals.h,v 1.63 2003/11/13 05:34:58 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -72,29 +72,17 @@ typedef struct buftag
    (a)->rnode = (xx_reln)->rd_node \
 )
 
-#define BUFFERTAG_EQUALS(a,xx_reln,xx_blockNum) \
-( \
-   (a)->rnode.tblNode == (xx_reln)->rd_node.tblNode && \
-   (a)->rnode.relNode == (xx_reln)->rd_node.relNode && \
-   (a)->blockNum == (xx_blockNum) \
-)
-#define BUFFERTAGS_EQUAL(a,b) \
-( \
-   (a)->rnode.tblNode == (b)->rnode.tblNode && \
-   (a)->rnode.relNode == (b)->rnode.relNode && \
-   (a)->blockNum == (b)->blockNum \
-)
-
 /*
  * BufferDesc -- shared buffer cache metadata for a single
  *               shared buffer descriptor.
  */
 typedef struct sbufdesc
 {
-   Buffer      bufNext;        /* link in freelist chain */
+   Buffer      freeNext;       /* links for freelist chain */
+   Buffer      freePrev;
    SHMEM_OFFSET data;          /* pointer to data in buf pool */
 
-   /* tag and id must be together for table lookup */
+   /* tag and id must be together for table lookup (still true?) */
    BufferTag   tag;            /* file/block identifier */
    int         buf_id;         /* buffer's index number (from 0) */
 
@@ -119,7 +107,6 @@ typedef struct sbufdesc
 
 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
 
-
 /*
  * Each backend has its own BufferLocks[] array holding flag bits
  * showing what locks it has set on each buffer.
@@ -180,19 +167,14 @@ extern long int LocalBufferFlushCount;
 /*freelist.c*/
 extern void PinBuffer(BufferDesc *buf);
 extern void UnpinBuffer(BufferDesc *buf);
-extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck);
-extern BufferDesc *StrategyGetBuffer(void);
-extern void StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum);
-extern void StrategyInvalidateBuffer(BufferDesc *buf);
-extern void StrategyHintVacuum(bool vacuum_active);
-extern int StrategyDirtyBufferList(int *buffer_dirty, int max_buffers);
-extern void StrategyInitialize(bool init);
+extern BufferDesc *GetFreeBuffer(void);
+extern void InitFreeList(bool init);
 
 /* buf_table.c */
-extern void InitBufTable(int size);
-extern int BufTableLookup(BufferTag *tagPtr);
-extern bool BufTableInsert(BufferTag *tagPtr, Buffer buf_id);
-extern bool BufTableDelete(BufferTag *tagPtr);
+extern void InitBufTable(void);
+extern BufferDesc *BufTableLookup(BufferTag *tagPtr);
+extern bool BufTableDelete(BufferDesc *buf);
+extern bool BufTableInsert(BufferDesc *buf);
 
 /* bufmgr.c */
 extern BufferDesc *BufferDescriptors;