Reply-To: [email protected], Dan McGuirk
To:
[email protected]
Subject: [HACKERS] tmin writeback optimization
I was doing some profiling of the backend and noticed that, during a certain
benchmark I was running, somewhere between 30% and 75% of the backend's CPU
time was being spent in calls to TransactionIdDidCommit() from
HeapTupleSatisfiesNow() or HeapTupleSatisfiesItself() to determine that
changed rows' transactions had in fact been committed even though the rows'
tmin values had not yet been set.
When a query looks at a given row, it needs to figure out whether the
transaction that changed the row has been committed, and hence the query
should pay attention to the row, or whether, on the other hand, the
transaction is still in progress or has been aborted, and hence the row
should be ignored. If a tmin value is set, it is known definitively that the
row's transaction has been committed. However, if tmin is not set, the
transaction referred to in xmin must be looked up in pg_log, and this is what
the backend was spending a lot of time doing during my benchmark.
So, implementing a method suggested by Vadim, I created the following
patch that, the first time a query finds a committed row whose tmin value
is not set, sets it and marks the buffer where the row is stored as
dirty. (It works for tmax, too.) This doesn't result in the boost in
real time performance I was hoping for; however, it does decrease backend
CPU usage by up to two-thirds in certain situations, so it could be
rather beneficial in high-concurrency settings.
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/heapvalid.c,v 1.12 1996/11/10 02:56:47 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/heapvalid.c,v 1.13 1997/03/28 07:03:53 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include
#include
#include
+#include
/* ----------------
* heap_keytest
HeapTuple
heap_tuple_satisfies(ItemId itemId,
Relation relation,
+ Buffer buffer,
PageHeader disk_page,
TimeQual qual,
int nKeys,
ScanKey key)
{
- HeapTuple tuple;
+ HeapTuple tuple, result;
bool res;
-
+ TransactionId old_tmin, old_tmax;
+
if (! ItemIdIsUsed(itemId))
return NULL;
nKeys, key);
else
res = TRUE;
-
- if (res && (relation->rd_rel->relkind == RELKIND_UNCATALOGED
- || HeapTupleSatisfiesTimeQual(tuple,qual)))
- return tuple;
-
- return (HeapTuple) NULL;
+
+ result = (HeapTuple)NULL;
+ if (res) {
+ if(relation->rd_rel->relkind == RELKIND_UNCATALOGED) {
+ result = tuple;
+ } else {
+ old_tmin = tuple->t_tmin;
+ old_tmax = tuple->t_tmax;
+ res = HeapTupleSatisfiesTimeQual(tuple,qual);
+ if(tuple->t_tmin != old_tmin ||
+ tuple->t_tmax != old_tmax) {
+ SetBufferCommitInfoNeedsSave(buffer);
+ }
+ if(res) {
+ result = tuple;
+ }
+ }
+ }
+
+ return result;
}
/*
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.10 1997/01/23 05:59:47 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.11 1997/03/28 07:04:11 scrappy Exp $
*
*
* INTERFACE ROUTINES
* if current tuple qualifies, return it.
* ----------------
*/
- if ((rtup = heap_tuple_satisfies(lpp, relation, (PageHeader) dp,
+ if ((rtup = heap_tuple_satisfies(lpp, relation, *b, (PageHeader) dp,
timeQual, nkeys, key)) != NULL) {
ItemPointer iptr = &(rtup->t_ctid);
if (ItemPointerGetBlockNumber(iptr) != page) {
* ----------------
*/
- tuple = heap_tuple_satisfies(lp, relation, dp,
+ tuple = heap_tuple_satisfies(lp, relation, buffer, dp,
timeQual, 0,(ScanKey)NULL);
if (tuple == NULL)
* check that we're deleteing a valid item
* ----------------
*/
- if (!(tp = heap_tuple_satisfies(lp, relation, dp,
+ if (!(tp = heap_tuple_satisfies(lp, relation, b, dp,
NowTimeQual, 0, (ScanKey) NULL))) {
/* XXX call something else */
*/
if (!heap_tuple_satisfies(lp,
relation,
+ buffer,
(PageHeader)dp,
NowTimeQual,
0,
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.7 1997/01/26 00:45:25 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.8 1997/03/28 07:04:52 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
long *PrivateRefCount; /* also used in freelist.c */
long *LastRefCount; /* refcounts of last ExecMain level */
+long *CommitInfoNeedsSave; /* to write buffers where we have filled in */
+ /* t_tmin (or t_tmax) */
/*
* Data Structures:
#endif
PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
LastRefCount = (long *) calloc(NBuffers, sizeof(long));
+ CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long));
}
/* -----------------------------------------------------
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.10 1997/01/23 19:43:23 scrappy Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.11 1997/03/28 07:05:03 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
AddBufferToFreelist(bufHdr);
bufHdr->flags |= BM_FREE;
}
+ if(CommitInfoNeedsSave[buffer - 1]) {
+ bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+ CommitInfoNeedsSave[buffer - 1] = 0;
+ }
retbuf = ReadBufferWithBufferLock(relation, blockNum, true);
return retbuf;
}
{
register int i;
for (i=1; i<=NBuffers; i++) {
+ CommitInfoNeedsSave[i - 1] = 0;
if (BufferIsValid(i)) {
while(PrivateRefCount[i - 1] > 0) {
ReleaseBuffer(i);
AddBufferToFreelist(bufHdr);
bufHdr->flags |= BM_FREE;
}
+ if(CommitInfoNeedsSave[buffer - 1]) {
+ bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+ CommitInfoNeedsSave[buffer - 1] = 0;
+ }
SpinRelease(BufMgrLock);
}
WriteMode = mode;
return (old);
}
+
+void SetBufferCommitInfoNeedsSave(Buffer buffer)
+{
+ CommitInfoNeedsSave[buffer - 1]++;
+}
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.1.1.1 1996/07/09 06:22:10 scrappy Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.2 1997/03/28 07:05:28 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
/*
* HeapTupleSatisfiesItself --
* True iff heap tuple is valid for "itself."
+ * "{it}self" means valid as of everything that's happened
+ * in the current transaction, _including_ the current command.
*
* Note:
* Assumes heap tuple is valid.
/*
* The satisfaction of "itself" requires the following:
*
- * ((Xmin == my-transaction && (Xmax is null [|| Xmax != my-transaction)])
+ * ((Xmin == my-transaction && the row was updated by the current transaction, and
+ * (Xmax is null it was not deleted
+ * [|| Xmax != my-transaction)]) [or it was deleted by another transaction]
* ||
*
- * (Xmin is committed &&
- * (Xmax is null || (Xmax != my-transaction && Xmax is not committed)))
+ * (Xmin is committed && the row was modified by a committed transaction, and
+ * (Xmax is null || the row has not been deleted, or
+ * (Xmax != my-transaction && the row was deleted by another transaction
+ * Xmax is not committed))) that has not been committed
*/
static bool
HeapTupleSatisfiesItself(HeapTuple tuple)
if (!TransactionIdDidCommit((TransactionId)tuple->t_xmin)) {
return (false);
}
+
+ tuple->t_tmin = TransactionIdGetCommitTime(tuple->t_xmin);
}
/* the tuple was inserted validly */
if (TransactionIdIsCurrentTransactionId((TransactionId)tuple->t_xmax)) {
return (false);
}
+
+ if (!TransactionIdDidCommit((TransactionId)tuple->t_xmax)) {
+ return (true);
+ }
- return ((bool)!TransactionIdDidCommit((TransactionId)tuple->t_xmax));
+ /* by here, deleting transaction has committed */
+ tuple->t_tmax = TransactionIdGetCommitTime(tuple->t_xmax);
+
+ return (false);
}
/*
* HeapTupleSatisfiesNow --
* True iff heap tuple is valid "now."
+ * "now" means valid including everything that's happened
+ * in the current transaction _up to, but not including,_
+ * the current command.
*
* Note:
* Assumes heap tuple is valid.
/*
* The satisfaction of "now" requires the following:
*
- * ((Xmin == my-transaction && Cmin != my-command &&
- * (Xmax is null || (Xmax == my-transaction && Cmax != my-command)))
- * ||
+ * ((Xmin == my-transaction && changed by the current transaction
+ * Cmin != my-command && but not by this command, and
+ * (Xmax is null || the row has not been deleted, or
+ * (Xmax == my-transaction && it was deleted by the current transaction
+ * Cmax != my-command))) but not by this command,
+ * || or
*
- * (Xmin is committed &&
- * (Xmax is null || (Xmax == my-transaction && Cmax == my-command) ||
- * (Xmax is not committed && Xmax != my-transaction))))
+ * (Xmin is committed && the row was modified by a committed transaction, and
+ * (Xmax is null || the row has not been deleted, or
+ * (Xmax == my-transaction && the row is being deleted by this command, or
+ * Cmax == my-command) ||
+ * (Xmax is not committed && the row was deleted by another transaction
+ * Xmax != my-transaction)))) that has not been committed
*
* mao says 17 march 1993: the tests in this routine are correct;
* if you think they're not, you're wrong, and you should think
if (!TransactionIdDidCommit((TransactionId)tuple->t_xmin)) {
return (false);
}
+
+ /*
+ * the transaction has been committed--store the commit time _now_
+ * instead of waiting for a vacuum so we avoid the expensive call
+ * next time.
+ */
+ tuple->t_tmin = TransactionIdGetCommitTime(tuple->t_xmin);
}
/* by here, the inserting transaction has committed */
if (TransactionIdIsCurrentTransactionId((TransactionId)tuple->t_xmax)) {
return (false);
}
-
+
+ if (AbsoluteTimeIsBackwardCompatiblyReal(tuple->t_tmax)) {
+ return (false);
+ }
+
if (!TransactionIdDidCommit((TransactionId)tuple->t_xmax)) {
- return (true);
+ return (true);
}
-
- /* by here, deleting transaction has committed */
+
+ /* xmax transaction committed, but no tmax set. so set it. */
+ tuple->t_tmax = TransactionIdGetCommitTime(tuple->t_xmax);
+
return (false);
}
*
* Copyright (c) 1994, Regents of the University of California
*
- * $Id: valid.h,v 1.3 1996/11/05 10:37:07 scrappy Exp $
+ * $Id: valid.h,v 1.4 1997/03/28 07:05:54 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
int nkeys, ScanKey keys);
extern HeapTuple heap_tuple_satisfies(ItemId itemId, Relation relation,
- PageHeader disk_page, TimeQual qual, int nKeys, ScanKey key);
+ Buffer buffer, PageHeader disk_page,
+ TimeQual qual, int nKeys,
+ ScanKey key);
extern bool TupleUpdatedByCurXactAndCmd(HeapTuple t);
*
* Copyright (c) 1994, Regents of the University of California
*
- * $Id: buf_internals.h,v 1.11 1997/01/25 03:09:33 momjian Exp $
+ * $Id: buf_internals.h,v 1.12 1997/03/28 07:06:48 scrappy Exp $
*
* NOTE
* If BUFFERPAGE0 is defined, then 0 will be used as a
extern BufferBlock BufferBlocks;
extern long *PrivateRefCount;
extern long *LastRefCount;
+extern long *CommitInfoNeedsSave;
extern SPINLOCK BufMgrLock;
/* localbuf.c */
*
* Copyright (c) 1994, Regents of the University of California
*
- * $Id: bufmgr.h,v 1.9 1997/01/16 07:53:27 vadim Exp $
+ * $Id: bufmgr.h,v 1.10 1997/03/28 07:06:53 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
extern void BufferRefCountReset(int *refcountsave);
extern void BufferRefCountRestore(int *refcountsave);
extern int SetBufferWriteMode (int mode);
+extern void SetBufferCommitInfoNeedsSave(Buffer buffer);
#endif /* !defined(BufMgrIncluded) */