TOAST needs to do at least minimal time-qual checking in order not to
author Tom Lane
Wed, 16 Jan 2002 20:29:02 +0000 (20:29 +0000)
committer Tom Lane
Wed, 16 Jan 2002 20:29:02 +0000 (20:29 +0000)
mess up after an aborted VACUUM FULL, per today's pghackers discussion.
Add a suitable HeapTupleSatisfiesToast routine.  Remove useless special-
case test in HeapTupleSatisfiesVisibility macro for xmax =
BootstrapTransactionId; perhaps that was needed at one time, but it's
a waste of cycles now, not to mention actively wrong for SnapshotAny.
Along the way, add some much-needed comments to tqual.c, and simplify
toast_fetch_datum, which no longer needs to assume it may see chunks
out-of-order.

src/backend/access/heap/tuptoaster.c
src/backend/utils/time/tqual.c
src/include/utils/tqual.h

index f46c75fd6a5077b5ed1b0ac0496d15122d809cad..48a15cf5d341c16613ccba5481baec8343db722c 100644 (file)
@@ -4,11 +4,11 @@
  *   Support routines for external and compressed storage of
  *   variable size attributes.
  *
- * Copyright (c) 2000, PostgreSQL Global Development Group
+ * Copyright (c) 2000-2002, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.26 2001/11/05 17:46:23 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.27 2002/01/16 20:29:01 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -921,7 +921,7 @@ toast_delete_datum(Relation rel, Datum value)
    while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
    {
        toasttup.t_self = indexRes->heap_iptr;
-       heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
+       heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
        pfree(indexRes);
 
        if (!toasttup.t_data)
@@ -963,26 +963,18 @@ toast_fetch_datum(varattrib *attr)
    TupleDesc   toasttupDesc;
    RetrieveIndexResult indexRes;
    Buffer      buffer;
-
    varattrib  *result;
    int32       ressize;
-   int32       residx;
-   int         numchunks;
+   int32       residx,
+               nextidx;
+   int32       numchunks;
    Pointer     chunk;
    bool        isnull;
    int32       chunksize;
 
-   char       *chunks_found;
-   char       *chunks_expected;
-
    ressize = attr->va_content.va_external.va_extsize;
    numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
 
-   chunks_found = palloc(numchunks);
-   chunks_expected = palloc(numchunks);
-   memset(chunks_found, 0, numchunks);
-   memset(chunks_expected, 1, numchunks);
-
    result = (varattrib *) palloc(ressize + VARHDRSZ);
    VARATT_SIZEP(result) = ressize + VARHDRSZ;
    if (VARATT_IS_COMPRESSED(attr))
@@ -1008,13 +1000,17 @@ toast_fetch_datum(varattrib *attr)
    /*
     * Read the chunks by index
     *
-    * Note we will not necessarily see the chunks in sequence-number order.
+    * Note that because the index is actually on (valueid, chunkidx)
+    * we will see the chunks in chunkidx order, even though we didn't
+    * explicitly ask for it.
     */
+   nextidx = 0;
+
    toastscan = index_beginscan(toastidx, false, 1, &toastkey);
    while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
    {
        toasttup.t_self = indexRes->heap_iptr;
-       heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
+       heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
        pfree(indexRes);
 
        if (toasttup.t_data == NULL)
@@ -1033,9 +1029,9 @@ toast_fetch_datum(varattrib *attr)
        /*
         * Some checks on the data we've found
         */
-       if (residx < 0 || residx >= numchunks)
-           elog(ERROR, "unexpected chunk number %d for toast value %u",
-                residx,
+       if (residx != nextidx)
+           elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
+                residx, nextidx,
                 attr->va_content.va_external.va_valueid);
        if (residx < numchunks - 1)
        {
@@ -1044,15 +1040,15 @@ toast_fetch_datum(varattrib *attr)
                     chunksize, residx,
                     attr->va_content.va_external.va_valueid);
        }
-       else
+       else if (residx < numchunks)
        {
            if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
                elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
                     chunksize, residx,
                     attr->va_content.va_external.va_valueid);
        }
-       if (chunks_found[residx]++ > 0)
-           elog(ERROR, "chunk %d for toast value %u appears multiple times",
+       else
+           elog(ERROR, "unexpected chunk number %d for toast value %u",
                 residx,
                 attr->va_content.va_external.va_valueid);
 
@@ -1064,16 +1060,16 @@ toast_fetch_datum(varattrib *attr)
               chunksize);
 
        ReleaseBuffer(buffer);
+       nextidx++;
    }
 
    /*
     * Final checks that we successfully fetched the datum
     */
-   if (memcmp(chunks_found, chunks_expected, numchunks) != 0)
-       elog(ERROR, "not all toast chunks found for value %u",
+   if (nextidx != numchunks)
+       elog(ERROR, "missing chunk number %d for toast value %u",
+            nextidx,
             attr->va_content.va_external.va_valueid);
-   pfree(chunks_expected);
-   pfree(chunks_found);
 
    /*
     * End scan and close relations
index b71b97088bf4aa7497fffcd5ab8d600d9d055baf..aa2c717e9d8a5eb886b4bc2d04058fc200b4aa45 100644 (file)
@@ -1,14 +1,22 @@
 /*-------------------------------------------------------------------------
  *
  * tqual.c
- *   POSTGRES "time" qualification code.
+ *   POSTGRES "time" qualification code, ie, tuple visibility rules.
+ *
+ * NOTE: all the HeapTupleSatisfies routines will update the tuple's
+ * "hint" status bits if we see that the inserting or deleting transaction
+ * has now committed or aborted.  The caller is responsible for noticing any
+ * change in t_infomask and scheduling a disk write if so.  Note that the
+ * caller must hold at least a shared buffer context lock on the buffer
+ * containing the tuple.  (VACUUM FULL assumes it's sufficient to have
+ * exclusive lock on the containing relation, instead.)
+ *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.46 2002/01/11 20:07:03 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.47 2002/01/16 20:29:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +44,7 @@ bool      ReferentialIntegritySnapshotOverride = false;
  *
  * Note:
  *     Assumes heap tuple is valid.
- */
-/*
+ *
  * The satisfaction of "itself" requires the following:
  *
  * ((Xmin == my-transaction &&             the row was updated by the current transaction, and
@@ -153,8 +160,7 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
  *
  * Note:
  *     Assumes heap tuple is valid.
- */
-/*
+ *
  * The satisfaction of "now" requires the following:
  *
  * ((Xmin == my-transaction &&             changed by the current transaction
@@ -288,6 +294,71 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
    return false;
 }
 
+/*
+ * HeapTupleSatisfiesToast
+ *     True iff heap tuple is valid for TOAST usage.
+ *
+ * This is a simplified version that only checks for VACUUM moving conditions.
+ * It's appropriate for TOAST usage because TOAST really doesn't want to do
+ * its own time qual checks; if you can see the main-table row that contains
+ * a TOAST reference, you should be able to see the TOASTed value.  However,
+ * vacuuming a TOAST table is independent of the main table, and in case such
+ * a vacuum fails partway through, we'd better do this much checking.
+ *
+ * Among other things, this means you can't do UPDATEs of rows in a TOAST
+ * table.
+ */
+bool
+HeapTupleSatisfiesToast(HeapTupleHeader tuple)
+{
+   if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))  /* no "committed" hint yet: inspect further */
+   {
+       if (tuple->t_infomask & HEAP_XMIN_INVALID)   /* already hinted as invalid */
+           return false;
+
+       if (tuple->t_infomask & HEAP_MOVED_OFF)      /* t_cmin is read as an xid in both MOVED cases */
+       {
+           if (TransactionIdIsCurrentTransactionId((TransactionId) tuple->t_cmin))
+               return false;                        /* MOVED_OFF by the current transaction */
+           if (!TransactionIdIsInProgress((TransactionId) tuple->t_cmin))
+           {
+               if (TransactionIdDidCommit((TransactionId) tuple->t_cmin))
+               {
+                   tuple->t_infomask |= HEAP_XMIN_INVALID;  /* that xact committed: record hint, reject */
+                   return false;
+               }
+               tuple->t_infomask |= HEAP_XMIN_COMMITTED;    /* neither running nor committed: record hint, accept */
+           }                                        /* still in progress: no hint set, falls through to true */
+       }
+       else if (tuple->t_infomask & HEAP_MOVED_IN)
+       {
+           if (!TransactionIdIsCurrentTransactionId((TransactionId) tuple->t_cmin))
+           {
+               if (TransactionIdIsInProgress((TransactionId) tuple->t_cmin))
+                   return false;                    /* that xact is still running: reject, no hint set */
+               if (TransactionIdDidCommit((TransactionId) tuple->t_cmin))
+                   tuple->t_infomask |= HEAP_XMIN_COMMITTED;    /* that xact committed: record hint, accept */
+               else
+               {
+                   tuple->t_infomask |= HEAP_XMIN_INVALID;  /* finished without committing: record hint, reject */
+                   return false;
+               }
+           }                                        /* MOVED_IN by the current transaction: accepted as-is */
+       }
+   }
+
+   /* otherwise assume the tuple is valid for TOAST (xmax is never examined here). */
+   return true;
+}
+
+/*
+ * HeapTupleSatisfiesUpdate
+ *     Check whether a tuple can be updated.
+ *
+ * This applies exactly the same checks as HeapTupleSatisfiesNow,
+ * but returns a more-detailed result code, since UPDATE needs to know
+ * more than "is it visible?"
+ */
 int
 HeapTupleSatisfiesUpdate(HeapTuple htuple)
 {
@@ -404,6 +475,18 @@ HeapTupleSatisfiesUpdate(HeapTuple htuple)
    return HeapTupleUpdated;    /* updated by other */
 }
 
+/*
+ * HeapTupleSatisfiesDirty
+ *     True iff heap tuple is valid, including effects of concurrent xacts.
+ *
+ * This is essentially like HeapTupleSatisfiesItself as far as effects of
+ * the current transaction and committed/aborted xacts are concerned.
+ * However, we also include the effects of other xacts still in progress.
+ *
+ * Returns extra information in the global variable SnapshotDirty, namely
+ * xids of concurrent xacts that affected the tuple.  Also, the tuple's
+ * t_ctid (forward link) is returned if it's being updated.
+ */
 bool
 HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
 {
@@ -516,6 +599,18 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
    return false;               /* updated by other */
 }
 
+/*
+ * HeapTupleSatisfiesSnapshot
+ *     True iff heap tuple is valid for the given snapshot.
+ *
+ * This is the same as HeapTupleSatisfiesNow, except that transactions that
+ * were in progress or as yet unstarted when the snapshot was taken will
+ * be treated as uncommitted, even if they really have committed by now.
+ *
+ * (Notice, however, that the tuple status hint bits will be updated on the
+ * basis of the true state of the transaction, even if we then pretend we
+ * can't see it.)
+ */
 bool
 HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
 {
@@ -658,11 +753,6 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
  * deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
  * still be visible to some open transaction, so we can't remove them,
  * even if we see that the deleting transaction has committed.
- *
- * As with the other HeapTupleSatisfies routines, we may update the tuple's
- * "hint" status bits if we see that the inserting or deleting transaction
- * has now committed or aborted.  The caller is responsible for noticing any
- * change in t_infomask and scheduling a disk write if so.
  */
 HTSV_Result
 HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
@@ -808,13 +898,21 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
 }
 
 
+/*
+ * SetQuerySnapshot
+ *     Initialize query snapshot for a new query
+ *
+ * The SerializableSnapshot is the first one taken in a transaction.
+ * In serializable mode we just use that one throughout the transaction.
+ * In read-committed mode, we take a new snapshot at the start of each query.
+ */
 void
 SetQuerySnapshot(void)
 {
    /* Initialize snapshot overriding to false */
    ReferentialIntegritySnapshotOverride = false;
 
-   /* 1st call in xaction */
+   /* 1st call in xaction? */
    if (SerializableSnapshot == NULL)
    {
        SerializableSnapshot = GetSnapshotData(true);
@@ -837,6 +935,10 @@ SetQuerySnapshot(void)
    Assert(QuerySnapshot != NULL);
 }
 
+/*
+ * FreeXactSnapshot
+ *     Free snapshot(s) at end of transaction.
+ */
 void
 FreeXactSnapshot(void)
 {
index 070ee9cda1ebed451c5b58f535c849824192cc2d..a69851e66611ccce2467485dabe04a4363b8d532 100644 (file)
@@ -1,14 +1,14 @@
 /*-------------------------------------------------------------------------
  *
  * tqual.h
- *   POSTGRES "time" qualification definitions.
+ *   POSTGRES "time" qualification definitions, ie, tuple visibility rules.
  *
  *   Should be moved/renamed...    - vadim 07/28/98
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tqual.h,v 1.37 2001/11/05 17:46:36 momjian Exp $
+ * $Id: tqual.h,v 1.38 2002/01/16 20:29:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,6 +34,7 @@ typedef SnapshotData *Snapshot;
 #define SnapshotNow                    ((Snapshot) 0x0)
 #define SnapshotSelf               ((Snapshot) 0x1)
 #define SnapshotAny                    ((Snapshot) 0x2)
+#define SnapshotToast              ((Snapshot) 0x3)
 
 extern DLLIMPORT Snapshot SnapshotDirty;
 extern DLLIMPORT Snapshot QuerySnapshot;
@@ -44,37 +45,36 @@ extern bool ReferentialIntegritySnapshotOverride;
 #define IsSnapshotNow(snapshot)        ((Snapshot) (snapshot) == SnapshotNow)
 #define IsSnapshotSelf(snapshot)   ((Snapshot) (snapshot) == SnapshotSelf)
 #define IsSnapshotAny(snapshot)        ((Snapshot) (snapshot) == SnapshotAny)
+#define IsSnapshotToast(snapshot)  ((Snapshot) (snapshot) == SnapshotToast)
 #define IsSnapshotDirty(snapshot)  ((Snapshot) (snapshot) == SnapshotDirty)
 
 
 /*
  * HeapTupleSatisfiesVisibility
- *     True iff heap tuple satsifies a time qual.
+ *     True iff heap tuple satisfies a time qual.
  *
  * Notes:
  *     Assumes heap tuple is valid.
- *     Beware of multiple evaluations of arguments.
+ *     Beware of multiple evaluations of snapshot argument.
  */
 #define HeapTupleSatisfiesVisibility(tuple, snapshot) \
-( \
-   TransactionIdEquals((tuple)->t_data->t_xmax, BootstrapTransactionId) ? \
-       false \
+(IsSnapshotNow(snapshot) ? \
+   HeapTupleSatisfiesNow((tuple)->t_data) \
+: \
+   (IsSnapshotSelf(snapshot) ? \
+       HeapTupleSatisfiesItself((tuple)->t_data) \
    : \
-   ( \
-       IsSnapshotAny(snapshot) ? \
+       (IsSnapshotAny(snapshot) ? \
            true \
        : \
-           (IsSnapshotSelf(snapshot) ? \
-               HeapTupleSatisfiesItself((tuple)->t_data) \
+           (IsSnapshotToast(snapshot) ? \
+               HeapTupleSatisfiesToast((tuple)->t_data) \
            : \
-               (IsSnapshotNow(snapshot) ? \
-                   HeapTupleSatisfiesNow((tuple)->t_data) \
+               (IsSnapshotDirty(snapshot) ? \
+                   HeapTupleSatisfiesDirty((tuple)->t_data) \
                : \
-                   (IsSnapshotDirty(snapshot) ? \
-                       HeapTupleSatisfiesDirty((tuple)->t_data) \
-                   : \
-                       HeapTupleSatisfiesSnapshot((tuple)->t_data, snapshot) \
-                   ) \
+                   HeapTupleSatisfiesSnapshot((tuple)->t_data, snapshot) \
+               ) \
            ) \
        ) \
    ) \
@@ -93,14 +93,15 @@ typedef enum
    HEAPTUPLE_DEAD,             /* tuple is dead and deletable */
    HEAPTUPLE_LIVE,             /* tuple is live (committed, no deleter) */
    HEAPTUPLE_RECENTLY_DEAD,    /* tuple is dead, but not deletable yet */
-   HEAPTUPLE_INSERT_IN_PROGRESS,       /* inserting xact is still in
-                                        * progress */
+   HEAPTUPLE_INSERT_IN_PROGRESS,   /* inserting xact is still in
+                                    * progress */
    HEAPTUPLE_DELETE_IN_PROGRESS    /* deleting xact is still in progress */
 } HTSV_Result;
 
 extern bool HeapTupleSatisfiesItself(HeapTupleHeader tuple);
 extern bool HeapTupleSatisfiesNow(HeapTupleHeader tuple);
 extern bool HeapTupleSatisfiesDirty(HeapTupleHeader tuple);
+extern bool HeapTupleSatisfiesToast(HeapTupleHeader tuple);
 extern bool HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple,
                           Snapshot snapshot);
 extern int HeapTupleSatisfiesUpdate(HeapTuple tuple);