Minor changes to autovacuum worker: change error handling so that it continues
authorAlvaro Herrera
Wed, 24 Oct 2007 19:08:25 +0000 (19:08 +0000)
committerAlvaro Herrera
Wed, 24 Oct 2007 19:08:25 +0000 (19:08 +0000)
with the next table on schedule instead of exiting, in all cases instead of
just on query cancel.

Add a errcontext() line indicating the activity of the worker to the error
message when it is cancelled.

Change the WorkerInfo struct to contain a pointer to the worker's PGPROC
instead of just the PID.

Add forgotten post-auth delays, per Simon Riggs.  Also to autovac launcher.

src/backend/postmaster/autovacuum.c

index bce0ba4323ff02b17b5acf6a43897db4e930eb79..9e472d02ff6bd20fd22e4c11cf683e522069c6cc 100644 (file)
@@ -55,7 +55,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.61 2007/09/24 04:12:01 alvherre Exp $
+ *   $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.62 2007/10/24 19:08:25 alvherre Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -182,7 +182,7 @@ typedef struct autovac_table
  * wi_links        entry into free list or running list
  * wi_dboid        OID of the database this worker is supposed to work on
  * wi_tableoid OID of the table currently being vacuumed
- * wi_workerpid    PID of the running worker, 0 if not yet started
+ * wi_proc     pointer to PGPROC of the running worker, NULL if not started
  * wi_launchtime Time at which this worker was launched
  * wi_cost_*   Vacuum cost-based delay parameters current in this worker
  *
@@ -196,7 +196,7 @@ typedef struct WorkerInfoData
    SHM_QUEUE   wi_links;
    Oid         wi_dboid;
    Oid         wi_tableoid;
-   int         wi_workerpid;
+   PGPROC     *wi_proc;
    TimestampTz wi_launchtime;
    int         wi_cost_delay;
    int         wi_cost_limit;
@@ -395,6 +395,9 @@ AutoVacLauncherMain(int argc, char *argv[])
    /* Identify myself via ps */
    init_ps_display("autovacuum launcher process", "", "", "");
 
+   if (PostAuthDelay)
+       pg_usleep(PostAuthDelay * 1000000L);
+
    SetProcessingMode(InitProcessing);
 
    /*
@@ -694,7 +697,7 @@ AutoVacLauncherMain(int argc, char *argv[])
                    worker = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
                    worker->wi_dboid = InvalidOid;
                    worker->wi_tableoid = InvalidOid;
-                   worker->wi_workerpid = 0;
+                   worker->wi_proc = NULL;
                    worker->wi_launchtime = 0;
                    worker->wi_links.next = AutoVacuumShmem->av_freeWorkers;
                    AutoVacuumShmem->av_freeWorkers = MAKE_OFFSET(worker);
@@ -1198,7 +1201,7 @@ do_start_worker(void)
        AutoVacuumShmem->av_freeWorkers = worker->wi_links.next;
 
        worker->wi_dboid = avdb->adw_datid;
-       worker->wi_workerpid = 0;
+       worker->wi_proc = NULL;
        worker->wi_launchtime = GetCurrentTimestamp();
 
        AutoVacuumShmem->av_startingWorker = sworker;
@@ -1437,6 +1440,9 @@ AutoVacWorkerMain(int argc, char *argv[])
    /* Identify myself via ps */
    init_ps_display("autovacuum worker process", "", "", "");
 
+   if (PostAuthDelay)
+       pg_usleep(PostAuthDelay * 1000000L);
+
    SetProcessingMode(InitProcessing);
 
    /*
@@ -1542,7 +1548,7 @@ AutoVacWorkerMain(int argc, char *argv[])
    {
        MyWorkerInfo = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
        dbid = MyWorkerInfo->wi_dboid;
-       MyWorkerInfo->wi_workerpid = MyProcPid;
+       MyWorkerInfo->wi_proc = MyProc;
 
        /* insert into the running list */
        SHMQueueInsertBefore(&AutoVacuumShmem->av_runningWorkers, 
@@ -1637,7 +1643,7 @@ FreeWorkerInfo(int code, Datum arg)
        MyWorkerInfo->wi_links.next = AutoVacuumShmem->av_freeWorkers;
        MyWorkerInfo->wi_dboid = InvalidOid;
        MyWorkerInfo->wi_tableoid = InvalidOid;
-       MyWorkerInfo->wi_workerpid = 0;
+       MyWorkerInfo->wi_proc = NULL;
        MyWorkerInfo->wi_launchtime = 0;
        MyWorkerInfo->wi_cost_delay = 0;
        MyWorkerInfo->wi_cost_limit = 0;
@@ -1701,7 +1707,7 @@ autovac_balance_cost(void)
                                       offsetof(WorkerInfoData, wi_links));
    while (worker)
    {
-       if (worker->wi_workerpid != 0 &&
+       if (worker->wi_proc != NULL &&
            worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
            cost_total +=
                (double) worker->wi_cost_limit_base / worker->wi_cost_delay;
@@ -1724,7 +1730,7 @@ autovac_balance_cost(void)
                                       offsetof(WorkerInfoData, wi_links));
    while (worker)
    {
-       if (worker->wi_workerpid != 0 &&
+       if (worker->wi_proc != NULL &&
            worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
        {
            int     limit = (int)
@@ -1737,7 +1743,7 @@ autovac_balance_cost(void)
            worker->wi_cost_limit = Max(Min(limit, worker->wi_cost_limit_base), 1);
 
            elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)",
-                worker->wi_workerpid, worker->wi_dboid,
+                worker->wi_proc->pid, worker->wi_dboid,
                 worker->wi_tableoid, worker->wi_cost_limit, worker->wi_cost_delay);
        }
 
@@ -2062,25 +2068,27 @@ next_worker:
        VacuumCostDelay = tab->at_vacuum_cost_delay;
        VacuumCostLimit = tab->at_vacuum_cost_limit;
 
-       /*
-        * Advertise my cost delay parameters for the balancing algorithm, and
-        * do a balance
-        */
+       /* Last fixups before actually starting to work */
        LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
+
+       /* advertise my cost delay parameters for the balancing algorithm */
        MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay;
        MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit;
        MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit;
+
+       /* do a balance */
        autovac_balance_cost();
+
+       /* done */
        LWLockRelease(AutovacuumLock);
 
        /* clean up memory before each iteration */
        MemoryContextResetAndDeleteChildren(PortalContext);
 
        /*
-        * We will abort vacuuming the current table if we are interrupted, and
-        * continue with the next one in schedule; but if anything else
-        * happens, we will do our usual error handling which is to cause the
-        * worker process to exit.
+        * We will abort vacuuming the current table if something errors out,
+        * and continue with the next one in schedule; in particular, this
+        * happens if we are interrupted with SIGINT.
         */
        PG_TRY();
        {
@@ -2094,39 +2102,40 @@ next_worker:
        }
        PG_CATCH();
        {
-           ErrorData      *errdata;
-
-           MemoryContextSwitchTo(TopTransactionContext);
-           errdata = CopyErrorData();
-
            /*
-            * If we errored out due to a cancel request, abort and restart the
-            * transaction and go to the next table.  Otherwise rethrow the
-            * error so that the outermost handler deals with it.
+            * Abort the transaction, start a new one, and proceed with the
+            * next table in our list.
             */
-           if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
-           {
-               HOLD_INTERRUPTS();
-               elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
-                    get_database_name(MyDatabaseId),
-                    get_namespace_name(get_rel_namespace(tab->at_relid)),
-                    get_rel_name(tab->at_relid));
-
-               AbortOutOfAnyTransaction();
-               FlushErrorState();
-               MemoryContextResetAndDeleteChildren(PortalContext);
-
-               /* restart our transaction for the following operations */
-               StartTransactionCommand();
-               RESUME_INTERRUPTS();
-           }
+           HOLD_INTERRUPTS();
+           if (tab->at_dovacuum)
+               errcontext("automatic vacuum of table \"%s.%s.%s\"",
+                          get_database_name(MyDatabaseId),
+                          get_namespace_name(get_rel_namespace(tab->at_relid)),
+                          get_rel_name(tab->at_relid));
            else
-               PG_RE_THROW();
+               errcontext("automatic analyze of table \"%s.%s.%s\"",
+                          get_database_name(MyDatabaseId),
+                          get_namespace_name(get_rel_namespace(tab->at_relid)),
+                          get_rel_name(tab->at_relid));
+           EmitErrorReport();
+
+           AbortOutOfAnyTransaction();
+           FlushErrorState();
+           MemoryContextResetAndDeleteChildren(PortalContext);
+
+           /* restart our transaction for the following operations */
+           StartTransactionCommand();
+           RESUME_INTERRUPTS();
        }
        PG_END_TRY();
 
        /* be tidy */
        pfree(tab);
+
+       /* remove my info from shared memory */
+       LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
+       MyWorkerInfo->wi_tableoid = InvalidOid;
+       LWLockRelease(AutovacuumLock);
    }
 
    /*