Avoid O(N^2) cost in ExecFindRowMark().
authorTom Lane
Mon, 8 Oct 2018 14:41:34 +0000 (10:41 -0400)
committerTom Lane
Mon, 8 Oct 2018 14:41:34 +0000 (10:41 -0400)
If there are many ExecRowMark structs, we spent O(N^2) time in
ExecFindRowMark during executor startup.  Once upon a time this was
not of great concern, but the addition of native partitioning has
squeezed out enough other costs that this can become the dominant
overhead in some use-cases for tables with many partitions.

To fix, simply replace that List data structure with an array.

This adds a little bit of cost to execCurrentOf(), but not much,
and anyway that code path is neither of large importance nor very
efficient now.  If we ever decide it is a bottleneck, constructing a
hash table for lookup-by-tableoid would likely be the thing to do.

Per complaint from Amit Langote, though this is different from
his fix proposal.

Discussion: https://postgr.es/m/468c85d9-540e-66a2-1dde-fec2b741e688@lab.ntt.co.jp

src/backend/executor/execCurrent.c
src/backend/executor/execMain.c
src/backend/executor/execUtils.c
src/include/nodes/execnodes.h

index 7480cf50ad0382fdc6ec1394c1cccb7e86a6cfa4..aadf7493827e9a3b7c74bac44e935ebb1256964f 100644 (file)
@@ -91,21 +91,22 @@ execCurrentOf(CurrentOfExpr *cexpr,
     * the other code can't, while the non-FOR-UPDATE case allows use of WHERE
     * CURRENT OF with an insensitive cursor.
     */
-   if (queryDesc->estate->es_rowMarks)
+   if (queryDesc->estate->es_rowmarks)
    {
        ExecRowMark *erm;
-       ListCell   *lc;
+       Index       i;
 
        /*
         * Here, the query must have exactly one FOR UPDATE/SHARE reference to
         * the target table, and we dig the ctid info out of that.
         */
        erm = NULL;
-       foreach(lc, queryDesc->estate->es_rowMarks)
+       for (i = 0; i < queryDesc->estate->es_range_table_size; i++)
        {
-           ExecRowMark *thiserm = (ExecRowMark *) lfirst(lc);
+           ExecRowMark *thiserm = queryDesc->estate->es_rowmarks[i];
 
-           if (!RowMarkRequiresRowShareLock(thiserm->markType))
+           if (thiserm == NULL ||
+               !RowMarkRequiresRowShareLock(thiserm->markType))
                continue;       /* ignore non-FOR UPDATE/SHARE items */
 
            if (thiserm->relid == table_oid)
index b6abad554a404a22bd7302c8bc453c22a5cf736e..0a766ef1e5adc09b91672818a94b8af8d76546f4 100644 (file)
@@ -909,61 +909,68 @@ InitPlan(QueryDesc *queryDesc, int eflags)
    }
 
    /*
-    * Next, build the ExecRowMark list from the PlanRowMark(s), if any.
+    * Next, build the ExecRowMark array from the PlanRowMark(s), if any.
     */
-   estate->es_rowMarks = NIL;
-   foreach(l, plannedstmt->rowMarks)
+   if (plannedstmt->rowMarks)
    {
-       PlanRowMark *rc = (PlanRowMark *) lfirst(l);
-       Oid         relid;
-       Relation    relation;
-       ExecRowMark *erm;
+       estate->es_rowmarks = (ExecRowMark **)
+           palloc0(estate->es_range_table_size * sizeof(ExecRowMark *));
+       foreach(l, plannedstmt->rowMarks)
+       {
+           PlanRowMark *rc = (PlanRowMark *) lfirst(l);
+           Oid         relid;
+           Relation    relation;
+           ExecRowMark *erm;
 
-       /* ignore "parent" rowmarks; they are irrelevant at runtime */
-       if (rc->isParent)
-           continue;
+           /* ignore "parent" rowmarks; they are irrelevant at runtime */
+           if (rc->isParent)
+               continue;
 
-       /* get relation's OID (will produce InvalidOid if subquery) */
-       relid = exec_rt_fetch(rc->rti, estate)->relid;
+           /* get relation's OID (will produce InvalidOid if subquery) */
+           relid = exec_rt_fetch(rc->rti, estate)->relid;
 
-       /* open relation, if we need to access it for this mark type */
-       switch (rc->markType)
-       {
-           case ROW_MARK_EXCLUSIVE:
-           case ROW_MARK_NOKEYEXCLUSIVE:
-           case ROW_MARK_SHARE:
-           case ROW_MARK_KEYSHARE:
-           case ROW_MARK_REFERENCE:
-               relation = ExecGetRangeTableRelation(estate, rc->rti);
-               break;
-           case ROW_MARK_COPY:
-               /* no physical table access is required */
-               relation = NULL;
-               break;
-           default:
-               elog(ERROR, "unrecognized markType: %d", rc->markType);
-               relation = NULL;    /* keep compiler quiet */
-               break;
-       }
+           /* open relation, if we need to access it for this mark type */
+           switch (rc->markType)
+           {
+               case ROW_MARK_EXCLUSIVE:
+               case ROW_MARK_NOKEYEXCLUSIVE:
+               case ROW_MARK_SHARE:
+               case ROW_MARK_KEYSHARE:
+               case ROW_MARK_REFERENCE:
+                   relation = ExecGetRangeTableRelation(estate, rc->rti);
+                   break;
+               case ROW_MARK_COPY:
+                   /* no physical table access is required */
+                   relation = NULL;
+                   break;
+               default:
+                   elog(ERROR, "unrecognized markType: %d", rc->markType);
+                   relation = NULL;    /* keep compiler quiet */
+                   break;
+           }
 
-       /* Check that relation is a legal target for marking */
-       if (relation)
-           CheckValidRowMarkRel(relation, rc->markType);
-
-       erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
-       erm->relation = relation;
-       erm->relid = relid;
-       erm->rti = rc->rti;
-       erm->prti = rc->prti;
-       erm->rowmarkId = rc->rowmarkId;
-       erm->markType = rc->markType;
-       erm->strength = rc->strength;
-       erm->waitPolicy = rc->waitPolicy;
-       erm->ermActive = false;
-       ItemPointerSetInvalid(&(erm->curCtid));
-       erm->ermExtra = NULL;
-
-       estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
+           /* Check that relation is a legal target for marking */
+           if (relation)
+               CheckValidRowMarkRel(relation, rc->markType);
+
+           erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
+           erm->relation = relation;
+           erm->relid = relid;
+           erm->rti = rc->rti;
+           erm->prti = rc->prti;
+           erm->rowmarkId = rc->rowmarkId;
+           erm->markType = rc->markType;
+           erm->strength = rc->strength;
+           erm->waitPolicy = rc->waitPolicy;
+           erm->ermActive = false;
+           ItemPointerSetInvalid(&(erm->curCtid));
+           erm->ermExtra = NULL;
+
+           Assert(erm->rti > 0 && erm->rti <= estate->es_range_table_size &&
+                  estate->es_rowmarks[erm->rti - 1] == NULL);
+
+           estate->es_rowmarks[erm->rti - 1] = erm;
+       }
    }
 
    /*
@@ -2394,13 +2401,12 @@ ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
 ExecRowMark *
 ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
 {
-   ListCell   *lc;
-
-   foreach(lc, estate->es_rowMarks)
+   if (rti > 0 && rti <= estate->es_range_table_size &&
+       estate->es_rowmarks != NULL)
    {
-       ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
+       ExecRowMark *erm = estate->es_rowmarks[rti - 1];
 
-       if (erm->rti == rti)
+       if (erm)
            return erm;
    }
    if (!missing_ok)
@@ -3131,6 +3137,7 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
    estate->es_range_table_array = parentestate->es_range_table_array;
    estate->es_range_table_size = parentestate->es_range_table_size;
    estate->es_relations = parentestate->es_relations;
+   estate->es_rowmarks = parentestate->es_rowmarks;
    estate->es_plannedstmt = parentestate->es_plannedstmt;
    estate->es_junkFilter = parentestate->es_junkFilter;
    estate->es_output_cid = parentestate->es_output_cid;
@@ -3148,7 +3155,6 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
    }
    /* es_result_relation_info must NOT be copied */
    /* es_trig_target_relations must NOT be copied */
-   estate->es_rowMarks = parentestate->es_rowMarks;
    estate->es_top_eflags = parentestate->es_top_eflags;
    estate->es_instrument = parentestate->es_instrument;
    /* es_auxmodifytables must NOT be copied */
index d98e90e71173b9c38328fb81afe254b3c9a27d70..71c6b5dc0a85cfdd3c40f1ceebc6b3343d7b1ed0 100644 (file)
@@ -113,6 +113,7 @@ CreateExecutorState(void)
    estate->es_range_table_array = NULL;
    estate->es_range_table_size = 0;
    estate->es_relations = NULL;
+   estate->es_rowmarks = NULL;
    estate->es_plannedstmt = NULL;
 
    estate->es_junkFilter = NULL;
@@ -142,8 +143,6 @@ CreateExecutorState(void)
 
    estate->es_tupleTable = NIL;
 
-   estate->es_rowMarks = NIL;
-
    estate->es_processed = 0;
    estate->es_lastoid = InvalidOid;
 
@@ -709,6 +708,12 @@ ExecInitRangeTable(EState *estate, List *rangeTable)
     */
    estate->es_relations = (Relation *)
        palloc0(estate->es_range_table_size * sizeof(Relation));
+
+   /*
+    * es_rowmarks is also parallel to the es_range_table_array, but it's
+    * allocated only if needed.
+    */
+   estate->es_rowmarks = NULL;
 }
 
 /*
index 657b5936631f47f04b1378b465da1bb63fc4499f..834708944bf836c0edbd6e180e9bc822dfef75b9 100644 (file)
@@ -34,6 +34,7 @@
 
 struct PlanState;              /* forward references in this file */
 struct ParallelHashJoinState;
+struct ExecRowMark;
 struct ExprState;
 struct ExprContext;
 struct RangeTblEntry;          /* avoid including parsenodes.h here */
@@ -491,6 +492,8 @@ typedef struct EState
    Index       es_range_table_size;    /* size of the range table arrays */
    Relation   *es_relations;   /* Array of per-range-table-entry Relation
                                 * pointers, or NULL if not yet opened */
+   struct ExecRowMark **es_rowmarks;   /* Array of per-range-table-entry
+                                        * ExecRowMarks, or NULL if none */
    PlannedStmt *es_plannedstmt;    /* link to top of plan tree */
    const char *es_sourceText;  /* Source text from QueryDesc */
 
@@ -537,8 +540,6 @@ typedef struct EState
 
    List       *es_tupleTable;  /* List of TupleTableSlots */
 
-   List       *es_rowMarks;    /* List of ExecRowMarks */
-
    uint64      es_processed;   /* # of tuples processed */
    Oid         es_lastoid;     /* last oid processed (by INSERT) */
 
@@ -607,7 +608,9 @@ typedef struct EState
  * node that sources the relation (e.g., for a foreign table the FDW can use
  * ermExtra to hold information).
  *
- * EState->es_rowMarks is a list of these structs.
+ * EState->es_rowmarks is an array of these structs, indexed by RT index,
+ * with NULLs for irrelevant RT indexes.  es_rowmarks itself is NULL if
+ * there are no rowmarks.
  */
 typedef struct ExecRowMark
 {
@@ -629,7 +632,7 @@ typedef struct ExecRowMark
  *    additional runtime representation of FOR [KEY] UPDATE/SHARE clauses
  *
  * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to
- * deal with.  In addition to a pointer to the related entry in es_rowMarks,
+ * deal with.  In addition to a pointer to the related entry in es_rowmarks,
  * this struct carries the column number(s) of the resjunk columns associated
  * with the rowmark (see comments for PlanRowMark for more detail).  In the
  * case of ModifyTable, there has to be a separate ExecAuxRowMark list for
@@ -638,7 +641,7 @@ typedef struct ExecRowMark
  */
 typedef struct ExecAuxRowMark
 {
-   ExecRowMark *rowmark;       /* related entry in es_rowMarks */
+   ExecRowMark *rowmark;       /* related entry in es_rowmarks */
    AttrNumber  ctidAttNo;      /* resno of ctid junk attribute, if any */
    AttrNumber  toidAttNo;      /* resno of tableoid junk attribute, if any */
    AttrNumber  wholeAttNo;     /* resno of whole-row junk attribute, if any */