Minor cleanup of the BRIN parallel build code
authorTomas Vondra
Sat, 30 Dec 2023 21:50:54 +0000 (22:50 +0100)
committerTomas Vondra
Sat, 30 Dec 2023 22:15:04 +0000 (23:15 +0100)
Commit b437571714 added support for parallel builds for BRIN indexes,
using code similar to BTREE parallel builds, and also a new tuplesort
variant. This commit simplifies the new code in two ways:

* The "spool" grouping tuplesort and the heap/index is not necessary.
  The heap/index are available as separate arguments, causing confusion.
  So remove the spool, and use the tuplesort directly.

* The new tuplesort variant does not need the heap/index, as it sorts
  simply by the range block number, without accessing the tuple data.
  So simplify that too.

Initial report and patch by Ranier Vilela, further cleanup by me.

Author: Ranier Vilela
Discussion: https://postgr.es/m/CAEudQAqD7f2i4iyEaAz-5o-bf6zXVX-AkNUBm-YjUXEemaEh6A%40mail.gmail.com

src/backend/access/brin/brin.c
src/backend/utils/sort/tuplesortvariants.c
src/include/utils/tuplesort.h
src/tools/pgindent/typedefs.list

index 23f081389b2c51fc9f618202d40a28dcd63b3e0a..5e662a4d4eefefafbb0e2d02425b14ede4dfad3e 100644 (file)
 #define PARALLEL_KEY_WAL_USAGE         UINT64CONST(0xB000000000000004)
 #define PARALLEL_KEY_BUFFER_USAGE      UINT64CONST(0xB000000000000005)
 
-/*
- * Status record for spooling/sorting phase.
- */
-typedef struct BrinSpool
-{
-   Tuplesortstate *sortstate;  /* state data for tuplesort.c */
-   Relation    heap;
-   Relation    index;
-} BrinSpool;
-
 /*
  * Status for index builds performed in parallel.  This is allocated in a
  * dynamic shared memory segment.
@@ -183,7 +173,13 @@ typedef struct BrinBuildState
     */
    BrinLeader *bs_leader;
    int         bs_worker_id;
-   BrinSpool  *bs_spool;
+
+   /*
+    * The sortstate is used by workers (including the leader). It has to be
+    * part of the build state, because that's the only thing passed to the
+    * build callback etc.
+    */
+   Tuplesortstate *bs_sortstate;
 } BrinBuildState;
 
 /*
@@ -231,12 +227,11 @@ static void brin_fill_empty_ranges(BrinBuildState *state,
 /* parallel index builds */
 static void _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index,
                                 bool isconcurrent, int request);
-static void _brin_end_parallel(BrinLeader *btleader, BrinBuildState *state);
+static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state);
 static Size _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
 static void _brin_leader_participate_as_worker(BrinBuildState *buildstate,
                                               Relation heap, Relation index);
 static void _brin_parallel_scan_and_build(BrinBuildState *buildstate,
-                                         BrinSpool *brinspool,
                                          BrinShared *brinshared,
                                          Sharedsort *sharedsort,
                                          Relation heap, Relation index,
@@ -1143,10 +1138,6 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
    state = initialize_brin_buildstate(index, revmap, pagesPerRange,
                                       RelationGetNumberOfBlocks(heap));
 
-   state->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
-   state->bs_spool->heap = heap;
-   state->bs_spool->index = index;
-
    /*
     * Attempt to launch parallel worker scan when required
     *
@@ -1160,11 +1151,13 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
                             indexInfo->ii_ParallelWorkers);
 
    /*
-    * Now scan the relation.  No syncscan allowed here because we want the
-    * heap blocks in physical order.
-    *
     * If parallel build requested and at least one worker process was
-    * successfully launched, set up coordination state
+    * successfully launched, set up coordination state, wait for workers to
+    * complete. Then read all tuples from the shared tuplesort and insert
+    * them into the index.
+    *
+    * In serial mode, simply scan the table and build the index one index
+    * tuple at a time.
     */
    if (state->bs_leader)
    {
@@ -1176,9 +1169,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
            state->bs_leader->nparticipanttuplesorts;
        coordinate->sharedsort = state->bs_leader->sharedsort;
 
-
        /*
-        * Begin serial/leader tuplesort.
+        * Begin leader tuplesort.
         *
         * In cases where parallelism is involved, the leader receives the
         * same share of maintenance_work_mem as a serial sort (it is
@@ -1199,19 +1191,18 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
         * INDEX operation, regardless of the use of parallelism or any other
         * factor.
         */
-       state->bs_spool->sortstate =
-           tuplesort_begin_index_brin(heap, index,
-                                      maintenance_work_mem, coordinate,
+       state->bs_sortstate =
+           tuplesort_begin_index_brin(maintenance_work_mem, coordinate,
                                       TUPLESORT_NONE);
 
-       /*
-        * In parallel mode, wait for workers to complete, and then read all
-        * tuples from the shared tuplesort and insert them into the index.
-        */
        _brin_end_parallel(state->bs_leader, state);
    }
    else                        /* no parallel index build */
    {
+       /*
+        * Now scan the relation.  No syncscan allowed here because we want
+        * the heap blocks in physical order.
+        */
        reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
                                           brinbuildCallback, (void *) state, NULL);
 
@@ -1671,7 +1662,7 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
    state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
    state->bs_leader = NULL;
    state->bs_worker_id = 0;
-   state->bs_spool = NULL;
+   state->bs_sortstate = NULL;
    state->bs_context = CurrentMemoryContext;
    state->bs_emptyTuple = NULL;
    state->bs_emptyTupleLen = 0;
@@ -2002,7 +1993,7 @@ form_and_spill_tuple(BrinBuildState *state)
                          state->bs_dtuple, &size);
 
    /* write the BRIN tuple to the tuplesort */
-   tuplesort_putbrintuple(state->bs_spool->sortstate, tup, size);
+   tuplesort_putbrintuple(state->bs_sortstate, tup, size);
 
    state->bs_numtuples++;
 
@@ -2522,7 +2513,6 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
    Size        tuplen;
    BrinShared *brinshared = brinleader->brinshared;
    BlockNumber prevblkno = InvalidBlockNumber;
-   BrinSpool  *spool;
    MemoryContext rangeCxt,
                oldCxt;
 
@@ -2541,8 +2531,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
    state->bs_numtuples = brinshared->indtuples;
 
    /* do the actual sort in the leader */
-   spool = state->bs_spool;
-   tuplesort_performsort(spool->sortstate);
+   tuplesort_performsort(state->bs_sortstate);
 
    /*
     * Initialize BrinMemTuple we'll use to union summaries from workers (in
@@ -2568,7 +2557,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
     * That probably gives us an index that is cheaper to scan, thanks to
     * mostly getting data from the same index page as before.
     */
-   while ((btup = tuplesort_getbrintuple(spool->sortstate, &tuplen, true)) != NULL)
+   while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
    {
        /* Ranges should be multiples of pages_per_range for the index. */
        Assert(btup->bt_blkno % brinshared->pagesPerRange == 0);
@@ -2640,7 +2629,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
        prevblkno = btup->bt_blkno;
    }
 
-   tuplesort_end(spool->sortstate);
+   tuplesort_end(state->bs_sortstate);
 
    /* Fill the BRIN tuple for the last page range with data. */
    if (prevblkno != InvalidBlockNumber)
@@ -2704,11 +2693,6 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
    BrinLeader *brinleader = buildstate->bs_leader;
    int         sortmem;
 
-   /* Allocate memory and initialize private spool */
-   buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
-   buildstate->bs_spool->heap = buildstate->bs_spool->heap;
-   buildstate->bs_spool->index = buildstate->bs_spool->index;
-
    /*
     * Might as well use reliable figure when doling out maintenance_work_mem
     * (when requested number of workers were not launched, this will be
@@ -2717,16 +2701,14 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
    sortmem = maintenance_work_mem / brinleader->nparticipanttuplesorts;
 
    /* Perform work common to all participants */
-   _brin_parallel_scan_and_build(buildstate, buildstate->bs_spool, brinleader->brinshared,
+   _brin_parallel_scan_and_build(buildstate, brinleader->brinshared,
                                  brinleader->sharedsort, heap, index, sortmem, true);
 }
 
 /*
  * Perform a worker's portion of a parallel sort.
  *
- * This generates a tuplesort for passed btspool, and a second tuplesort
- * state if a second btspool is need (i.e. for unique index builds).  All
- * other spool fields should already be set when this is called.
+ * This generates a tuplesort for the worker portion of the table.
  *
  * sortmem is the amount of working memory to use within each worker,
  * expressed in KBs.
@@ -2734,10 +2716,10 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
  * When this returns, workers are done, and need only release resources.
  */
 static void
-_brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
+_brin_parallel_scan_and_build(BrinBuildState *state,
                              BrinShared *brinshared, Sharedsort *sharedsort,
-                             Relation heap, Relation index, int sortmem,
-                             bool progress)
+                             Relation heap, Relation index,
+                             int sortmem, bool progress)
 {
    SortCoordinate coordinate;
    TableScanDesc scan;
@@ -2751,10 +2733,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
    coordinate->sharedsort = sharedsort;
 
    /* Begin "partial" tuplesort */
-   brinspool->sortstate = tuplesort_begin_index_brin(brinspool->heap,
-                                                     brinspool->index,
-                                                     sortmem, coordinate,
-                                                     TUPLESORT_NONE);
+   state->bs_sortstate = tuplesort_begin_index_brin(sortmem, coordinate,
+                                                    TUPLESORT_NONE);
 
    /* Join parallel scan */
    indexInfo = BuildIndexInfo(index);
@@ -2770,7 +2750,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
    form_and_spill_tuple(state);
 
    /* sort the BRIN ranges built by this worker */
-   tuplesort_performsort(brinspool->sortstate);
+   tuplesort_performsort(state->bs_sortstate);
 
    state->bs_reltuples += reltuples;
 
@@ -2786,7 +2766,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
    /* Notify leader */
    ConditionVariableSignal(&brinshared->workersdonecv);
 
-   tuplesort_end(brinspool->sortstate);
+   tuplesort_end(state->bs_sortstate);
 }
 
 /*
@@ -2844,11 +2824,6 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
                                            brinshared->pagesPerRange,
                                            InvalidBlockNumber);
 
-   /* Initialize worker's own spool */
-   buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
-   buildstate->bs_spool->heap = heapRel;
-   buildstate->bs_spool->index = indexRel;
-
    /* Look up shared state private to tuplesort.c */
    sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
    tuplesort_attach_shared(sharedsort, seg);
@@ -2863,8 +2838,7 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
     */
    sortmem = maintenance_work_mem / brinshared->scantuplesortstates;
 
-   _brin_parallel_scan_and_build(buildstate, buildstate->bs_spool,
-                                 brinshared, sharedsort,
+   _brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
                                  heapRel, indexRel, sortmem, false);
 
    /* Report WAL/buffer usage during parallel execution */
index 90fc605f1ca2725f9e410163817085c12d0aeb8a..27425880a5f84ac2ba25085f0acd243bcbde57cc 100644 (file)
@@ -137,16 +137,6 @@ typedef struct
    uint32      max_buckets;
 } TuplesortIndexHashArg;
 
-/*
- * Data struture pointed by "TuplesortPublic.arg" for the index_brin subcase.
- */
-typedef struct
-{
-   TuplesortIndexArg index;
-
-   /* XXX do we need something here? */
-} TuplesortIndexBrinArg;
-
 /*
  * Data struture pointed by "TuplesortPublic.arg" for the Datum case.
  * Set by tuplesort_begin_datum and used only by the DatumTuple routines.
@@ -562,20 +552,13 @@ tuplesort_begin_index_gist(Relation heapRel,
 }
 
 Tuplesortstate *
-tuplesort_begin_index_brin(Relation heapRel,
-                          Relation indexRel,
-                          int workMem,
+tuplesort_begin_index_brin(int workMem,
                           SortCoordinate coordinate,
                           int sortopt)
 {
    Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
                                                   sortopt);
    TuplesortPublic *base = TuplesortstateGetPublic(state);
-   MemoryContext oldcontext;
-   TuplesortIndexBrinArg *arg;
-
-   oldcontext = MemoryContextSwitchTo(base->maincontext);
-   arg = (TuplesortIndexBrinArg *) palloc(sizeof(TuplesortIndexBrinArg));
 
 #ifdef TRACE_SORT
    if (trace_sort)
@@ -592,12 +575,7 @@ tuplesort_begin_index_brin(Relation heapRel,
    base->writetup = writetup_index_brin;
    base->readtup = readtup_index_brin;
    base->haveDatum1 = true;
-   base->arg = arg;
-
-   arg->index.heapRel = heapRel;
-   arg->index.indexRel = indexRel;
-
-   MemoryContextSwitchTo(oldcontext);
+   base->arg = NULL;
 
    return state;
 }
index 357eb35311d0fc66c95b8365e5e1fbc519369bfc..103ced4005fe3908928fdc9f0e5a66636fc6cf24 100644 (file)
@@ -430,9 +430,7 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
                                                  Relation indexRel,
                                                  int workMem, SortCoordinate coordinate,
                                                  int sortopt);
-extern Tuplesortstate *tuplesort_begin_index_brin(Relation heapRel,
-                                                 Relation indexRel,
-                                                 int workMem, SortCoordinate coordinate,
+extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
                                                  int sortopt);
 extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
                                             Oid sortOperator, Oid sortCollation,
index e37ef9aa76da0cbdef7c33db52b6097fae7d4baf..3310726cdd6f7318150ae5afa6bcf823c800de72 100644 (file)
@@ -307,7 +307,6 @@ BrinRevmap
 BrinShared
 BrinSortTuple
 BrinSpecialSpace
-BrinSpool
 BrinStatsData
 BrinTuple
 BrinValues