xref: /petsc/src/mat/utils/matstash.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
1375d39b6aSBarry Smith #endif
14d7d60843SJed Brown 
159417f4adSLois Curfman McInnes /*
168798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
174c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
184c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
199417f4adSLois Curfman McInnes 
204c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Input Parameters:
234c1ff481SSatish Balay   comm - communicator, required for scatters.
244c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
254c1ff481SSatish Balay 
264c1ff481SSatish Balay   Output Parameters:
274c1ff481SSatish Balay   stash    - the newly created stash
289417f4adSLois Curfman McInnes */
299371c9d4SSatish Balay PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash) {
30533163c2SBarry Smith   PetscInt  max, *opt, nopt, i;
31ace3abfcSBarry Smith   PetscBool flg;
32bc5ccf88SSatish Balay 
333a40ed3dSBarry Smith   PetscFunctionBegin;
34bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35752ec6e0SSatish Balay   stash->comm = comm;
368865f1eaSKarl Rupp 
379566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
389566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
399566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
42533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
43533163c2SBarry Smith 
44434d7ff9SSatish Balay   nopt = stash->size;
459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nopt, &opt));
469566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
47434d7ff9SSatish Balay   if (flg) {
48434d7ff9SSatish Balay     if (nopt == 1) max = opt[0];
49434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
50434d7ff9SSatish Balay     else if (stash->rank < nopt) max = opt[stash->rank];
51f4ab19daSSatish Balay     else max = 0; /* Use default */
52434d7ff9SSatish Balay     stash->umax = max;
53434d7ff9SSatish Balay   } else {
54434d7ff9SSatish Balay     stash->umax = 0;
55434d7ff9SSatish Balay   }
569566063dSJacob Faibussowitsch   PetscCall(PetscFree(opt));
574c1ff481SSatish Balay   if (bs <= 0) bs = 1;
58a2d1c673SSatish Balay 
594c1ff481SSatish Balay   stash->bs         = bs;
609417f4adSLois Curfman McInnes   stash->nmax       = 0;
61434d7ff9SSatish Balay   stash->oldnmax    = 0;
629417f4adSLois Curfman McInnes   stash->n          = 0;
634c1ff481SSatish Balay   stash->reallocs   = -1;
64f4259b30SLisandro Dalcin   stash->space_head = NULL;
65f4259b30SLisandro Dalcin   stash->space      = NULL;
669417f4adSLois Curfman McInnes 
67f4259b30SLisandro Dalcin   stash->send_waits  = NULL;
68f4259b30SLisandro Dalcin   stash->recv_waits  = NULL;
69f4259b30SLisandro Dalcin   stash->send_status = NULL;
70bc5ccf88SSatish Balay   stash->nsends      = 0;
71bc5ccf88SSatish Balay   stash->nrecvs      = 0;
72f4259b30SLisandro Dalcin   stash->svalues     = NULL;
73f4259b30SLisandro Dalcin   stash->rvalues     = NULL;
74f4259b30SLisandro Dalcin   stash->rindices    = NULL;
75a2d1c673SSatish Balay   stash->nprocessed  = 0;
7667318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
77d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
788865f1eaSKarl Rupp 
799566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
801667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
81ca723aa4SStefano Zampini   flg = PETSC_FALSE;
829566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
83b30fb036SBarry Smith   if (!flg) {
84d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
85d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
87d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88ac2b2aa0SJed Brown   } else {
891667be42SBarry Smith #endif
90ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
91ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
92ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
93ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
941667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
95ac2b2aa0SJed Brown   }
961667be42SBarry Smith #endif
973a40ed3dSBarry Smith   PetscFunctionReturn(0);
989417f4adSLois Curfman McInnes }
999417f4adSLois Curfman McInnes 
1004c1ff481SSatish Balay /*
1018798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1024c1ff481SSatish Balay */
1039371c9d4SSatish Balay PetscErrorCode MatStashDestroy_Private(MatStash *stash) {
104bc5ccf88SSatish Balay   PetscFunctionBegin;
1059566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1069566063dSJacob Faibussowitsch   if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
1078865f1eaSKarl Rupp 
108f4259b30SLisandro Dalcin   stash->space = NULL;
1098865f1eaSKarl Rupp 
1109566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->flg_v));
111bc5ccf88SSatish Balay   PetscFunctionReturn(0);
112bc5ccf88SSatish Balay }
113bc5ccf88SSatish Balay 
1144c1ff481SSatish Balay /*
11567318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1164c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
11767318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1184c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1194c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1204c1ff481SSatish Balay    so that the same value can be used the next time through.
1214c1ff481SSatish Balay */
1229371c9d4SSatish Balay PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) {
123ac2b2aa0SJed Brown   PetscFunctionBegin;
1249566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterEnd)(stash));
125ac2b2aa0SJed Brown   PetscFunctionReturn(0);
126ac2b2aa0SJed Brown }
127ac2b2aa0SJed Brown 
1289371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) {
129533163c2SBarry Smith   PetscInt    nsends = stash->nsends, bs2, oldnmax, i;
130a2d1c673SSatish Balay   MPI_Status *send_status;
131a2d1c673SSatish Balay 
1323a40ed3dSBarry Smith   PetscFunctionBegin;
133533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
134a2d1c673SSatish Balay   /* wait on sends */
135a2d1c673SSatish Balay   if (nsends) {
1369566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * nsends, &send_status));
1379566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
1389566063dSJacob Faibussowitsch     PetscCall(PetscFree(send_status));
139a2d1c673SSatish Balay   }
140a2d1c673SSatish Balay 
141c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
142434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
143434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
144b9b97703SBarry Smith   if (stash->n) {
14594b769a5SSatish Balay     bs2     = stash->bs * stash->bs;
1468a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
147434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
148b9b97703SBarry Smith   }
149434d7ff9SSatish Balay 
150d07ff455SSatish Balay   stash->nmax       = 0;
151d07ff455SSatish Balay   stash->n          = 0;
1524c1ff481SSatish Balay   stash->reallocs   = -1;
153a2d1c673SSatish Balay   stash->nprocessed = 0;
1548865f1eaSKarl Rupp 
1559566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1568865f1eaSKarl Rupp 
157f4259b30SLisandro Dalcin   stash->space = NULL;
1588865f1eaSKarl Rupp 
1599566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->send_waits));
1609566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recv_waits));
1619566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->svalues, stash->sindices));
1629566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues[0]));
1639566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues));
1649566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices[0]));
1659566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices));
1663a40ed3dSBarry Smith   PetscFunctionReturn(0);
1679417f4adSLois Curfman McInnes }
1689417f4adSLois Curfman McInnes 
1694c1ff481SSatish Balay /*
1706aad120cSJose E. Roman    MatStashGetInfo_Private - Gets the relevant statistics of the stash
1714c1ff481SSatish Balay 
1724c1ff481SSatish Balay    Input Parameters:
1734c1ff481SSatish Balay    stash    - the stash
17494b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1754c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1764c1ff481SSatish Balay 
1774c1ff481SSatish Balay */
1789371c9d4SSatish Balay PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs) {
179c1ac3661SBarry Smith   PetscInt bs2 = stash->bs * stash->bs;
18094b769a5SSatish Balay 
1813a40ed3dSBarry Smith   PetscFunctionBegin;
1821ecfd215SBarry Smith   if (nstash) *nstash = stash->n * bs2;
1831ecfd215SBarry Smith   if (reallocs) {
184434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
185434d7ff9SSatish Balay     else *reallocs = stash->reallocs;
1861ecfd215SBarry Smith   }
187bc5ccf88SSatish Balay   PetscFunctionReturn(0);
188bc5ccf88SSatish Balay }
1894c1ff481SSatish Balay 
1904c1ff481SSatish Balay /*
1918798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1924c1ff481SSatish Balay 
1934c1ff481SSatish Balay    Input Parameters:
1944c1ff481SSatish Balay    stash  - the stash
1954c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1964c1ff481SSatish Balay             this value is used while allocating memory.
1974c1ff481SSatish Balay */
1989371c9d4SSatish Balay PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max) {
199bc5ccf88SSatish Balay   PetscFunctionBegin;
200434d7ff9SSatish Balay   stash->umax = max;
2013a40ed3dSBarry Smith   PetscFunctionReturn(0);
20297530c3fSBarry Smith }
20397530c3fSBarry Smith 
2048798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2054c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2064c1ff481SSatish Balay    being inserted into the stash.
2074c1ff481SSatish Balay 
2084c1ff481SSatish Balay    Input Parameters:
2094c1ff481SSatish Balay    stash - the stash
2104c1ff481SSatish Balay    incr  - the minimum increase requested
2114c1ff481SSatish Balay 
2124c1ff481SSatish Balay    Notes:
2134c1ff481SSatish Balay    This routine doubles the currently used memory.
2144c1ff481SSatish Balay  */
2159371c9d4SSatish Balay static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr) {
2165bd3b8fbSHong Zhang   PetscInt newnmax, bs2 = stash->bs * stash->bs;
2179417f4adSLois Curfman McInnes 
2183a40ed3dSBarry Smith   PetscFunctionBegin;
2199417f4adSLois Curfman McInnes   /* allocate a larger stash */
220c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
221434d7ff9SSatish Balay     if (stash->umax) newnmax = stash->umax / bs2;
222434d7ff9SSatish Balay     else newnmax = DEFAULT_STASH_SIZE / bs2;
223c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
224434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
225434d7ff9SSatish Balay     else newnmax = stash->oldnmax / bs2;
226434d7ff9SSatish Balay   } else newnmax = stash->nmax * 2;
2274c1ff481SSatish Balay   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
228d07ff455SSatish Balay 
22975cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
2309566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
231b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
232b087b6d6SSatish Balay     stash->space_head = stash->space;
23375cae7c1SHong Zhang   }
234b087b6d6SSatish Balay 
235bc5ccf88SSatish Balay   stash->reallocs++;
23675cae7c1SHong Zhang   stash->nmax = newnmax;
237bc5ccf88SSatish Balay   PetscFunctionReturn(0);
238bc5ccf88SSatish Balay }
239bc5ccf88SSatish Balay /*
2408798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2414c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2424c1ff481SSatish Balay   can be inserted with a single call to this function.
2434c1ff481SSatish Balay 
2444c1ff481SSatish Balay   Input Parameters:
2454c1ff481SSatish Balay   stash  - the stash
2464c1ff481SSatish Balay   row    - the global row correspoiding to the values
2474c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2484c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2494c1ff481SSatish Balay   values - the values inserted
250bc5ccf88SSatish Balay */
2519371c9d4SSatish Balay PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries) {
252b400d20cSBarry Smith   PetscInt           i, k, cnt = 0;
25375cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
254bc5ccf88SSatish Balay 
255bc5ccf88SSatish Balay   PetscFunctionBegin;
2564c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
257*48a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
25875cae7c1SHong Zhang   space = stash->space;
25975cae7c1SHong Zhang   k     = space->local_used;
2604c1ff481SSatish Balay   for (i = 0; i < n; i++) {
26114069ce9SStefano Zampini     if (ignorezeroentries && values && values[i] == 0.0) continue;
26275cae7c1SHong Zhang     space->idx[k] = row;
26375cae7c1SHong Zhang     space->idy[k] = idxn[i];
26414069ce9SStefano Zampini     space->val[k] = values ? values[i] : 0.0;
26575cae7c1SHong Zhang     k++;
266b400d20cSBarry Smith     cnt++;
2679417f4adSLois Curfman McInnes   }
268b400d20cSBarry Smith   stash->n += cnt;
269b400d20cSBarry Smith   space->local_used += cnt;
270b400d20cSBarry Smith   space->local_remaining -= cnt;
271a2d1c673SSatish Balay   PetscFunctionReturn(0);
272a2d1c673SSatish Balay }
27375cae7c1SHong Zhang 
2744c1ff481SSatish Balay /*
2758798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2764c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2774c1ff481SSatish Balay   can be inserted with a single call to this function.
278a2d1c673SSatish Balay 
2794c1ff481SSatish Balay   Input Parameters:
2804c1ff481SSatish Balay   stash   - the stash
2814c1ff481SSatish Balay   row     - the global row correspoiding to the values
2824c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2834c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2844c1ff481SSatish Balay   values  - the values inserted
2854c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2864c1ff481SSatish Balay             this happens because the input is columnoriented.
2874c1ff481SSatish Balay */
2889371c9d4SSatish Balay PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries) {
28950e9ab7cSBarry Smith   PetscInt           i, k, cnt = 0;
29075cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
291a2d1c673SSatish Balay 
2924c1ff481SSatish Balay   PetscFunctionBegin;
2934c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
294*48a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
29575cae7c1SHong Zhang   space = stash->space;
29675cae7c1SHong Zhang   k     = space->local_used;
2974c1ff481SSatish Balay   for (i = 0; i < n; i++) {
29814069ce9SStefano Zampini     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
29975cae7c1SHong Zhang     space->idx[k] = row;
30075cae7c1SHong Zhang     space->idy[k] = idxn[i];
30114069ce9SStefano Zampini     space->val[k] = values ? values[i * stepval] : 0.0;
30275cae7c1SHong Zhang     k++;
303b400d20cSBarry Smith     cnt++;
3044c1ff481SSatish Balay   }
305b400d20cSBarry Smith   stash->n += cnt;
306b400d20cSBarry Smith   space->local_used += cnt;
307b400d20cSBarry Smith   space->local_remaining -= cnt;
3084c1ff481SSatish Balay   PetscFunctionReturn(0);
3094c1ff481SSatish Balay }
3104c1ff481SSatish Balay 
3114c1ff481SSatish Balay /*
3128798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3134c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3144c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3154c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3164c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3174c1ff481SSatish Balay 
3184c1ff481SSatish Balay   Input Parameters:
3194c1ff481SSatish Balay   stash  - the stash
3204c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3214c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3224c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3234c1ff481SSatish Balay            values. Each block is of size bs*bs.
3244c1ff481SSatish Balay   values - the values inserted
3254c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
326a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3274c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3284c1ff481SSatish Balay */
3299371c9d4SSatish Balay PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx) {
33075cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
33154f21887SBarry Smith   const PetscScalar *vals;
33254f21887SBarry Smith   PetscScalar       *array;
33375cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
334a2d1c673SSatish Balay 
335a2d1c673SSatish Balay   PetscFunctionBegin;
336*48a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
33775cae7c1SHong Zhang   space = stash->space;
33875cae7c1SHong Zhang   l     = space->local_used;
33975cae7c1SHong Zhang   bs2   = bs * bs;
3404c1ff481SSatish Balay   for (i = 0; i < n; i++) {
34175cae7c1SHong Zhang     space->idx[l] = row;
34275cae7c1SHong Zhang     space->idy[l] = idxn[i];
34375cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
34475cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
34575cae7c1SHong Zhang        funtion call */
34675cae7c1SHong Zhang     array         = space->val + bs2 * l;
34775cae7c1SHong Zhang     vals          = values + idx * bs2 * n + bs * i;
34875cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
34914069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
35075cae7c1SHong Zhang       array++;
35175cae7c1SHong Zhang       vals += cmax * bs;
35275cae7c1SHong Zhang     }
35375cae7c1SHong Zhang     l++;
354a2d1c673SSatish Balay   }
3555bd3b8fbSHong Zhang   stash->n += n;
35675cae7c1SHong Zhang   space->local_used += n;
35775cae7c1SHong Zhang   space->local_remaining -= n;
3584c1ff481SSatish Balay   PetscFunctionReturn(0);
3594c1ff481SSatish Balay }
3604c1ff481SSatish Balay 
3614c1ff481SSatish Balay /*
3628798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3634c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3644c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3654c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3664c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3674c1ff481SSatish Balay 
3684c1ff481SSatish Balay   Input Parameters:
3694c1ff481SSatish Balay   stash  - the stash
3704c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3714c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3724c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3734c1ff481SSatish Balay            values. Each block is of size bs*bs.
3744c1ff481SSatish Balay   values - the values inserted
3754c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
376a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3774c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3784c1ff481SSatish Balay */
3799371c9d4SSatish Balay PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx) {
38075cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
38154f21887SBarry Smith   const PetscScalar *vals;
38254f21887SBarry Smith   PetscScalar       *array;
38375cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
3844c1ff481SSatish Balay 
3854c1ff481SSatish Balay   PetscFunctionBegin;
386*48a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
38775cae7c1SHong Zhang   space = stash->space;
38875cae7c1SHong Zhang   l     = space->local_used;
38975cae7c1SHong Zhang   bs2   = bs * bs;
3904c1ff481SSatish Balay   for (i = 0; i < n; i++) {
39175cae7c1SHong Zhang     space->idx[l] = row;
39275cae7c1SHong Zhang     space->idy[l] = idxn[i];
39375cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
39475cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
39575cae7c1SHong Zhang      funtion call */
39675cae7c1SHong Zhang     array         = space->val + bs2 * l;
39775cae7c1SHong Zhang     vals          = values + idx * bs2 * n + bs * i;
39875cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
39914069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
40075cae7c1SHong Zhang       array += bs;
40175cae7c1SHong Zhang       vals += rmax * bs;
40275cae7c1SHong Zhang     }
4035bd3b8fbSHong Zhang     l++;
404a2d1c673SSatish Balay   }
4055bd3b8fbSHong Zhang   stash->n += n;
40675cae7c1SHong Zhang   space->local_used += n;
40775cae7c1SHong Zhang   space->local_remaining -= n;
4083a40ed3dSBarry Smith   PetscFunctionReturn(0);
4099417f4adSLois Curfman McInnes }
4104c1ff481SSatish Balay /*
4118798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4124c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4134c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4144c1ff481SSatish Balay   processors.
415bc5ccf88SSatish Balay 
4164c1ff481SSatish Balay   Input Parameters:
4174c1ff481SSatish Balay   stash  - the stash
4184c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4194c1ff481SSatish Balay            for each node.
4204c1ff481SSatish Balay 
42195452b02SPatrick Sanan   Notes:
42295452b02SPatrick Sanan     The 'owners' array in the cased of the blocked-stash has the
4234c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4244c1ff481SSatish Balay   the proper global indices.
4254c1ff481SSatish Balay */
4269371c9d4SSatish Balay PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners) {
427ac2b2aa0SJed Brown   PetscFunctionBegin;
4289566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterBegin)(mat, stash, owners));
429ac2b2aa0SJed Brown   PetscFunctionReturn(0);
430ac2b2aa0SJed Brown }
431ac2b2aa0SJed Brown 
4329371c9d4SSatish Balay static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners) {
433c1ac3661SBarry Smith   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
434fe09c992SBarry Smith   PetscInt           size = stash->size, nsends;
43575cae7c1SHong Zhang   PetscInt           count, *sindices, **rindices, i, j, idx, lastidx, l;
43654f21887SBarry Smith   PetscScalar      **rvalues, *svalues;
437bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
438563fb871SSatish Balay   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
43976ec1555SBarry Smith   PetscMPIInt       *sizes, *nlengths, nreceives;
4405bd3b8fbSHong Zhang   PetscInt          *sp_idx, *sp_idy;
44154f21887SBarry Smith   PetscScalar       *sp_val;
4425bd3b8fbSHong Zhang   PetscMatStashSpace space, space_next;
443bc5ccf88SSatish Balay 
444bc5ccf88SSatish Balay   PetscFunctionBegin;
4454b4eb8d3SJed Brown   { /* make sure all processors are either in INSERTMODE or ADDMODE */
4464b4eb8d3SJed Brown     InsertMode addv;
4471c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
44808401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
4494b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4504b4eb8d3SJed Brown   }
4514b4eb8d3SJed Brown 
4524c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
45375cae7c1SHong Zhang 
454bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
4559566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
4569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
457a2d1c673SSatish Balay 
45875cae7c1SHong Zhang   i = j   = 0;
4597357eb19SBarry Smith   lastidx = -1;
4605bd3b8fbSHong Zhang   space   = stash->space_head;
4616c4ed002SBarry Smith   while (space) {
46275cae7c1SHong Zhang     space_next = space->next;
4635bd3b8fbSHong Zhang     sp_idx     = space->idx;
46475cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
4657357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4665bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4677357eb19SBarry Smith       lastidx = idx;
4687357eb19SBarry Smith       for (; j < size; j++) {
4694c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j + 1]) {
4709371c9d4SSatish Balay           nlengths[j]++;
4719371c9d4SSatish Balay           owner[i] = j;
4729371c9d4SSatish Balay           break;
473bc5ccf88SSatish Balay         }
474bc5ccf88SSatish Balay       }
47575cae7c1SHong Zhang       i++;
47675cae7c1SHong Zhang     }
47775cae7c1SHong Zhang     space = space_next;
478bc5ccf88SSatish Balay   }
479071fcb05SBarry Smith 
480563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
4819566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
482563fb871SSatish Balay   for (i = 0, nsends = 0; i < size; i++) {
4838865f1eaSKarl Rupp     if (nlengths[i]) {
4849371c9d4SSatish Balay       sizes[i] = 1;
4859371c9d4SSatish Balay       nsends++;
4868865f1eaSKarl Rupp     }
487563fb871SSatish Balay   }
488bc5ccf88SSatish Balay 
4899371c9d4SSatish Balay   {
4909371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
491563fb871SSatish Balay     /* Determine the number of messages to expect, their lengths, from from-ids */
4929566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
4939566063dSJacob Faibussowitsch     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
494563fb871SSatish Balay     /* since clubbing row,col - lengths are multiplied by 2 */
495563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
4969566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
497563fb871SSatish Balay     /* values are size 'bs2' lengths (and remove earlier factor 2 */
498563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
4999566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
5009566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
5019371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
5029371c9d4SSatish Balay   }
503bc5ccf88SSatish Balay 
504bc5ccf88SSatish Balay   /* do sends:
505bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
506bc5ccf88SSatish Balay          the ith processor
507bc5ccf88SSatish Balay   */
5089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
5099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
5109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
511a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
5129371c9d4SSatish Balay   startv[0] = 0;
5139371c9d4SSatish Balay   starti[0] = 0;
514bc5ccf88SSatish Balay   for (i = 1; i < size; i++) {
515563fb871SSatish Balay     startv[i] = startv[i - 1] + nlengths[i - 1];
516533163c2SBarry Smith     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
517bc5ccf88SSatish Balay   }
51875cae7c1SHong Zhang 
51975cae7c1SHong Zhang   i     = 0;
5205bd3b8fbSHong Zhang   space = stash->space_head;
5216c4ed002SBarry Smith   while (space) {
52275cae7c1SHong Zhang     space_next = space->next;
5235bd3b8fbSHong Zhang     sp_idx     = space->idx;
5245bd3b8fbSHong Zhang     sp_idy     = space->idy;
5255bd3b8fbSHong Zhang     sp_val     = space->val;
52675cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
527bc5ccf88SSatish Balay       j = owner[i];
528a2d1c673SSatish Balay       if (bs2 == 1) {
5295bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
530a2d1c673SSatish Balay       } else {
531c1ac3661SBarry Smith         PetscInt     k;
53254f21887SBarry Smith         PetscScalar *buf1, *buf2;
5334c1ff481SSatish Balay         buf1 = svalues + bs2 * startv[j];
534b087b6d6SSatish Balay         buf2 = space->val + bs2 * l;
5358865f1eaSKarl Rupp         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
536a2d1c673SSatish Balay       }
5375bd3b8fbSHong Zhang       sindices[starti[j]]               = sp_idx[l];
5385bd3b8fbSHong Zhang       sindices[starti[j] + nlengths[j]] = sp_idy[l];
539bc5ccf88SSatish Balay       startv[j]++;
540bc5ccf88SSatish Balay       starti[j]++;
54175cae7c1SHong Zhang       i++;
54275cae7c1SHong Zhang     }
54375cae7c1SHong Zhang     space = space_next;
544bc5ccf88SSatish Balay   }
545bc5ccf88SSatish Balay   startv[0] = 0;
5468865f1eaSKarl Rupp   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
547e5d0e772SSatish Balay 
548bc5ccf88SSatish Balay   for (i = 0, count = 0; i < size; i++) {
54976ec1555SBarry Smith     if (sizes[i]) {
5509566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
5519566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
552bc5ccf88SSatish Balay     }
553b85c94c3SSatish Balay   }
5546cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5559566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends));
556e5d0e772SSatish Balay   for (i = 0; i < size; i++) {
557*48a46eb9SPierre Jolivet     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
558e5d0e772SSatish Balay   }
559e5d0e772SSatish Balay #endif
5609566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
5619566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
5629566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
5639566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
564a2d1c673SSatish Balay 
565563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
5669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
567563fb871SSatish Balay 
568563fb871SSatish Balay   for (i = 0; i < nreceives; i++) {
569563fb871SSatish Balay     recv_waits[2 * i]     = recv_waits1[i];
570563fb871SSatish Balay     recv_waits[2 * i + 1] = recv_waits2[i];
571563fb871SSatish Balay   }
572563fb871SSatish Balay   stash->recv_waits = recv_waits;
5738865f1eaSKarl Rupp 
5749566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
5759566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
576563fb871SSatish Balay 
577c05d87d6SBarry Smith   stash->svalues         = svalues;
578c05d87d6SBarry Smith   stash->sindices        = sindices;
579c05d87d6SBarry Smith   stash->rvalues         = rvalues;
580c05d87d6SBarry Smith   stash->rindices        = rindices;
581c05d87d6SBarry Smith   stash->send_waits      = send_waits;
582c05d87d6SBarry Smith   stash->nsends          = nsends;
583c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
58467318a8aSJed Brown   stash->reproduce_count = 0;
585bc5ccf88SSatish Balay   PetscFunctionReturn(0);
586bc5ccf88SSatish Balay }
587bc5ccf88SSatish Balay 
588a2d1c673SSatish Balay /*
5898798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5908798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5914c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5924c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5934c1ff481SSatish Balay 
5944c1ff481SSatish Balay    Input Parameters:
5954c1ff481SSatish Balay    stash - the stash
5964c1ff481SSatish Balay 
5974c1ff481SSatish Balay    Output Parameters:
5984c1ff481SSatish Balay    nvals - the number of entries in the current message.
5994c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6004c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6014c1ff481SSatish Balay    vals  - the values
6024c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6034c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6044c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
605a2d1c673SSatish Balay */
6069371c9d4SSatish Balay PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg) {
607ac2b2aa0SJed Brown   PetscFunctionBegin;
6089566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
609ac2b2aa0SJed Brown   PetscFunctionReturn(0);
610ac2b2aa0SJed Brown }
611ac2b2aa0SJed Brown 
6129371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg) {
613533163c2SBarry Smith   PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
614fe09c992SBarry Smith   PetscInt    bs2;
615a2d1c673SSatish Balay   MPI_Status  recv_status;
616ace3abfcSBarry Smith   PetscBool   match_found = PETSC_FALSE;
617bc5ccf88SSatish Balay 
618bc5ccf88SSatish Balay   PetscFunctionBegin;
619a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
620a2d1c673SSatish Balay   /* Return if no more messages to process */
6218865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
622a2d1c673SSatish Balay 
6234c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
62467318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
625a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
626a2d1c673SSatish Balay   while (!match_found) {
62767318a8aSJed Brown     if (stash->reproduce) {
62867318a8aSJed Brown       i = stash->reproduce_count++;
6299566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
63067318a8aSJed Brown     } else {
6319566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
63267318a8aSJed Brown     }
63308401ef6SPierre Jolivet     PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");
634533163c2SBarry Smith 
63567318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
636a2d1c673SSatish Balay     if (i % 2) {
6379566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
638c1dc657dSBarry Smith       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
639a2d1c673SSatish Balay       *nvals                            = *nvals / bs2;
640563fb871SSatish Balay     } else {
6419566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
642563fb871SSatish Balay       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
643563fb871SSatish Balay       *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
644bc5ccf88SSatish Balay     }
645a2d1c673SSatish Balay 
646cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
647c1dc657dSBarry Smith     i1 = flg_v[2 * recv_status.MPI_SOURCE];
648c1dc657dSBarry Smith     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
649a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
650563fb871SSatish Balay       *rows = stash->rindices[i2];
651a2d1c673SSatish Balay       *cols = *rows + *nvals;
652563fb871SSatish Balay       *vals = stash->rvalues[i1];
653a2d1c673SSatish Balay       *flg  = 1;
654a2d1c673SSatish Balay       stash->nprocessed++;
65535d8aa7fSBarry Smith       match_found = PETSC_TRUE;
656bc5ccf88SSatish Balay     }
657bc5ccf88SSatish Balay   }
658bc5ccf88SSatish Balay   PetscFunctionReturn(0);
659bc5ccf88SSatish Balay }
660d7d60843SJed Brown 
661e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
662d7d60843SJed Brown typedef struct {
663d7d60843SJed Brown   PetscInt    row;
664d7d60843SJed Brown   PetscInt    col;
665d7d60843SJed Brown   PetscScalar vals[1]; /* Actually an array of length bs2 */
666d7d60843SJed Brown } MatStashBlock;
667d7d60843SJed Brown 
6689371c9d4SSatish Balay static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode) {
669d7d60843SJed Brown   PetscMatStashSpace space;
670d7d60843SJed Brown   PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
671d7d60843SJed Brown   PetscScalar      **valptr;
672d7d60843SJed Brown 
673d7d60843SJed Brown   PetscFunctionBegin;
6749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
675d7d60843SJed Brown   for (space = stash->space_head, cnt = 0; space; space = space->next) {
676d7d60843SJed Brown     for (i = 0; i < space->local_used; i++) {
677d7d60843SJed Brown       row[cnt]    = space->idx[i];
678d7d60843SJed Brown       col[cnt]    = space->idy[i];
679d7d60843SJed Brown       valptr[cnt] = &space->val[i * bs2];
680d7d60843SJed Brown       perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
681d7d60843SJed Brown       cnt++;
682d7d60843SJed Brown     }
683d7d60843SJed Brown   }
68408401ef6SPierre Jolivet   PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
6859566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
686d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
687d7d60843SJed Brown   for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
688d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
689d7d60843SJed Brown       PetscInt colstart;
6909566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
691d7d60843SJed Brown       for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
692d7d60843SJed Brown         PetscInt       j, l;
693d7d60843SJed Brown         MatStashBlock *block;
6949566063dSJacob Faibussowitsch         PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
695d7d60843SJed Brown         block->row = row[rowstart];
696d7d60843SJed Brown         block->col = col[colstart];
6979566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
698d7d60843SJed Brown         for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
699d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
700d7d60843SJed Brown             for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
701d7d60843SJed Brown           } else {
7029566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
703d7d60843SJed Brown           }
704d7d60843SJed Brown         }
705d7d60843SJed Brown         colstart = j;
706d7d60843SJed Brown       }
707d7d60843SJed Brown       rowstart = i;
708d7d60843SJed Brown     }
709d7d60843SJed Brown   }
7109566063dSJacob Faibussowitsch   PetscCall(PetscFree4(row, col, valptr, perm));
711d7d60843SJed Brown   PetscFunctionReturn(0);
712d7d60843SJed Brown }
713d7d60843SJed Brown 
7149371c9d4SSatish Balay static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) {
715d7d60843SJed Brown   PetscFunctionBegin;
716d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
717d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
718d7d60843SJed Brown     PetscMPIInt  blocklens[2];
719d7d60843SJed Brown     MPI_Aint     displs[2];
720d7d60843SJed Brown     MPI_Datatype types[2], stype;
721223b4c07SBarry Smith     /*
722223b4c07SBarry Smith         DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
723f41aa5efSJunchao Zhang        std::complex itself has standard layout, so does DummyBlock, recursively.
724a5b23f4aSJose E. Roman        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
725f41aa5efSJunchao Zhang        though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
726f41aa5efSJunchao Zhang        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
727f41aa5efSJunchao Zhang 
728f41aa5efSJunchao Zhang        We can test if std::complex has standard layout with the following code:
729f41aa5efSJunchao Zhang        #include <iostream>
730f41aa5efSJunchao Zhang        #include <type_traits>
731f41aa5efSJunchao Zhang        #include <complex>
732f41aa5efSJunchao Zhang        int main() {
733f41aa5efSJunchao Zhang          std::cout << std::boolalpha;
734f41aa5efSJunchao Zhang          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
735f41aa5efSJunchao Zhang        }
736f41aa5efSJunchao Zhang        Output: true
7379503c6c6SJed Brown      */
7389371c9d4SSatish Balay     struct DummyBlock {
7399371c9d4SSatish Balay       PetscInt    row, col;
7409371c9d4SSatish Balay       PetscScalar vals;
7419371c9d4SSatish Balay     };
742d7d60843SJed Brown 
7439503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
744d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
745d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
746d7d60843SJed Brown     }
7479566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
7489566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
7499566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
750d7d60843SJed Brown     blocklens[0] = 2;
751d7d60843SJed Brown     blocklens[1] = bs2;
7529503c6c6SJed Brown     displs[0]    = offsetof(struct DummyBlock, row);
7539503c6c6SJed Brown     displs[1]    = offsetof(struct DummyBlock, vals);
754d7d60843SJed Brown     types[0]     = MPIU_INT;
755d7d60843SJed Brown     types[1]     = MPIU_SCALAR;
7569566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
7579566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stype));
7589566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
7599566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
7609566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_free(&stype));
761d7d60843SJed Brown   }
762d7d60843SJed Brown   PetscFunctionReturn(0);
763d7d60843SJed Brown }
764d7d60843SJed Brown 
765d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
766d7d60843SJed Brown  * Here we post the main sends.
767d7d60843SJed Brown  */
7689371c9d4SSatish Balay static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx) {
769d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
770d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)sdata;
771d7d60843SJed Brown 
772d7d60843SJed Brown   PetscFunctionBegin;
77308401ef6SPierre Jolivet   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
7749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
775d7d60843SJed Brown   stash->sendframes[rankid].count   = hdr->count;
776d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
777d7d60843SJed Brown   PetscFunctionReturn(0);
778d7d60843SJed Brown }
779d7d60843SJed Brown 
780223b4c07SBarry Smith /*
781223b4c07SBarry Smith     Callback invoked by target after receiving rendezvous message.
782223b4c07SBarry Smith     Here we post the main recvs.
783d7d60843SJed Brown  */
7849371c9d4SSatish Balay static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx) {
785d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
786d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)rdata;
787d7d60843SJed Brown   MatStashFrame  *frame;
788d7d60843SJed Brown 
789d7d60843SJed Brown   PetscFunctionBegin;
7909566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
7919566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
7929566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
793d7d60843SJed Brown   frame->count   = hdr->count;
794d7d60843SJed Brown   frame->pending = 1;
795d7d60843SJed Brown   PetscFunctionReturn(0);
796d7d60843SJed Brown }
797d7d60843SJed Brown 
798d7d60843SJed Brown /*
799d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
800d7d60843SJed Brown  */
8019371c9d4SSatish Balay static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[]) {
802d7d60843SJed Brown   size_t nblocks;
803d7d60843SJed Brown   char  *sendblocks;
804d7d60843SJed Brown 
805d7d60843SJed Brown   PetscFunctionBegin;
80676bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
8074b4eb8d3SJed Brown     InsertMode addv;
8081c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
80908401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
8104b4eb8d3SJed Brown   }
8114b4eb8d3SJed Brown 
8129566063dSJacob Faibussowitsch   PetscCall(MatStashBlockTypeSetUp(stash));
8139566063dSJacob Faibussowitsch   PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
8149566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
8159566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
81660f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
81723b7d1baSJed Brown     PetscInt i;
81823b7d1baSJed Brown     size_t   b;
81997da8949SJed Brown     for (i = 0, b = 0; i < stash->nsendranks; i++) {
82097da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
82197da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
82297da8949SJed Brown       stash->sendhdr[i].count     = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
82397da8949SJed Brown       for (; b < nblocks; b++) {
82497da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
825aed4548fSBarry Smith         PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
82697da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
82797da8949SJed Brown         stash->sendhdr[i].count++;
82897da8949SJed Brown       }
82997da8949SJed Brown     }
83097da8949SJed Brown   } else { /* Dynamically count and pack (first time) */
83123b7d1baSJed Brown     PetscInt sendno;
83223b7d1baSJed Brown     size_t   i, rowstart;
833d7d60843SJed Brown 
834d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
835d7d60843SJed Brown     stash->nsendranks = 0;
836d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
8377e2ea869SJed Brown       PetscInt       owner;
838d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8399566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
840d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
841d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
842d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8437e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
844d7d60843SJed Brown       }
845d7d60843SJed Brown       stash->nsendranks++;
846d7d60843SJed Brown       rowstart = i;
847d7d60843SJed Brown     }
8489566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));
849d7d60843SJed Brown 
850d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
851d7d60843SJed Brown     sendno = 0;
852d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
853d7d60843SJed Brown       PetscInt       owner;
854d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8559566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
856d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
857d7d60843SJed Brown       stash->sendranks[sendno] = owner;
858d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
859d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8607e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
861d7d60843SJed Brown       }
862d7d60843SJed Brown       stash->sendframes[sendno].buffer  = sendblock_rowstart;
863d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
864d7d60843SJed Brown       stash->sendhdr[sendno].count      = i - rowstart;
865d7d60843SJed Brown       sendno++;
866d7d60843SJed Brown       rowstart = i;
867d7d60843SJed Brown     }
86808401ef6SPierre Jolivet     PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
869d7d60843SJed Brown   }
870d7d60843SJed Brown 
8714b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
8724b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
8734b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
87423b7d1baSJed Brown     size_t i;
8754b4eb8d3SJed Brown     for (i = 0; i < nblocks; i++) {
8764b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8774b4eb8d3SJed Brown       sendblock_i->row           = -(sendblock_i->row + 1);
8784b4eb8d3SJed Brown     }
8794b4eb8d3SJed Brown   }
8804b4eb8d3SJed Brown 
88160f34548SJunchao Zhang   if (stash->first_assembly_done) {
88297da8949SJed Brown     PetscMPIInt i, tag;
8839566063dSJacob Faibussowitsch     PetscCall(PetscCommGetNewTag(stash->comm, &tag));
884*48a46eb9SPierre Jolivet     for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
885*48a46eb9SPierre Jolivet     for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
88697da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
88797da8949SJed Brown   } else {
8889371c9d4SSatish Balay     PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
8899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
89097da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
89197da8949SJed Brown   }
892d7d60843SJed Brown 
8939566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
894d7d60843SJed Brown   stash->recvframe_active    = NULL;
895d7d60843SJed Brown   stash->recvframe_i         = 0;
896d7d60843SJed Brown   stash->some_i              = 0;
897d7d60843SJed Brown   stash->some_count          = 0;
898d7d60843SJed Brown   stash->recvcount           = 0;
89960f34548SJunchao Zhang   stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9004b4eb8d3SJed Brown   stash->insertmode          = &mat->insertmode;
901d7d60843SJed Brown   PetscFunctionReturn(0);
902d7d60843SJed Brown }
903d7d60843SJed Brown 
9049371c9d4SSatish Balay static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg) {
905d7d60843SJed Brown   MatStashBlock *block;
906d7d60843SJed Brown 
907d7d60843SJed Brown   PetscFunctionBegin;
908d7d60843SJed Brown   *flg = 0;
909d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
910d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
911d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
9129566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
913d7d60843SJed Brown       stash->some_i = 0;
914d7d60843SJed Brown     }
915d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
916d7d60843SJed Brown     stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
917d7d60843SJed Brown     if (stash->use_status) {                                  /* Count what was actually sent */
9189566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count));
919d7d60843SJed Brown     }
9204b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9214b4eb8d3SJed Brown       block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
9224b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
923aed4548fSBarry Smith       PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
924aed4548fSBarry Smith       PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
9254b4eb8d3SJed Brown     }
926d7d60843SJed Brown     stash->some_i++;
927d7d60843SJed Brown     stash->recvcount++;
928d7d60843SJed Brown     stash->recvframe_i = 0;
929d7d60843SJed Brown   }
930d7d60843SJed Brown   *n    = 1;
931d7d60843SJed Brown   block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
9324b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
933d7d60843SJed Brown   *row = &block->row;
934d7d60843SJed Brown   *col = &block->col;
935d7d60843SJed Brown   *val = block->vals;
936d7d60843SJed Brown   stash->recvframe_i++;
937d7d60843SJed Brown   *flg = 1;
938d7d60843SJed Brown   PetscFunctionReturn(0);
939d7d60843SJed Brown }
940d7d60843SJed Brown 
9419371c9d4SSatish Balay static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) {
942d7d60843SJed Brown   PetscFunctionBegin;
9439566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
94460f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
945d2b3fd65SBarry Smith     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
9463575f486SJed Brown   } else { /* No reuse, so collect everything. */
9479566063dSJacob Faibussowitsch     PetscCall(MatStashScatterDestroy_BTS(stash));
94897da8949SJed Brown   }
949d7d60843SJed Brown 
950d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
951d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
952d7d60843SJed Brown      Also update the oldmax, only if it increases */
953d7d60843SJed Brown   if (stash->n) {
954d7d60843SJed Brown     PetscInt bs2     = stash->bs * stash->bs;
955d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
956d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
957d7d60843SJed Brown   }
958d7d60843SJed Brown 
959d7d60843SJed Brown   stash->nmax       = 0;
960d7d60843SJed Brown   stash->n          = 0;
961d7d60843SJed Brown   stash->reallocs   = -1;
962d7d60843SJed Brown   stash->nprocessed = 0;
963d7d60843SJed Brown 
9649566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
965d7d60843SJed Brown 
966f4259b30SLisandro Dalcin   stash->space = NULL;
967d7d60843SJed Brown 
968d7d60843SJed Brown   PetscFunctionReturn(0);
969d7d60843SJed Brown }
970d7d60843SJed Brown 
9719371c9d4SSatish Balay PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) {
972d7d60843SJed Brown   PetscFunctionBegin;
9739566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
9749566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
975d7d60843SJed Brown   stash->recvframes = NULL;
9769566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
977*48a46eb9SPierre Jolivet   if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
978d7d60843SJed Brown   stash->nsendranks = 0;
979d7d60843SJed Brown   stash->nrecvranks = 0;
9809566063dSJacob Faibussowitsch   PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
9819566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->sendreqs));
9829566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvreqs));
9839566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvranks));
9849566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvhdr));
9859566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
986d7d60843SJed Brown   PetscFunctionReturn(0);
987d7d60843SJed Brown }
9881667be42SBarry Smith #endif
989