xref: /petsc/src/mat/utils/matstash.c (revision 2fe279fdf3e687a416e4eadb7d3c7a82d60442c6)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
1375d39b6aSBarry Smith #endif
14d7d60843SJed Brown 
159417f4adSLois Curfman McInnes /*
168798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
174c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
184c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
199417f4adSLois Curfman McInnes 
204c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Input Parameters:
234c1ff481SSatish Balay   comm - communicator, required for scatters.
244c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
254c1ff481SSatish Balay 
26*2fe279fdSBarry Smith   Output Parameter:
274c1ff481SSatish Balay   stash    - the newly created stash
289417f4adSLois Curfman McInnes */
29d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
30d71ae5a4SJacob Faibussowitsch {
31533163c2SBarry Smith   PetscInt  max, *opt, nopt, i;
32ace3abfcSBarry Smith   PetscBool flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
378865f1eaSKarl Rupp 
389566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
399566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
43533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
44533163c2SBarry Smith 
45434d7ff9SSatish Balay   nopt = stash->size;
469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nopt, &opt));
479566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
48434d7ff9SSatish Balay   if (flg) {
49434d7ff9SSatish Balay     if (nopt == 1) max = opt[0];
50434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
51434d7ff9SSatish Balay     else if (stash->rank < nopt) max = opt[stash->rank];
52f4ab19daSSatish Balay     else max = 0; /* Use default */
53434d7ff9SSatish Balay     stash->umax = max;
54434d7ff9SSatish Balay   } else {
55434d7ff9SSatish Balay     stash->umax = 0;
56434d7ff9SSatish Balay   }
579566063dSJacob Faibussowitsch   PetscCall(PetscFree(opt));
584c1ff481SSatish Balay   if (bs <= 0) bs = 1;
59a2d1c673SSatish Balay 
604c1ff481SSatish Balay   stash->bs         = bs;
619417f4adSLois Curfman McInnes   stash->nmax       = 0;
62434d7ff9SSatish Balay   stash->oldnmax    = 0;
639417f4adSLois Curfman McInnes   stash->n          = 0;
644c1ff481SSatish Balay   stash->reallocs   = -1;
65f4259b30SLisandro Dalcin   stash->space_head = NULL;
66f4259b30SLisandro Dalcin   stash->space      = NULL;
679417f4adSLois Curfman McInnes 
68f4259b30SLisandro Dalcin   stash->send_waits  = NULL;
69f4259b30SLisandro Dalcin   stash->recv_waits  = NULL;
70f4259b30SLisandro Dalcin   stash->send_status = NULL;
71bc5ccf88SSatish Balay   stash->nsends      = 0;
72bc5ccf88SSatish Balay   stash->nrecvs      = 0;
73f4259b30SLisandro Dalcin   stash->svalues     = NULL;
74f4259b30SLisandro Dalcin   stash->rvalues     = NULL;
75f4259b30SLisandro Dalcin   stash->rindices    = NULL;
76a2d1c673SSatish Balay   stash->nprocessed  = 0;
7767318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
78d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
798865f1eaSKarl Rupp 
809566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
811667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
82ca723aa4SStefano Zampini   flg = PETSC_FALSE;
839566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
84b30fb036SBarry Smith   if (!flg) {
85d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
86d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
87d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
88d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
89ac2b2aa0SJed Brown   } else {
901667be42SBarry Smith #endif
91ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
951667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
96ac2b2aa0SJed Brown   }
971667be42SBarry Smith #endif
983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
999417f4adSLois Curfman McInnes }
1009417f4adSLois Curfman McInnes 
1014c1ff481SSatish Balay /*
1028798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1034c1ff481SSatish Balay */
104d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashDestroy_Private(MatStash *stash)
105d71ae5a4SJacob Faibussowitsch {
106bc5ccf88SSatish Balay   PetscFunctionBegin;
1079566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1089566063dSJacob Faibussowitsch   if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
109f4259b30SLisandro Dalcin   stash->space = NULL;
1109566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->flg_v));
1113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
112bc5ccf88SSatish Balay }
113bc5ccf88SSatish Balay 
1144c1ff481SSatish Balay /*
11567318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1164c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
11767318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1184c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1194c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1204c1ff481SSatish Balay    so that the same value can be used the next time through.
1214c1ff481SSatish Balay */
122d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
123d71ae5a4SJacob Faibussowitsch {
124ac2b2aa0SJed Brown   PetscFunctionBegin;
1259566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterEnd)(stash));
1263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
127ac2b2aa0SJed Brown }
128ac2b2aa0SJed Brown 
129d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
130d71ae5a4SJacob Faibussowitsch {
131533163c2SBarry Smith   PetscInt    nsends = stash->nsends, bs2, oldnmax, i;
132a2d1c673SSatish Balay   MPI_Status *send_status;
133a2d1c673SSatish Balay 
1343a40ed3dSBarry Smith   PetscFunctionBegin;
135533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
136a2d1c673SSatish Balay   /* wait on sends */
137a2d1c673SSatish Balay   if (nsends) {
1389566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * nsends, &send_status));
1399566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
1409566063dSJacob Faibussowitsch     PetscCall(PetscFree(send_status));
141a2d1c673SSatish Balay   }
142a2d1c673SSatish Balay 
143c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
144434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
145434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
146b9b97703SBarry Smith   if (stash->n) {
14794b769a5SSatish Balay     bs2     = stash->bs * stash->bs;
1488a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
149434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
150b9b97703SBarry Smith   }
151434d7ff9SSatish Balay 
152d07ff455SSatish Balay   stash->nmax       = 0;
153d07ff455SSatish Balay   stash->n          = 0;
1544c1ff481SSatish Balay   stash->reallocs   = -1;
155a2d1c673SSatish Balay   stash->nprocessed = 0;
1568865f1eaSKarl Rupp 
1579566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1588865f1eaSKarl Rupp 
159f4259b30SLisandro Dalcin   stash->space = NULL;
1608865f1eaSKarl Rupp 
1619566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->send_waits));
1629566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recv_waits));
1639566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->svalues, stash->sindices));
1649566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues[0]));
1659566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues));
1669566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices[0]));
1679566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices));
1683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1699417f4adSLois Curfman McInnes }
1709417f4adSLois Curfman McInnes 
1714c1ff481SSatish Balay /*
1726aad120cSJose E. Roman    MatStashGetInfo_Private - Gets the relevant statistics of the stash
1734c1ff481SSatish Balay 
1744c1ff481SSatish Balay    Input Parameters:
1754c1ff481SSatish Balay    stash    - the stash
17694b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1774c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1784c1ff481SSatish Balay 
1794c1ff481SSatish Balay */
180d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
181d71ae5a4SJacob Faibussowitsch {
182c1ac3661SBarry Smith   PetscInt bs2 = stash->bs * stash->bs;
18394b769a5SSatish Balay 
1843a40ed3dSBarry Smith   PetscFunctionBegin;
1851ecfd215SBarry Smith   if (nstash) *nstash = stash->n * bs2;
1861ecfd215SBarry Smith   if (reallocs) {
187434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
188434d7ff9SSatish Balay     else *reallocs = stash->reallocs;
1891ecfd215SBarry Smith   }
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191bc5ccf88SSatish Balay }
1924c1ff481SSatish Balay 
1934c1ff481SSatish Balay /*
1948798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1954c1ff481SSatish Balay 
1964c1ff481SSatish Balay    Input Parameters:
1974c1ff481SSatish Balay    stash  - the stash
1984c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1994c1ff481SSatish Balay             this value is used while allocating memory.
2004c1ff481SSatish Balay */
201d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
202d71ae5a4SJacob Faibussowitsch {
203bc5ccf88SSatish Balay   PetscFunctionBegin;
204434d7ff9SSatish Balay   stash->umax = max;
2053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20697530c3fSBarry Smith }
20797530c3fSBarry Smith 
2088798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2094c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2104c1ff481SSatish Balay    being inserted into the stash.
2114c1ff481SSatish Balay 
2124c1ff481SSatish Balay    Input Parameters:
2134c1ff481SSatish Balay    stash - the stash
2144c1ff481SSatish Balay    incr  - the minimum increase requested
2154c1ff481SSatish Balay 
21611a5261eSBarry Smith    Note:
2174c1ff481SSatish Balay    This routine doubles the currently used memory.
2184c1ff481SSatish Balay  */
219d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
220d71ae5a4SJacob Faibussowitsch {
2215bd3b8fbSHong Zhang   PetscInt newnmax, bs2 = stash->bs * stash->bs;
2229417f4adSLois Curfman McInnes 
2233a40ed3dSBarry Smith   PetscFunctionBegin;
2249417f4adSLois Curfman McInnes   /* allocate a larger stash */
225c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
226434d7ff9SSatish Balay     if (stash->umax) newnmax = stash->umax / bs2;
227434d7ff9SSatish Balay     else newnmax = DEFAULT_STASH_SIZE / bs2;
228c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
229434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
230434d7ff9SSatish Balay     else newnmax = stash->oldnmax / bs2;
231434d7ff9SSatish Balay   } else newnmax = stash->nmax * 2;
2324c1ff481SSatish Balay   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
233d07ff455SSatish Balay 
23475cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
2359566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
236b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
237b087b6d6SSatish Balay     stash->space_head = stash->space;
23875cae7c1SHong Zhang   }
239b087b6d6SSatish Balay 
240bc5ccf88SSatish Balay   stash->reallocs++;
24175cae7c1SHong Zhang   stash->nmax = newnmax;
2423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
243bc5ccf88SSatish Balay }
244bc5ccf88SSatish Balay /*
2458798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2464c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2474c1ff481SSatish Balay   can be inserted with a single call to this function.
2484c1ff481SSatish Balay 
2494c1ff481SSatish Balay   Input Parameters:
2504c1ff481SSatish Balay   stash  - the stash
2514c1ff481SSatish Balay   row    - the global row correspoiding to the values
2524c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2534c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2544c1ff481SSatish Balay   values - the values inserted
255bc5ccf88SSatish Balay */
256d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
257d71ae5a4SJacob Faibussowitsch {
258b400d20cSBarry Smith   PetscInt           i, k, cnt = 0;
25975cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
260bc5ccf88SSatish Balay 
261bc5ccf88SSatish Balay   PetscFunctionBegin;
2624c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
26348a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
26475cae7c1SHong Zhang   space = stash->space;
26575cae7c1SHong Zhang   k     = space->local_used;
2664c1ff481SSatish Balay   for (i = 0; i < n; i++) {
26714069ce9SStefano Zampini     if (ignorezeroentries && values && values[i] == 0.0) continue;
26875cae7c1SHong Zhang     space->idx[k] = row;
26975cae7c1SHong Zhang     space->idy[k] = idxn[i];
27014069ce9SStefano Zampini     space->val[k] = values ? values[i] : 0.0;
27175cae7c1SHong Zhang     k++;
272b400d20cSBarry Smith     cnt++;
2739417f4adSLois Curfman McInnes   }
274b400d20cSBarry Smith   stash->n += cnt;
275b400d20cSBarry Smith   space->local_used += cnt;
276b400d20cSBarry Smith   space->local_remaining -= cnt;
2773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
278a2d1c673SSatish Balay }
27975cae7c1SHong Zhang 
2804c1ff481SSatish Balay /*
2818798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2824c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2834c1ff481SSatish Balay   can be inserted with a single call to this function.
284a2d1c673SSatish Balay 
2854c1ff481SSatish Balay   Input Parameters:
2864c1ff481SSatish Balay   stash   - the stash
2874c1ff481SSatish Balay   row     - the global row correspoiding to the values
2884c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2894c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2904c1ff481SSatish Balay   values  - the values inserted
2914c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2924c1ff481SSatish Balay             this happens because the input is columnoriented.
2934c1ff481SSatish Balay */
294d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
295d71ae5a4SJacob Faibussowitsch {
29650e9ab7cSBarry Smith   PetscInt           i, k, cnt = 0;
29775cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
298a2d1c673SSatish Balay 
2994c1ff481SSatish Balay   PetscFunctionBegin;
3004c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
30148a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
30275cae7c1SHong Zhang   space = stash->space;
30375cae7c1SHong Zhang   k     = space->local_used;
3044c1ff481SSatish Balay   for (i = 0; i < n; i++) {
30514069ce9SStefano Zampini     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
30675cae7c1SHong Zhang     space->idx[k] = row;
30775cae7c1SHong Zhang     space->idy[k] = idxn[i];
30814069ce9SStefano Zampini     space->val[k] = values ? values[i * stepval] : 0.0;
30975cae7c1SHong Zhang     k++;
310b400d20cSBarry Smith     cnt++;
3114c1ff481SSatish Balay   }
312b400d20cSBarry Smith   stash->n += cnt;
313b400d20cSBarry Smith   space->local_used += cnt;
314b400d20cSBarry Smith   space->local_remaining -= cnt;
3153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3164c1ff481SSatish Balay }
3174c1ff481SSatish Balay 
3184c1ff481SSatish Balay /*
3198798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3204c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3214c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3224c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3234c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3244c1ff481SSatish Balay 
3254c1ff481SSatish Balay   Input Parameters:
3264c1ff481SSatish Balay   stash  - the stash
3274c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3284c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3294c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3304c1ff481SSatish Balay            values. Each block is of size bs*bs.
3314c1ff481SSatish Balay   values - the values inserted
3324c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
333a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3344c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3354c1ff481SSatish Balay */
336d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
337d71ae5a4SJacob Faibussowitsch {
33875cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
33954f21887SBarry Smith   const PetscScalar *vals;
34054f21887SBarry Smith   PetscScalar       *array;
34175cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
342a2d1c673SSatish Balay 
343a2d1c673SSatish Balay   PetscFunctionBegin;
34448a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
34575cae7c1SHong Zhang   space = stash->space;
34675cae7c1SHong Zhang   l     = space->local_used;
34775cae7c1SHong Zhang   bs2   = bs * bs;
3484c1ff481SSatish Balay   for (i = 0; i < n; i++) {
34975cae7c1SHong Zhang     space->idx[l] = row;
35075cae7c1SHong Zhang     space->idy[l] = idxn[i];
35175cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
35275cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
353da81f932SPierre Jolivet        function call */
35475cae7c1SHong Zhang     array = space->val + bs2 * l;
35575cae7c1SHong Zhang     vals  = values + idx * bs2 * n + bs * i;
35675cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
35714069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
35875cae7c1SHong Zhang       array++;
35975cae7c1SHong Zhang       vals += cmax * bs;
36075cae7c1SHong Zhang     }
36175cae7c1SHong Zhang     l++;
362a2d1c673SSatish Balay   }
3635bd3b8fbSHong Zhang   stash->n += n;
36475cae7c1SHong Zhang   space->local_used += n;
36575cae7c1SHong Zhang   space->local_remaining -= n;
3663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3674c1ff481SSatish Balay }
3684c1ff481SSatish Balay 
3694c1ff481SSatish Balay /*
3708798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3714c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3724c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3734c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3744c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3754c1ff481SSatish Balay 
3764c1ff481SSatish Balay   Input Parameters:
3774c1ff481SSatish Balay   stash  - the stash
3784c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3794c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3804c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3814c1ff481SSatish Balay            values. Each block is of size bs*bs.
3824c1ff481SSatish Balay   values - the values inserted
3834c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
384a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3854c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3864c1ff481SSatish Balay */
387d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
388d71ae5a4SJacob Faibussowitsch {
38975cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
39054f21887SBarry Smith   const PetscScalar *vals;
39154f21887SBarry Smith   PetscScalar       *array;
39275cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
3934c1ff481SSatish Balay 
3944c1ff481SSatish Balay   PetscFunctionBegin;
39548a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
39675cae7c1SHong Zhang   space = stash->space;
39775cae7c1SHong Zhang   l     = space->local_used;
39875cae7c1SHong Zhang   bs2   = bs * bs;
3994c1ff481SSatish Balay   for (i = 0; i < n; i++) {
40075cae7c1SHong Zhang     space->idx[l] = row;
40175cae7c1SHong Zhang     space->idy[l] = idxn[i];
40275cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40375cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
404da81f932SPierre Jolivet      function call */
40575cae7c1SHong Zhang     array = space->val + bs2 * l;
40675cae7c1SHong Zhang     vals  = values + idx * bs2 * n + bs * i;
40775cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
40814069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
40975cae7c1SHong Zhang       array += bs;
41075cae7c1SHong Zhang       vals += rmax * bs;
41175cae7c1SHong Zhang     }
4125bd3b8fbSHong Zhang     l++;
413a2d1c673SSatish Balay   }
4145bd3b8fbSHong Zhang   stash->n += n;
41575cae7c1SHong Zhang   space->local_used += n;
41675cae7c1SHong Zhang   space->local_remaining -= n;
4173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4189417f4adSLois Curfman McInnes }
4194c1ff481SSatish Balay /*
4208798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4214c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4224c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4234c1ff481SSatish Balay   processors.
424bc5ccf88SSatish Balay 
4254c1ff481SSatish Balay   Input Parameters:
4264c1ff481SSatish Balay   stash  - the stash
4274c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4284c1ff481SSatish Balay            for each node.
4294c1ff481SSatish Balay 
43011a5261eSBarry Smith   Note:
43195452b02SPatrick Sanan     The 'owners' array in the cased of the blocked-stash has the
4324c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4334c1ff481SSatish Balay   the proper global indices.
4344c1ff481SSatish Balay */
435d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
436d71ae5a4SJacob Faibussowitsch {
437ac2b2aa0SJed Brown   PetscFunctionBegin;
4389566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterBegin)(mat, stash, owners));
4393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
440ac2b2aa0SJed Brown }
441ac2b2aa0SJed Brown 
442d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
443d71ae5a4SJacob Faibussowitsch {
444c1ac3661SBarry Smith   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
445fe09c992SBarry Smith   PetscInt           size = stash->size, nsends;
44675cae7c1SHong Zhang   PetscInt           count, *sindices, **rindices, i, j, idx, lastidx, l;
44754f21887SBarry Smith   PetscScalar      **rvalues, *svalues;
448bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
449563fb871SSatish Balay   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
45076ec1555SBarry Smith   PetscMPIInt       *sizes, *nlengths, nreceives;
4515bd3b8fbSHong Zhang   PetscInt          *sp_idx, *sp_idy;
45254f21887SBarry Smith   PetscScalar       *sp_val;
4535bd3b8fbSHong Zhang   PetscMatStashSpace space, space_next;
454bc5ccf88SSatish Balay 
455bc5ccf88SSatish Balay   PetscFunctionBegin;
4564b4eb8d3SJed Brown   { /* make sure all processors are either in INSERTMODE or ADDMODE */
4574b4eb8d3SJed Brown     InsertMode addv;
4581c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
45908401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
4604b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4614b4eb8d3SJed Brown   }
4624b4eb8d3SJed Brown 
4634c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
46475cae7c1SHong Zhang 
465bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
4669566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
4679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
468a2d1c673SSatish Balay 
46975cae7c1SHong Zhang   i = j   = 0;
4707357eb19SBarry Smith   lastidx = -1;
4715bd3b8fbSHong Zhang   space   = stash->space_head;
4726c4ed002SBarry Smith   while (space) {
47375cae7c1SHong Zhang     space_next = space->next;
4745bd3b8fbSHong Zhang     sp_idx     = space->idx;
47575cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
4767357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4775bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4787357eb19SBarry Smith       lastidx = idx;
4797357eb19SBarry Smith       for (; j < size; j++) {
4804c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j + 1]) {
4819371c9d4SSatish Balay           nlengths[j]++;
4829371c9d4SSatish Balay           owner[i] = j;
4839371c9d4SSatish Balay           break;
484bc5ccf88SSatish Balay         }
485bc5ccf88SSatish Balay       }
48675cae7c1SHong Zhang       i++;
48775cae7c1SHong Zhang     }
48875cae7c1SHong Zhang     space = space_next;
489bc5ccf88SSatish Balay   }
490071fcb05SBarry Smith 
491563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
4929566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
493563fb871SSatish Balay   for (i = 0, nsends = 0; i < size; i++) {
4948865f1eaSKarl Rupp     if (nlengths[i]) {
4959371c9d4SSatish Balay       sizes[i] = 1;
4969371c9d4SSatish Balay       nsends++;
4978865f1eaSKarl Rupp     }
498563fb871SSatish Balay   }
499bc5ccf88SSatish Balay 
5009371c9d4SSatish Balay   {
5019371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
502563fb871SSatish Balay     /* Determine the number of messages to expect, their lengths, from from-ids */
5039566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
5049566063dSJacob Faibussowitsch     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
505563fb871SSatish Balay     /* since clubbing row,col - lengths are multiplied by 2 */
506563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
5079566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
508563fb871SSatish Balay     /* values are size 'bs2' lengths (and remove earlier factor 2 */
509563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
5109566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
5119566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
5129371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
5139371c9d4SSatish Balay   }
514bc5ccf88SSatish Balay 
515bc5ccf88SSatish Balay   /* do sends:
516bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
517bc5ccf88SSatish Balay          the ith processor
518bc5ccf88SSatish Balay   */
5199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
5209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
5219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
522a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
5239371c9d4SSatish Balay   startv[0] = 0;
5249371c9d4SSatish Balay   starti[0] = 0;
525bc5ccf88SSatish Balay   for (i = 1; i < size; i++) {
526563fb871SSatish Balay     startv[i] = startv[i - 1] + nlengths[i - 1];
527533163c2SBarry Smith     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
528bc5ccf88SSatish Balay   }
52975cae7c1SHong Zhang 
53075cae7c1SHong Zhang   i     = 0;
5315bd3b8fbSHong Zhang   space = stash->space_head;
5326c4ed002SBarry Smith   while (space) {
53375cae7c1SHong Zhang     space_next = space->next;
5345bd3b8fbSHong Zhang     sp_idx     = space->idx;
5355bd3b8fbSHong Zhang     sp_idy     = space->idy;
5365bd3b8fbSHong Zhang     sp_val     = space->val;
53775cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
538bc5ccf88SSatish Balay       j = owner[i];
539a2d1c673SSatish Balay       if (bs2 == 1) {
5405bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
541a2d1c673SSatish Balay       } else {
542c1ac3661SBarry Smith         PetscInt     k;
54354f21887SBarry Smith         PetscScalar *buf1, *buf2;
5444c1ff481SSatish Balay         buf1 = svalues + bs2 * startv[j];
545b087b6d6SSatish Balay         buf2 = space->val + bs2 * l;
5468865f1eaSKarl Rupp         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
547a2d1c673SSatish Balay       }
5485bd3b8fbSHong Zhang       sindices[starti[j]]               = sp_idx[l];
5495bd3b8fbSHong Zhang       sindices[starti[j] + nlengths[j]] = sp_idy[l];
550bc5ccf88SSatish Balay       startv[j]++;
551bc5ccf88SSatish Balay       starti[j]++;
55275cae7c1SHong Zhang       i++;
55375cae7c1SHong Zhang     }
55475cae7c1SHong Zhang     space = space_next;
555bc5ccf88SSatish Balay   }
556bc5ccf88SSatish Balay   startv[0] = 0;
5578865f1eaSKarl Rupp   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
558e5d0e772SSatish Balay 
559bc5ccf88SSatish Balay   for (i = 0, count = 0; i < size; i++) {
56076ec1555SBarry Smith     if (sizes[i]) {
5619566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
5629566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
563bc5ccf88SSatish Balay     }
564b85c94c3SSatish Balay   }
5656cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5669566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends));
567e5d0e772SSatish Balay   for (i = 0; i < size; i++) {
56848a46eb9SPierre Jolivet     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
569e5d0e772SSatish Balay   }
570e5d0e772SSatish Balay #endif
5719566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
5729566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
5739566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
5749566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
575a2d1c673SSatish Balay 
576563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
5779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
578563fb871SSatish Balay 
579563fb871SSatish Balay   for (i = 0; i < nreceives; i++) {
580563fb871SSatish Balay     recv_waits[2 * i]     = recv_waits1[i];
581563fb871SSatish Balay     recv_waits[2 * i + 1] = recv_waits2[i];
582563fb871SSatish Balay   }
583563fb871SSatish Balay   stash->recv_waits = recv_waits;
5848865f1eaSKarl Rupp 
5859566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
5869566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
587563fb871SSatish Balay 
588c05d87d6SBarry Smith   stash->svalues         = svalues;
589c05d87d6SBarry Smith   stash->sindices        = sindices;
590c05d87d6SBarry Smith   stash->rvalues         = rvalues;
591c05d87d6SBarry Smith   stash->rindices        = rindices;
592c05d87d6SBarry Smith   stash->send_waits      = send_waits;
593c05d87d6SBarry Smith   stash->nsends          = nsends;
594c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
59567318a8aSJed Brown   stash->reproduce_count = 0;
5963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
597bc5ccf88SSatish Balay }
598bc5ccf88SSatish Balay 
599a2d1c673SSatish Balay /*
6008798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6018798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6024c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6034c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6044c1ff481SSatish Balay 
605*2fe279fdSBarry Smith    Input Parameter:
6064c1ff481SSatish Balay    stash - the stash
6074c1ff481SSatish Balay 
6084c1ff481SSatish Balay    Output Parameters:
6094c1ff481SSatish Balay    nvals - the number of entries in the current message.
6104c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6114c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6124c1ff481SSatish Balay    vals  - the values
6134c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6144c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6154c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
616a2d1c673SSatish Balay */
617d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
618d71ae5a4SJacob Faibussowitsch {
619ac2b2aa0SJed Brown   PetscFunctionBegin;
6209566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
6213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
622ac2b2aa0SJed Brown }
623ac2b2aa0SJed Brown 
624d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
625d71ae5a4SJacob Faibussowitsch {
626533163c2SBarry Smith   PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
627fe09c992SBarry Smith   PetscInt    bs2;
628a2d1c673SSatish Balay   MPI_Status  recv_status;
629ace3abfcSBarry Smith   PetscBool   match_found = PETSC_FALSE;
630bc5ccf88SSatish Balay 
631bc5ccf88SSatish Balay   PetscFunctionBegin;
632a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
633a2d1c673SSatish Balay   /* Return if no more messages to process */
6343ba16761SJacob Faibussowitsch   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);
635a2d1c673SSatish Balay 
6364c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
63767318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
638a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
639a2d1c673SSatish Balay   while (!match_found) {
64067318a8aSJed Brown     if (stash->reproduce) {
64167318a8aSJed Brown       i = stash->reproduce_count++;
6429566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
64367318a8aSJed Brown     } else {
6449566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
64567318a8aSJed Brown     }
64608401ef6SPierre Jolivet     PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");
647533163c2SBarry Smith 
64867318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
649a2d1c673SSatish Balay     if (i % 2) {
6509566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
651c1dc657dSBarry Smith       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
652a2d1c673SSatish Balay       *nvals                            = *nvals / bs2;
653563fb871SSatish Balay     } else {
6549566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
655563fb871SSatish Balay       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
656563fb871SSatish Balay       *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
657bc5ccf88SSatish Balay     }
658a2d1c673SSatish Balay 
659cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
660c1dc657dSBarry Smith     i1 = flg_v[2 * recv_status.MPI_SOURCE];
661c1dc657dSBarry Smith     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
662a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
663563fb871SSatish Balay       *rows = stash->rindices[i2];
664a2d1c673SSatish Balay       *cols = *rows + *nvals;
665563fb871SSatish Balay       *vals = stash->rvalues[i1];
666a2d1c673SSatish Balay       *flg  = 1;
667a2d1c673SSatish Balay       stash->nprocessed++;
66835d8aa7fSBarry Smith       match_found = PETSC_TRUE;
669bc5ccf88SSatish Balay     }
670bc5ccf88SSatish Balay   }
6713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
672bc5ccf88SSatish Balay }
673d7d60843SJed Brown 
674e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
675d7d60843SJed Brown typedef struct {
676d7d60843SJed Brown   PetscInt    row;
677d7d60843SJed Brown   PetscInt    col;
678d7d60843SJed Brown   PetscScalar vals[1]; /* Actually an array of length bs2 */
679d7d60843SJed Brown } MatStashBlock;
680d7d60843SJed Brown 
681d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
682d71ae5a4SJacob Faibussowitsch {
683d7d60843SJed Brown   PetscMatStashSpace space;
684d7d60843SJed Brown   PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
685d7d60843SJed Brown   PetscScalar      **valptr;
686d7d60843SJed Brown 
687d7d60843SJed Brown   PetscFunctionBegin;
6889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
689d7d60843SJed Brown   for (space = stash->space_head, cnt = 0; space; space = space->next) {
690d7d60843SJed Brown     for (i = 0; i < space->local_used; i++) {
691d7d60843SJed Brown       row[cnt]    = space->idx[i];
692d7d60843SJed Brown       col[cnt]    = space->idy[i];
693d7d60843SJed Brown       valptr[cnt] = &space->val[i * bs2];
694d7d60843SJed Brown       perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
695d7d60843SJed Brown       cnt++;
696d7d60843SJed Brown     }
697d7d60843SJed Brown   }
69808401ef6SPierre Jolivet   PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
6999566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
700d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
701d7d60843SJed Brown   for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
702d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
703d7d60843SJed Brown       PetscInt colstart;
7049566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
705d7d60843SJed Brown       for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
706d7d60843SJed Brown         PetscInt       j, l;
707d7d60843SJed Brown         MatStashBlock *block;
7089566063dSJacob Faibussowitsch         PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
709d7d60843SJed Brown         block->row = row[rowstart];
710d7d60843SJed Brown         block->col = col[colstart];
7119566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
712d7d60843SJed Brown         for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
713d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
714d7d60843SJed Brown             for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
715d7d60843SJed Brown           } else {
7169566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
717d7d60843SJed Brown           }
718d7d60843SJed Brown         }
719d7d60843SJed Brown         colstart = j;
720d7d60843SJed Brown       }
721d7d60843SJed Brown       rowstart = i;
722d7d60843SJed Brown     }
723d7d60843SJed Brown   }
7249566063dSJacob Faibussowitsch   PetscCall(PetscFree4(row, col, valptr, perm));
7253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
726d7d60843SJed Brown }
727d7d60843SJed Brown 
728d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
729d71ae5a4SJacob Faibussowitsch {
730d7d60843SJed Brown   PetscFunctionBegin;
731d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
732d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
733d7d60843SJed Brown     PetscMPIInt  blocklens[2];
734d7d60843SJed Brown     MPI_Aint     displs[2];
735d7d60843SJed Brown     MPI_Datatype types[2], stype;
736223b4c07SBarry Smith     /*
737223b4c07SBarry Smith         DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
738f41aa5efSJunchao Zhang        std::complex itself has standard layout, so does DummyBlock, recursively.
739a5b23f4aSJose E. Roman        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
740f41aa5efSJunchao Zhang        though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
741f41aa5efSJunchao Zhang        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
742f41aa5efSJunchao Zhang 
743f41aa5efSJunchao Zhang        We can test if std::complex has standard layout with the following code:
744f41aa5efSJunchao Zhang        #include <iostream>
745f41aa5efSJunchao Zhang        #include <type_traits>
746f41aa5efSJunchao Zhang        #include <complex>
747f41aa5efSJunchao Zhang        int main() {
748f41aa5efSJunchao Zhang          std::cout << std::boolalpha;
749f41aa5efSJunchao Zhang          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
750f41aa5efSJunchao Zhang        }
751f41aa5efSJunchao Zhang        Output: true
7529503c6c6SJed Brown      */
7539371c9d4SSatish Balay     struct DummyBlock {
7549371c9d4SSatish Balay       PetscInt    row, col;
7559371c9d4SSatish Balay       PetscScalar vals;
7569371c9d4SSatish Balay     };
757d7d60843SJed Brown 
7589503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
759d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
760d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
761d7d60843SJed Brown     }
7629566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
7639566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
7649566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
765d7d60843SJed Brown     blocklens[0] = 2;
766d7d60843SJed Brown     blocklens[1] = bs2;
7679503c6c6SJed Brown     displs[0]    = offsetof(struct DummyBlock, row);
7689503c6c6SJed Brown     displs[1]    = offsetof(struct DummyBlock, vals);
769d7d60843SJed Brown     types[0]     = MPIU_INT;
770d7d60843SJed Brown     types[1]     = MPIU_SCALAR;
7719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
7729566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stype));
7739566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
7749566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
7759566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_free(&stype));
776d7d60843SJed Brown   }
7773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
778d7d60843SJed Brown }
779d7d60843SJed Brown 
780da81f932SPierre Jolivet /* Callback invoked after target rank has initiated receive of rendezvous message.
781d7d60843SJed Brown  * Here we post the main sends.
782d7d60843SJed Brown  */
783d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
784d71ae5a4SJacob Faibussowitsch {
785d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
786d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)sdata;
787d7d60843SJed Brown 
788d7d60843SJed Brown   PetscFunctionBegin;
78908401ef6SPierre Jolivet   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
7909566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
791d7d60843SJed Brown   stash->sendframes[rankid].count   = hdr->count;
792d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
7933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
794d7d60843SJed Brown }
795d7d60843SJed Brown 
796223b4c07SBarry Smith /*
797223b4c07SBarry Smith     Callback invoked by target after receiving rendezvous message.
798223b4c07SBarry Smith     Here we post the main recvs.
799d7d60843SJed Brown  */
800d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
801d71ae5a4SJacob Faibussowitsch {
802d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
803d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)rdata;
804d7d60843SJed Brown   MatStashFrame  *frame;
805d7d60843SJed Brown 
806d7d60843SJed Brown   PetscFunctionBegin;
8079566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
8089566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
8099566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
810d7d60843SJed Brown   frame->count   = hdr->count;
811d7d60843SJed Brown   frame->pending = 1;
8123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
813d7d60843SJed Brown }
814d7d60843SJed Brown 
815d7d60843SJed Brown /*
816d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
817d7d60843SJed Brown  */
818d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
819d71ae5a4SJacob Faibussowitsch {
820d7d60843SJed Brown   size_t nblocks;
821d7d60843SJed Brown   char  *sendblocks;
822d7d60843SJed Brown 
823d7d60843SJed Brown   PetscFunctionBegin;
82476bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
8254b4eb8d3SJed Brown     InsertMode addv;
8261c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
82708401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
8284b4eb8d3SJed Brown   }
8294b4eb8d3SJed Brown 
8309566063dSJacob Faibussowitsch   PetscCall(MatStashBlockTypeSetUp(stash));
8319566063dSJacob Faibussowitsch   PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
8329566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
8339566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
83460f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
83523b7d1baSJed Brown     PetscInt i;
83623b7d1baSJed Brown     size_t   b;
83797da8949SJed Brown     for (i = 0, b = 0; i < stash->nsendranks; i++) {
83897da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
83997da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
84097da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
84197da8949SJed Brown       for (; b < nblocks; b++) {
84297da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
843aed4548fSBarry Smith         PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
84497da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
84597da8949SJed Brown         stash->sendhdr[i].count++;
84697da8949SJed Brown       }
84797da8949SJed Brown     }
84897da8949SJed Brown   } else { /* Dynamically count and pack (first time) */
84923b7d1baSJed Brown     PetscInt sendno;
85023b7d1baSJed Brown     size_t   i, rowstart;
851d7d60843SJed Brown 
852d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
853d7d60843SJed Brown     stash->nsendranks = 0;
854d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
8557e2ea869SJed Brown       PetscInt       owner;
856d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8579566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
858d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
859d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
860d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8617e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
862d7d60843SJed Brown       }
863d7d60843SJed Brown       stash->nsendranks++;
864d7d60843SJed Brown       rowstart = i;
865d7d60843SJed Brown     }
8669566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));
867d7d60843SJed Brown 
868d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
869d7d60843SJed Brown     sendno = 0;
870d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
871d7d60843SJed Brown       PetscInt       owner;
872d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8739566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
874d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
875d7d60843SJed Brown       stash->sendranks[sendno] = owner;
876d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
877d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8787e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
879d7d60843SJed Brown       }
880d7d60843SJed Brown       stash->sendframes[sendno].buffer  = sendblock_rowstart;
881d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
882d7d60843SJed Brown       stash->sendhdr[sendno].count      = i - rowstart;
883d7d60843SJed Brown       sendno++;
884d7d60843SJed Brown       rowstart = i;
885d7d60843SJed Brown     }
88608401ef6SPierre Jolivet     PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
887d7d60843SJed Brown   }
888d7d60843SJed Brown 
8894b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
8904b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
8914b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
89223b7d1baSJed Brown     size_t i;
8934b4eb8d3SJed Brown     for (i = 0; i < nblocks; i++) {
8944b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8954b4eb8d3SJed Brown       sendblock_i->row           = -(sendblock_i->row + 1);
8964b4eb8d3SJed Brown     }
8974b4eb8d3SJed Brown   }
8984b4eb8d3SJed Brown 
89960f34548SJunchao Zhang   if (stash->first_assembly_done) {
90097da8949SJed Brown     PetscMPIInt i, tag;
9019566063dSJacob Faibussowitsch     PetscCall(PetscCommGetNewTag(stash->comm, &tag));
90248a46eb9SPierre Jolivet     for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
90348a46eb9SPierre Jolivet     for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
90497da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
90597da8949SJed Brown   } else {
9069371c9d4SSatish Balay     PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
9079566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
90897da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
90997da8949SJed Brown   }
910d7d60843SJed Brown 
9119566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
912d7d60843SJed Brown   stash->recvframe_active    = NULL;
913d7d60843SJed Brown   stash->recvframe_i         = 0;
914d7d60843SJed Brown   stash->some_i              = 0;
915d7d60843SJed Brown   stash->some_count          = 0;
916d7d60843SJed Brown   stash->recvcount           = 0;
91760f34548SJunchao Zhang   stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9184b4eb8d3SJed Brown   stash->insertmode          = &mat->insertmode;
9193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
920d7d60843SJed Brown }
921d7d60843SJed Brown 
922d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
923d71ae5a4SJacob Faibussowitsch {
924d7d60843SJed Brown   MatStashBlock *block;
925d7d60843SJed Brown 
926d7d60843SJed Brown   PetscFunctionBegin;
927d7d60843SJed Brown   *flg = 0;
928d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
929d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
9303ba16761SJacob Faibussowitsch       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
9319566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
932d7d60843SJed Brown       stash->some_i = 0;
933d7d60843SJed Brown     }
934d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
935d7d60843SJed Brown     stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
936d7d60843SJed Brown     if (stash->use_status) {                                  /* Count what was actually sent */
9379566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count));
938d7d60843SJed Brown     }
9394b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9404b4eb8d3SJed Brown       block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
9414b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
942aed4548fSBarry Smith       PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
943aed4548fSBarry Smith       PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
9444b4eb8d3SJed Brown     }
945d7d60843SJed Brown     stash->some_i++;
946d7d60843SJed Brown     stash->recvcount++;
947d7d60843SJed Brown     stash->recvframe_i = 0;
948d7d60843SJed Brown   }
949d7d60843SJed Brown   *n    = 1;
950d7d60843SJed Brown   block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
9514b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
952d7d60843SJed Brown   *row = &block->row;
953d7d60843SJed Brown   *col = &block->col;
954d7d60843SJed Brown   *val = block->vals;
955d7d60843SJed Brown   stash->recvframe_i++;
956d7d60843SJed Brown   *flg = 1;
9573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
958d7d60843SJed Brown }
959d7d60843SJed Brown 
960d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
961d71ae5a4SJacob Faibussowitsch {
962d7d60843SJed Brown   PetscFunctionBegin;
9639566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
96460f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
965d2b3fd65SBarry Smith     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
9663575f486SJed Brown   } else { /* No reuse, so collect everything. */
9679566063dSJacob Faibussowitsch     PetscCall(MatStashScatterDestroy_BTS(stash));
96897da8949SJed Brown   }
969d7d60843SJed Brown 
970d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
971d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
972d7d60843SJed Brown      Also update the oldmax, only if it increases */
973d7d60843SJed Brown   if (stash->n) {
974d7d60843SJed Brown     PetscInt bs2     = stash->bs * stash->bs;
975d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
976d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
977d7d60843SJed Brown   }
978d7d60843SJed Brown 
979d7d60843SJed Brown   stash->nmax       = 0;
980d7d60843SJed Brown   stash->n          = 0;
981d7d60843SJed Brown   stash->reallocs   = -1;
982d7d60843SJed Brown   stash->nprocessed = 0;
983d7d60843SJed Brown 
9849566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
985d7d60843SJed Brown 
986f4259b30SLisandro Dalcin   stash->space = NULL;
987d7d60843SJed Brown 
9883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
989d7d60843SJed Brown }
990d7d60843SJed Brown 
991d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
992d71ae5a4SJacob Faibussowitsch {
993d7d60843SJed Brown   PetscFunctionBegin;
9949566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
9959566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
996d7d60843SJed Brown   stash->recvframes = NULL;
9979566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
99848a46eb9SPierre Jolivet   if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
999d7d60843SJed Brown   stash->nsendranks = 0;
1000d7d60843SJed Brown   stash->nrecvranks = 0;
10019566063dSJacob Faibussowitsch   PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
10029566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->sendreqs));
10039566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvreqs));
10049566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvranks));
10059566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvhdr));
10069566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
10073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1008d7d60843SJed Brown }
10091667be42SBarry Smith #endif
1010