xref: /petsc/src/mat/utils/matstash.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
1375d39b6aSBarry Smith #endif
14d7d60843SJed Brown 
159417f4adSLois Curfman McInnes /*
168798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
174c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
184c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
199417f4adSLois Curfman McInnes 
204c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Input Parameters:
234c1ff481SSatish Balay   comm - communicator, required for scatters.
244c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
254c1ff481SSatish Balay 
264c1ff481SSatish Balay   Output Parameters:
274c1ff481SSatish Balay   stash    - the newly created stash
289417f4adSLois Curfman McInnes */
29*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
30*d71ae5a4SJacob Faibussowitsch {
31533163c2SBarry Smith   PetscInt  max, *opt, nopt, i;
32ace3abfcSBarry Smith   PetscBool flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
378865f1eaSKarl Rupp 
389566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
399566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
43533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
44533163c2SBarry Smith 
45434d7ff9SSatish Balay   nopt = stash->size;
469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nopt, &opt));
479566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
48434d7ff9SSatish Balay   if (flg) {
49434d7ff9SSatish Balay     if (nopt == 1) max = opt[0];
50434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
51434d7ff9SSatish Balay     else if (stash->rank < nopt) max = opt[stash->rank];
52f4ab19daSSatish Balay     else max = 0; /* Use default */
53434d7ff9SSatish Balay     stash->umax = max;
54434d7ff9SSatish Balay   } else {
55434d7ff9SSatish Balay     stash->umax = 0;
56434d7ff9SSatish Balay   }
579566063dSJacob Faibussowitsch   PetscCall(PetscFree(opt));
584c1ff481SSatish Balay   if (bs <= 0) bs = 1;
59a2d1c673SSatish Balay 
604c1ff481SSatish Balay   stash->bs         = bs;
619417f4adSLois Curfman McInnes   stash->nmax       = 0;
62434d7ff9SSatish Balay   stash->oldnmax    = 0;
639417f4adSLois Curfman McInnes   stash->n          = 0;
644c1ff481SSatish Balay   stash->reallocs   = -1;
65f4259b30SLisandro Dalcin   stash->space_head = NULL;
66f4259b30SLisandro Dalcin   stash->space      = NULL;
679417f4adSLois Curfman McInnes 
68f4259b30SLisandro Dalcin   stash->send_waits  = NULL;
69f4259b30SLisandro Dalcin   stash->recv_waits  = NULL;
70f4259b30SLisandro Dalcin   stash->send_status = NULL;
71bc5ccf88SSatish Balay   stash->nsends      = 0;
72bc5ccf88SSatish Balay   stash->nrecvs      = 0;
73f4259b30SLisandro Dalcin   stash->svalues     = NULL;
74f4259b30SLisandro Dalcin   stash->rvalues     = NULL;
75f4259b30SLisandro Dalcin   stash->rindices    = NULL;
76a2d1c673SSatish Balay   stash->nprocessed  = 0;
7767318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
78d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
798865f1eaSKarl Rupp 
809566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
811667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
82ca723aa4SStefano Zampini   flg = PETSC_FALSE;
839566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
84b30fb036SBarry Smith   if (!flg) {
85d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
86d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
87d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
88d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
89ac2b2aa0SJed Brown   } else {
901667be42SBarry Smith #endif
91ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
951667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
96ac2b2aa0SJed Brown   }
971667be42SBarry Smith #endif
983a40ed3dSBarry Smith   PetscFunctionReturn(0);
999417f4adSLois Curfman McInnes }
1009417f4adSLois Curfman McInnes 
1014c1ff481SSatish Balay /*
1028798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1034c1ff481SSatish Balay */
104*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashDestroy_Private(MatStash *stash)
105*d71ae5a4SJacob Faibussowitsch {
106bc5ccf88SSatish Balay   PetscFunctionBegin;
1079566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1089566063dSJacob Faibussowitsch   if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
1098865f1eaSKarl Rupp 
110f4259b30SLisandro Dalcin   stash->space = NULL;
1118865f1eaSKarl Rupp 
1129566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->flg_v));
113bc5ccf88SSatish Balay   PetscFunctionReturn(0);
114bc5ccf88SSatish Balay }
115bc5ccf88SSatish Balay 
1164c1ff481SSatish Balay /*
11767318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1184c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
11967318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1204c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1214c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1224c1ff481SSatish Balay    so that the same value can be used the next time through.
1234c1ff481SSatish Balay */
124*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
125*d71ae5a4SJacob Faibussowitsch {
126ac2b2aa0SJed Brown   PetscFunctionBegin;
1279566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterEnd)(stash));
128ac2b2aa0SJed Brown   PetscFunctionReturn(0);
129ac2b2aa0SJed Brown }
130ac2b2aa0SJed Brown 
131*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
132*d71ae5a4SJacob Faibussowitsch {
133533163c2SBarry Smith   PetscInt    nsends = stash->nsends, bs2, oldnmax, i;
134a2d1c673SSatish Balay   MPI_Status *send_status;
135a2d1c673SSatish Balay 
1363a40ed3dSBarry Smith   PetscFunctionBegin;
137533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
138a2d1c673SSatish Balay   /* wait on sends */
139a2d1c673SSatish Balay   if (nsends) {
1409566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * nsends, &send_status));
1419566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
1429566063dSJacob Faibussowitsch     PetscCall(PetscFree(send_status));
143a2d1c673SSatish Balay   }
144a2d1c673SSatish Balay 
145c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
146434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
147434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
148b9b97703SBarry Smith   if (stash->n) {
14994b769a5SSatish Balay     bs2     = stash->bs * stash->bs;
1508a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
151434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
152b9b97703SBarry Smith   }
153434d7ff9SSatish Balay 
154d07ff455SSatish Balay   stash->nmax       = 0;
155d07ff455SSatish Balay   stash->n          = 0;
1564c1ff481SSatish Balay   stash->reallocs   = -1;
157a2d1c673SSatish Balay   stash->nprocessed = 0;
1588865f1eaSKarl Rupp 
1599566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1608865f1eaSKarl Rupp 
161f4259b30SLisandro Dalcin   stash->space = NULL;
1628865f1eaSKarl Rupp 
1639566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->send_waits));
1649566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recv_waits));
1659566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->svalues, stash->sindices));
1669566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues[0]));
1679566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues));
1689566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices[0]));
1699566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices));
1703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1719417f4adSLois Curfman McInnes }
1729417f4adSLois Curfman McInnes 
1734c1ff481SSatish Balay /*
1746aad120cSJose E. Roman    MatStashGetInfo_Private - Gets the relevant statistics of the stash
1754c1ff481SSatish Balay 
1764c1ff481SSatish Balay    Input Parameters:
1774c1ff481SSatish Balay    stash    - the stash
17894b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1794c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1804c1ff481SSatish Balay 
1814c1ff481SSatish Balay */
182*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
183*d71ae5a4SJacob Faibussowitsch {
184c1ac3661SBarry Smith   PetscInt bs2 = stash->bs * stash->bs;
18594b769a5SSatish Balay 
1863a40ed3dSBarry Smith   PetscFunctionBegin;
1871ecfd215SBarry Smith   if (nstash) *nstash = stash->n * bs2;
1881ecfd215SBarry Smith   if (reallocs) {
189434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
190434d7ff9SSatish Balay     else *reallocs = stash->reallocs;
1911ecfd215SBarry Smith   }
192bc5ccf88SSatish Balay   PetscFunctionReturn(0);
193bc5ccf88SSatish Balay }
1944c1ff481SSatish Balay 
1954c1ff481SSatish Balay /*
1968798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1974c1ff481SSatish Balay 
1984c1ff481SSatish Balay    Input Parameters:
1994c1ff481SSatish Balay    stash  - the stash
2004c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2014c1ff481SSatish Balay             this value is used while allocating memory.
2024c1ff481SSatish Balay */
203*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
204*d71ae5a4SJacob Faibussowitsch {
205bc5ccf88SSatish Balay   PetscFunctionBegin;
206434d7ff9SSatish Balay   stash->umax = max;
2073a40ed3dSBarry Smith   PetscFunctionReturn(0);
20897530c3fSBarry Smith }
20997530c3fSBarry Smith 
2108798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2114c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2124c1ff481SSatish Balay    being inserted into the stash.
2134c1ff481SSatish Balay 
2144c1ff481SSatish Balay    Input Parameters:
2154c1ff481SSatish Balay    stash - the stash
2164c1ff481SSatish Balay    incr  - the minimum increase requested
2174c1ff481SSatish Balay 
21811a5261eSBarry Smith    Note:
2194c1ff481SSatish Balay    This routine doubles the currently used memory.
2204c1ff481SSatish Balay  */
221*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
222*d71ae5a4SJacob Faibussowitsch {
2235bd3b8fbSHong Zhang   PetscInt newnmax, bs2 = stash->bs * stash->bs;
2249417f4adSLois Curfman McInnes 
2253a40ed3dSBarry Smith   PetscFunctionBegin;
2269417f4adSLois Curfman McInnes   /* allocate a larger stash */
227c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
228434d7ff9SSatish Balay     if (stash->umax) newnmax = stash->umax / bs2;
229434d7ff9SSatish Balay     else newnmax = DEFAULT_STASH_SIZE / bs2;
230c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
231434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
232434d7ff9SSatish Balay     else newnmax = stash->oldnmax / bs2;
233434d7ff9SSatish Balay   } else newnmax = stash->nmax * 2;
2344c1ff481SSatish Balay   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
235d07ff455SSatish Balay 
23675cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
2379566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
238b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
239b087b6d6SSatish Balay     stash->space_head = stash->space;
24075cae7c1SHong Zhang   }
241b087b6d6SSatish Balay 
242bc5ccf88SSatish Balay   stash->reallocs++;
24375cae7c1SHong Zhang   stash->nmax = newnmax;
244bc5ccf88SSatish Balay   PetscFunctionReturn(0);
245bc5ccf88SSatish Balay }
246bc5ccf88SSatish Balay /*
2478798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2484c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2494c1ff481SSatish Balay   can be inserted with a single call to this function.
2504c1ff481SSatish Balay 
2514c1ff481SSatish Balay   Input Parameters:
2524c1ff481SSatish Balay   stash  - the stash
2534c1ff481SSatish Balay   row    - the global row correspoiding to the values
2544c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2554c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2564c1ff481SSatish Balay   values - the values inserted
257bc5ccf88SSatish Balay */
258*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
259*d71ae5a4SJacob Faibussowitsch {
260b400d20cSBarry Smith   PetscInt           i, k, cnt = 0;
26175cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
262bc5ccf88SSatish Balay 
263bc5ccf88SSatish Balay   PetscFunctionBegin;
2644c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
26548a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
26675cae7c1SHong Zhang   space = stash->space;
26775cae7c1SHong Zhang   k     = space->local_used;
2684c1ff481SSatish Balay   for (i = 0; i < n; i++) {
26914069ce9SStefano Zampini     if (ignorezeroentries && values && values[i] == 0.0) continue;
27075cae7c1SHong Zhang     space->idx[k] = row;
27175cae7c1SHong Zhang     space->idy[k] = idxn[i];
27214069ce9SStefano Zampini     space->val[k] = values ? values[i] : 0.0;
27375cae7c1SHong Zhang     k++;
274b400d20cSBarry Smith     cnt++;
2759417f4adSLois Curfman McInnes   }
276b400d20cSBarry Smith   stash->n += cnt;
277b400d20cSBarry Smith   space->local_used += cnt;
278b400d20cSBarry Smith   space->local_remaining -= cnt;
279a2d1c673SSatish Balay   PetscFunctionReturn(0);
280a2d1c673SSatish Balay }
28175cae7c1SHong Zhang 
2824c1ff481SSatish Balay /*
2838798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2844c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2854c1ff481SSatish Balay   can be inserted with a single call to this function.
286a2d1c673SSatish Balay 
2874c1ff481SSatish Balay   Input Parameters:
2884c1ff481SSatish Balay   stash   - the stash
2894c1ff481SSatish Balay   row     - the global row correspoiding to the values
2904c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2914c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2924c1ff481SSatish Balay   values  - the values inserted
2934c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2944c1ff481SSatish Balay             this happens because the input is columnoriented.
2954c1ff481SSatish Balay */
296*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
297*d71ae5a4SJacob Faibussowitsch {
29850e9ab7cSBarry Smith   PetscInt           i, k, cnt = 0;
29975cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
300a2d1c673SSatish Balay 
3014c1ff481SSatish Balay   PetscFunctionBegin;
3024c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
30348a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
30475cae7c1SHong Zhang   space = stash->space;
30575cae7c1SHong Zhang   k     = space->local_used;
3064c1ff481SSatish Balay   for (i = 0; i < n; i++) {
30714069ce9SStefano Zampini     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
30875cae7c1SHong Zhang     space->idx[k] = row;
30975cae7c1SHong Zhang     space->idy[k] = idxn[i];
31014069ce9SStefano Zampini     space->val[k] = values ? values[i * stepval] : 0.0;
31175cae7c1SHong Zhang     k++;
312b400d20cSBarry Smith     cnt++;
3134c1ff481SSatish Balay   }
314b400d20cSBarry Smith   stash->n += cnt;
315b400d20cSBarry Smith   space->local_used += cnt;
316b400d20cSBarry Smith   space->local_remaining -= cnt;
3174c1ff481SSatish Balay   PetscFunctionReturn(0);
3184c1ff481SSatish Balay }
3194c1ff481SSatish Balay 
3204c1ff481SSatish Balay /*
3218798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3224c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3234c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3244c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3254c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3264c1ff481SSatish Balay 
3274c1ff481SSatish Balay   Input Parameters:
3284c1ff481SSatish Balay   stash  - the stash
3294c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3304c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3314c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3324c1ff481SSatish Balay            values. Each block is of size bs*bs.
3334c1ff481SSatish Balay   values - the values inserted
3344c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
335a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3364c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3374c1ff481SSatish Balay */
338*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
339*d71ae5a4SJacob Faibussowitsch {
34075cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
34154f21887SBarry Smith   const PetscScalar *vals;
34254f21887SBarry Smith   PetscScalar       *array;
34375cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
344a2d1c673SSatish Balay 
345a2d1c673SSatish Balay   PetscFunctionBegin;
34648a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
34775cae7c1SHong Zhang   space = stash->space;
34875cae7c1SHong Zhang   l     = space->local_used;
34975cae7c1SHong Zhang   bs2   = bs * bs;
3504c1ff481SSatish Balay   for (i = 0; i < n; i++) {
35175cae7c1SHong Zhang     space->idx[l] = row;
35275cae7c1SHong Zhang     space->idy[l] = idxn[i];
35375cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
35475cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
35575cae7c1SHong Zhang        funtion call */
35675cae7c1SHong Zhang     array = space->val + bs2 * l;
35775cae7c1SHong Zhang     vals  = values + idx * bs2 * n + bs * i;
35875cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
35914069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
36075cae7c1SHong Zhang       array++;
36175cae7c1SHong Zhang       vals += cmax * bs;
36275cae7c1SHong Zhang     }
36375cae7c1SHong Zhang     l++;
364a2d1c673SSatish Balay   }
3655bd3b8fbSHong Zhang   stash->n += n;
36675cae7c1SHong Zhang   space->local_used += n;
36775cae7c1SHong Zhang   space->local_remaining -= n;
3684c1ff481SSatish Balay   PetscFunctionReturn(0);
3694c1ff481SSatish Balay }
3704c1ff481SSatish Balay 
3714c1ff481SSatish Balay /*
3728798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3734c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3744c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3754c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3764c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3774c1ff481SSatish Balay 
3784c1ff481SSatish Balay   Input Parameters:
3794c1ff481SSatish Balay   stash  - the stash
3804c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3814c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3824c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3834c1ff481SSatish Balay            values. Each block is of size bs*bs.
3844c1ff481SSatish Balay   values - the values inserted
3854c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
386a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3874c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3884c1ff481SSatish Balay */
389*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
390*d71ae5a4SJacob Faibussowitsch {
39175cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
39254f21887SBarry Smith   const PetscScalar *vals;
39354f21887SBarry Smith   PetscScalar       *array;
39475cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
3954c1ff481SSatish Balay 
3964c1ff481SSatish Balay   PetscFunctionBegin;
39748a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
39875cae7c1SHong Zhang   space = stash->space;
39975cae7c1SHong Zhang   l     = space->local_used;
40075cae7c1SHong Zhang   bs2   = bs * bs;
4014c1ff481SSatish Balay   for (i = 0; i < n; i++) {
40275cae7c1SHong Zhang     space->idx[l] = row;
40375cae7c1SHong Zhang     space->idy[l] = idxn[i];
40475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40575cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
40675cae7c1SHong Zhang      funtion call */
40775cae7c1SHong Zhang     array = space->val + bs2 * l;
40875cae7c1SHong Zhang     vals  = values + idx * bs2 * n + bs * i;
40975cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
41014069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
41175cae7c1SHong Zhang       array += bs;
41275cae7c1SHong Zhang       vals += rmax * bs;
41375cae7c1SHong Zhang     }
4145bd3b8fbSHong Zhang     l++;
415a2d1c673SSatish Balay   }
4165bd3b8fbSHong Zhang   stash->n += n;
41775cae7c1SHong Zhang   space->local_used += n;
41875cae7c1SHong Zhang   space->local_remaining -= n;
4193a40ed3dSBarry Smith   PetscFunctionReturn(0);
4209417f4adSLois Curfman McInnes }
4214c1ff481SSatish Balay /*
4228798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4234c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4244c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4254c1ff481SSatish Balay   processors.
426bc5ccf88SSatish Balay 
4274c1ff481SSatish Balay   Input Parameters:
4284c1ff481SSatish Balay   stash  - the stash
4294c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4304c1ff481SSatish Balay            for each node.
4314c1ff481SSatish Balay 
43211a5261eSBarry Smith   Note:
43395452b02SPatrick Sanan     The 'owners' array in the cased of the blocked-stash has the
4344c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4354c1ff481SSatish Balay   the proper global indices.
4364c1ff481SSatish Balay */
437*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
438*d71ae5a4SJacob Faibussowitsch {
439ac2b2aa0SJed Brown   PetscFunctionBegin;
4409566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterBegin)(mat, stash, owners));
441ac2b2aa0SJed Brown   PetscFunctionReturn(0);
442ac2b2aa0SJed Brown }
443ac2b2aa0SJed Brown 
444*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
445*d71ae5a4SJacob Faibussowitsch {
446c1ac3661SBarry Smith   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
447fe09c992SBarry Smith   PetscInt           size = stash->size, nsends;
44875cae7c1SHong Zhang   PetscInt           count, *sindices, **rindices, i, j, idx, lastidx, l;
44954f21887SBarry Smith   PetscScalar      **rvalues, *svalues;
450bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
451563fb871SSatish Balay   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
45276ec1555SBarry Smith   PetscMPIInt       *sizes, *nlengths, nreceives;
4535bd3b8fbSHong Zhang   PetscInt          *sp_idx, *sp_idy;
45454f21887SBarry Smith   PetscScalar       *sp_val;
4555bd3b8fbSHong Zhang   PetscMatStashSpace space, space_next;
456bc5ccf88SSatish Balay 
457bc5ccf88SSatish Balay   PetscFunctionBegin;
4584b4eb8d3SJed Brown   { /* make sure all processors are either in INSERTMODE or ADDMODE */
4594b4eb8d3SJed Brown     InsertMode addv;
4601c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
46108401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
4624b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4634b4eb8d3SJed Brown   }
4644b4eb8d3SJed Brown 
4654c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
46675cae7c1SHong Zhang 
467bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
4689566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
4699566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
470a2d1c673SSatish Balay 
47175cae7c1SHong Zhang   i = j   = 0;
4727357eb19SBarry Smith   lastidx = -1;
4735bd3b8fbSHong Zhang   space   = stash->space_head;
4746c4ed002SBarry Smith   while (space) {
47575cae7c1SHong Zhang     space_next = space->next;
4765bd3b8fbSHong Zhang     sp_idx     = space->idx;
47775cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
4787357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4795bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4807357eb19SBarry Smith       lastidx = idx;
4817357eb19SBarry Smith       for (; j < size; j++) {
4824c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j + 1]) {
4839371c9d4SSatish Balay           nlengths[j]++;
4849371c9d4SSatish Balay           owner[i] = j;
4859371c9d4SSatish Balay           break;
486bc5ccf88SSatish Balay         }
487bc5ccf88SSatish Balay       }
48875cae7c1SHong Zhang       i++;
48975cae7c1SHong Zhang     }
49075cae7c1SHong Zhang     space = space_next;
491bc5ccf88SSatish Balay   }
492071fcb05SBarry Smith 
493563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
4949566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
495563fb871SSatish Balay   for (i = 0, nsends = 0; i < size; i++) {
4968865f1eaSKarl Rupp     if (nlengths[i]) {
4979371c9d4SSatish Balay       sizes[i] = 1;
4989371c9d4SSatish Balay       nsends++;
4998865f1eaSKarl Rupp     }
500563fb871SSatish Balay   }
501bc5ccf88SSatish Balay 
5029371c9d4SSatish Balay   {
5039371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
504563fb871SSatish Balay     /* Determine the number of messages to expect, their lengths, from from-ids */
5059566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
5069566063dSJacob Faibussowitsch     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
507563fb871SSatish Balay     /* since clubbing row,col - lengths are multiplied by 2 */
508563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
5099566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
510563fb871SSatish Balay     /* values are size 'bs2' lengths (and remove earlier factor 2 */
511563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
5129566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
5139566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
5149371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
5159371c9d4SSatish Balay   }
516bc5ccf88SSatish Balay 
517bc5ccf88SSatish Balay   /* do sends:
518bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
519bc5ccf88SSatish Balay          the ith processor
520bc5ccf88SSatish Balay   */
5219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
5229566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
5239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
524a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
5259371c9d4SSatish Balay   startv[0] = 0;
5269371c9d4SSatish Balay   starti[0] = 0;
527bc5ccf88SSatish Balay   for (i = 1; i < size; i++) {
528563fb871SSatish Balay     startv[i] = startv[i - 1] + nlengths[i - 1];
529533163c2SBarry Smith     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
530bc5ccf88SSatish Balay   }
53175cae7c1SHong Zhang 
53275cae7c1SHong Zhang   i     = 0;
5335bd3b8fbSHong Zhang   space = stash->space_head;
5346c4ed002SBarry Smith   while (space) {
53575cae7c1SHong Zhang     space_next = space->next;
5365bd3b8fbSHong Zhang     sp_idx     = space->idx;
5375bd3b8fbSHong Zhang     sp_idy     = space->idy;
5385bd3b8fbSHong Zhang     sp_val     = space->val;
53975cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
540bc5ccf88SSatish Balay       j = owner[i];
541a2d1c673SSatish Balay       if (bs2 == 1) {
5425bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
543a2d1c673SSatish Balay       } else {
544c1ac3661SBarry Smith         PetscInt     k;
54554f21887SBarry Smith         PetscScalar *buf1, *buf2;
5464c1ff481SSatish Balay         buf1 = svalues + bs2 * startv[j];
547b087b6d6SSatish Balay         buf2 = space->val + bs2 * l;
5488865f1eaSKarl Rupp         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
549a2d1c673SSatish Balay       }
5505bd3b8fbSHong Zhang       sindices[starti[j]]               = sp_idx[l];
5515bd3b8fbSHong Zhang       sindices[starti[j] + nlengths[j]] = sp_idy[l];
552bc5ccf88SSatish Balay       startv[j]++;
553bc5ccf88SSatish Balay       starti[j]++;
55475cae7c1SHong Zhang       i++;
55575cae7c1SHong Zhang     }
55675cae7c1SHong Zhang     space = space_next;
557bc5ccf88SSatish Balay   }
558bc5ccf88SSatish Balay   startv[0] = 0;
5598865f1eaSKarl Rupp   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
560e5d0e772SSatish Balay 
561bc5ccf88SSatish Balay   for (i = 0, count = 0; i < size; i++) {
56276ec1555SBarry Smith     if (sizes[i]) {
5639566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
5649566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
565bc5ccf88SSatish Balay     }
566b85c94c3SSatish Balay   }
5676cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5689566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends));
569e5d0e772SSatish Balay   for (i = 0; i < size; i++) {
57048a46eb9SPierre Jolivet     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
571e5d0e772SSatish Balay   }
572e5d0e772SSatish Balay #endif
5739566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
5749566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
5759566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
5769566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
577a2d1c673SSatish Balay 
578563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
5799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
580563fb871SSatish Balay 
581563fb871SSatish Balay   for (i = 0; i < nreceives; i++) {
582563fb871SSatish Balay     recv_waits[2 * i]     = recv_waits1[i];
583563fb871SSatish Balay     recv_waits[2 * i + 1] = recv_waits2[i];
584563fb871SSatish Balay   }
585563fb871SSatish Balay   stash->recv_waits = recv_waits;
5868865f1eaSKarl Rupp 
5879566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
5889566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
589563fb871SSatish Balay 
590c05d87d6SBarry Smith   stash->svalues         = svalues;
591c05d87d6SBarry Smith   stash->sindices        = sindices;
592c05d87d6SBarry Smith   stash->rvalues         = rvalues;
593c05d87d6SBarry Smith   stash->rindices        = rindices;
594c05d87d6SBarry Smith   stash->send_waits      = send_waits;
595c05d87d6SBarry Smith   stash->nsends          = nsends;
596c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
59767318a8aSJed Brown   stash->reproduce_count = 0;
598bc5ccf88SSatish Balay   PetscFunctionReturn(0);
599bc5ccf88SSatish Balay }
600bc5ccf88SSatish Balay 
601a2d1c673SSatish Balay /*
6028798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6038798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6044c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6054c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6064c1ff481SSatish Balay 
6074c1ff481SSatish Balay    Input Parameters:
6084c1ff481SSatish Balay    stash - the stash
6094c1ff481SSatish Balay 
6104c1ff481SSatish Balay    Output Parameters:
6114c1ff481SSatish Balay    nvals - the number of entries in the current message.
6124c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6134c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6144c1ff481SSatish Balay    vals  - the values
6154c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6164c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6174c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
618a2d1c673SSatish Balay */
619*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
620*d71ae5a4SJacob Faibussowitsch {
621ac2b2aa0SJed Brown   PetscFunctionBegin;
6229566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
623ac2b2aa0SJed Brown   PetscFunctionReturn(0);
624ac2b2aa0SJed Brown }
625ac2b2aa0SJed Brown 
626*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
627*d71ae5a4SJacob Faibussowitsch {
628533163c2SBarry Smith   PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
629fe09c992SBarry Smith   PetscInt    bs2;
630a2d1c673SSatish Balay   MPI_Status  recv_status;
631ace3abfcSBarry Smith   PetscBool   match_found = PETSC_FALSE;
632bc5ccf88SSatish Balay 
633bc5ccf88SSatish Balay   PetscFunctionBegin;
634a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
635a2d1c673SSatish Balay   /* Return if no more messages to process */
6368865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
637a2d1c673SSatish Balay 
6384c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
63967318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
640a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
641a2d1c673SSatish Balay   while (!match_found) {
64267318a8aSJed Brown     if (stash->reproduce) {
64367318a8aSJed Brown       i = stash->reproduce_count++;
6449566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
64567318a8aSJed Brown     } else {
6469566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
64767318a8aSJed Brown     }
64808401ef6SPierre Jolivet     PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");
649533163c2SBarry Smith 
65067318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
651a2d1c673SSatish Balay     if (i % 2) {
6529566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
653c1dc657dSBarry Smith       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
654a2d1c673SSatish Balay       *nvals                            = *nvals / bs2;
655563fb871SSatish Balay     } else {
6569566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
657563fb871SSatish Balay       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
658563fb871SSatish Balay       *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
659bc5ccf88SSatish Balay     }
660a2d1c673SSatish Balay 
661cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
662c1dc657dSBarry Smith     i1 = flg_v[2 * recv_status.MPI_SOURCE];
663c1dc657dSBarry Smith     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
664a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
665563fb871SSatish Balay       *rows = stash->rindices[i2];
666a2d1c673SSatish Balay       *cols = *rows + *nvals;
667563fb871SSatish Balay       *vals = stash->rvalues[i1];
668a2d1c673SSatish Balay       *flg  = 1;
669a2d1c673SSatish Balay       stash->nprocessed++;
67035d8aa7fSBarry Smith       match_found = PETSC_TRUE;
671bc5ccf88SSatish Balay     }
672bc5ccf88SSatish Balay   }
673bc5ccf88SSatish Balay   PetscFunctionReturn(0);
674bc5ccf88SSatish Balay }
675d7d60843SJed Brown 
676e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
677d7d60843SJed Brown typedef struct {
678d7d60843SJed Brown   PetscInt    row;
679d7d60843SJed Brown   PetscInt    col;
680d7d60843SJed Brown   PetscScalar vals[1]; /* Actually an array of length bs2 */
681d7d60843SJed Brown } MatStashBlock;
682d7d60843SJed Brown 
683*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
684*d71ae5a4SJacob Faibussowitsch {
685d7d60843SJed Brown   PetscMatStashSpace space;
686d7d60843SJed Brown   PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
687d7d60843SJed Brown   PetscScalar      **valptr;
688d7d60843SJed Brown 
689d7d60843SJed Brown   PetscFunctionBegin;
6909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
691d7d60843SJed Brown   for (space = stash->space_head, cnt = 0; space; space = space->next) {
692d7d60843SJed Brown     for (i = 0; i < space->local_used; i++) {
693d7d60843SJed Brown       row[cnt]    = space->idx[i];
694d7d60843SJed Brown       col[cnt]    = space->idy[i];
695d7d60843SJed Brown       valptr[cnt] = &space->val[i * bs2];
696d7d60843SJed Brown       perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
697d7d60843SJed Brown       cnt++;
698d7d60843SJed Brown     }
699d7d60843SJed Brown   }
70008401ef6SPierre Jolivet   PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
7019566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
702d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
703d7d60843SJed Brown   for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
704d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
705d7d60843SJed Brown       PetscInt colstart;
7069566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
707d7d60843SJed Brown       for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
708d7d60843SJed Brown         PetscInt       j, l;
709d7d60843SJed Brown         MatStashBlock *block;
7109566063dSJacob Faibussowitsch         PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
711d7d60843SJed Brown         block->row = row[rowstart];
712d7d60843SJed Brown         block->col = col[colstart];
7139566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
714d7d60843SJed Brown         for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
715d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
716d7d60843SJed Brown             for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
717d7d60843SJed Brown           } else {
7189566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
719d7d60843SJed Brown           }
720d7d60843SJed Brown         }
721d7d60843SJed Brown         colstart = j;
722d7d60843SJed Brown       }
723d7d60843SJed Brown       rowstart = i;
724d7d60843SJed Brown     }
725d7d60843SJed Brown   }
7269566063dSJacob Faibussowitsch   PetscCall(PetscFree4(row, col, valptr, perm));
727d7d60843SJed Brown   PetscFunctionReturn(0);
728d7d60843SJed Brown }
729d7d60843SJed Brown 
730*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
731*d71ae5a4SJacob Faibussowitsch {
732d7d60843SJed Brown   PetscFunctionBegin;
733d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
734d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
735d7d60843SJed Brown     PetscMPIInt  blocklens[2];
736d7d60843SJed Brown     MPI_Aint     displs[2];
737d7d60843SJed Brown     MPI_Datatype types[2], stype;
738223b4c07SBarry Smith     /*
739223b4c07SBarry Smith         DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
740f41aa5efSJunchao Zhang        std::complex itself has standard layout, so does DummyBlock, recursively.
741a5b23f4aSJose E. Roman        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
742f41aa5efSJunchao Zhang        though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
743f41aa5efSJunchao Zhang        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
744f41aa5efSJunchao Zhang 
745f41aa5efSJunchao Zhang        We can test if std::complex has standard layout with the following code:
746f41aa5efSJunchao Zhang        #include <iostream>
747f41aa5efSJunchao Zhang        #include <type_traits>
748f41aa5efSJunchao Zhang        #include <complex>
749f41aa5efSJunchao Zhang        int main() {
750f41aa5efSJunchao Zhang          std::cout << std::boolalpha;
751f41aa5efSJunchao Zhang          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
752f41aa5efSJunchao Zhang        }
753f41aa5efSJunchao Zhang        Output: true
7549503c6c6SJed Brown      */
7559371c9d4SSatish Balay     struct DummyBlock {
7569371c9d4SSatish Balay       PetscInt    row, col;
7579371c9d4SSatish Balay       PetscScalar vals;
7589371c9d4SSatish Balay     };
759d7d60843SJed Brown 
7609503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
761d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
762d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
763d7d60843SJed Brown     }
7649566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
7659566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
7669566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
767d7d60843SJed Brown     blocklens[0] = 2;
768d7d60843SJed Brown     blocklens[1] = bs2;
7699503c6c6SJed Brown     displs[0]    = offsetof(struct DummyBlock, row);
7709503c6c6SJed Brown     displs[1]    = offsetof(struct DummyBlock, vals);
771d7d60843SJed Brown     types[0]     = MPIU_INT;
772d7d60843SJed Brown     types[1]     = MPIU_SCALAR;
7739566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
7749566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stype));
7759566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
7769566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
7779566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_free(&stype));
778d7d60843SJed Brown   }
779d7d60843SJed Brown   PetscFunctionReturn(0);
780d7d60843SJed Brown }
781d7d60843SJed Brown 
782d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
783d7d60843SJed Brown  * Here we post the main sends.
784d7d60843SJed Brown  */
785*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
786*d71ae5a4SJacob Faibussowitsch {
787d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
788d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)sdata;
789d7d60843SJed Brown 
790d7d60843SJed Brown   PetscFunctionBegin;
79108401ef6SPierre Jolivet   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
7929566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
793d7d60843SJed Brown   stash->sendframes[rankid].count   = hdr->count;
794d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
795d7d60843SJed Brown   PetscFunctionReturn(0);
796d7d60843SJed Brown }
797d7d60843SJed Brown 
798223b4c07SBarry Smith /*
799223b4c07SBarry Smith     Callback invoked by target after receiving rendezvous message.
800223b4c07SBarry Smith     Here we post the main recvs.
801d7d60843SJed Brown  */
802*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
803*d71ae5a4SJacob Faibussowitsch {
804d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
805d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)rdata;
806d7d60843SJed Brown   MatStashFrame  *frame;
807d7d60843SJed Brown 
808d7d60843SJed Brown   PetscFunctionBegin;
8099566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
8109566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
8119566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
812d7d60843SJed Brown   frame->count   = hdr->count;
813d7d60843SJed Brown   frame->pending = 1;
814d7d60843SJed Brown   PetscFunctionReturn(0);
815d7d60843SJed Brown }
816d7d60843SJed Brown 
817d7d60843SJed Brown /*
818d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
819d7d60843SJed Brown  */
820*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
821*d71ae5a4SJacob Faibussowitsch {
822d7d60843SJed Brown   size_t nblocks;
823d7d60843SJed Brown   char  *sendblocks;
824d7d60843SJed Brown 
825d7d60843SJed Brown   PetscFunctionBegin;
82676bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
8274b4eb8d3SJed Brown     InsertMode addv;
8281c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
82908401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
8304b4eb8d3SJed Brown   }
8314b4eb8d3SJed Brown 
8329566063dSJacob Faibussowitsch   PetscCall(MatStashBlockTypeSetUp(stash));
8339566063dSJacob Faibussowitsch   PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
8349566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
8359566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
83660f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
83723b7d1baSJed Brown     PetscInt i;
83823b7d1baSJed Brown     size_t   b;
83997da8949SJed Brown     for (i = 0, b = 0; i < stash->nsendranks; i++) {
84097da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
84197da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
84297da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
84397da8949SJed Brown       for (; b < nblocks; b++) {
84497da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
845aed4548fSBarry Smith         PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
84697da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
84797da8949SJed Brown         stash->sendhdr[i].count++;
84897da8949SJed Brown       }
84997da8949SJed Brown     }
85097da8949SJed Brown   } else { /* Dynamically count and pack (first time) */
85123b7d1baSJed Brown     PetscInt sendno;
85223b7d1baSJed Brown     size_t   i, rowstart;
853d7d60843SJed Brown 
854d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
855d7d60843SJed Brown     stash->nsendranks = 0;
856d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
8577e2ea869SJed Brown       PetscInt       owner;
858d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8599566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
860d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
861d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
862d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8637e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
864d7d60843SJed Brown       }
865d7d60843SJed Brown       stash->nsendranks++;
866d7d60843SJed Brown       rowstart = i;
867d7d60843SJed Brown     }
8689566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));
869d7d60843SJed Brown 
870d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
871d7d60843SJed Brown     sendno = 0;
872d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
873d7d60843SJed Brown       PetscInt       owner;
874d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8759566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
876d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
877d7d60843SJed Brown       stash->sendranks[sendno] = owner;
878d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
879d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8807e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
881d7d60843SJed Brown       }
882d7d60843SJed Brown       stash->sendframes[sendno].buffer  = sendblock_rowstart;
883d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
884d7d60843SJed Brown       stash->sendhdr[sendno].count      = i - rowstart;
885d7d60843SJed Brown       sendno++;
886d7d60843SJed Brown       rowstart = i;
887d7d60843SJed Brown     }
88808401ef6SPierre Jolivet     PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
889d7d60843SJed Brown   }
890d7d60843SJed Brown 
8914b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
8924b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
8934b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
89423b7d1baSJed Brown     size_t i;
8954b4eb8d3SJed Brown     for (i = 0; i < nblocks; i++) {
8964b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8974b4eb8d3SJed Brown       sendblock_i->row           = -(sendblock_i->row + 1);
8984b4eb8d3SJed Brown     }
8994b4eb8d3SJed Brown   }
9004b4eb8d3SJed Brown 
90160f34548SJunchao Zhang   if (stash->first_assembly_done) {
90297da8949SJed Brown     PetscMPIInt i, tag;
9039566063dSJacob Faibussowitsch     PetscCall(PetscCommGetNewTag(stash->comm, &tag));
90448a46eb9SPierre Jolivet     for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
90548a46eb9SPierre Jolivet     for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
90697da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
90797da8949SJed Brown   } else {
9089371c9d4SSatish Balay     PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
9099566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
91097da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
91197da8949SJed Brown   }
912d7d60843SJed Brown 
9139566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
914d7d60843SJed Brown   stash->recvframe_active    = NULL;
915d7d60843SJed Brown   stash->recvframe_i         = 0;
916d7d60843SJed Brown   stash->some_i              = 0;
917d7d60843SJed Brown   stash->some_count          = 0;
918d7d60843SJed Brown   stash->recvcount           = 0;
91960f34548SJunchao Zhang   stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9204b4eb8d3SJed Brown   stash->insertmode          = &mat->insertmode;
921d7d60843SJed Brown   PetscFunctionReturn(0);
922d7d60843SJed Brown }
923d7d60843SJed Brown 
924*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
925*d71ae5a4SJacob Faibussowitsch {
926d7d60843SJed Brown   MatStashBlock *block;
927d7d60843SJed Brown 
928d7d60843SJed Brown   PetscFunctionBegin;
929d7d60843SJed Brown   *flg = 0;
930d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
931d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
932d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
9339566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
934d7d60843SJed Brown       stash->some_i = 0;
935d7d60843SJed Brown     }
936d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
937d7d60843SJed Brown     stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
938d7d60843SJed Brown     if (stash->use_status) {                                  /* Count what was actually sent */
9399566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count));
940d7d60843SJed Brown     }
9414b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9424b4eb8d3SJed Brown       block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
9434b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
944aed4548fSBarry Smith       PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
945aed4548fSBarry Smith       PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
9464b4eb8d3SJed Brown     }
947d7d60843SJed Brown     stash->some_i++;
948d7d60843SJed Brown     stash->recvcount++;
949d7d60843SJed Brown     stash->recvframe_i = 0;
950d7d60843SJed Brown   }
951d7d60843SJed Brown   *n    = 1;
952d7d60843SJed Brown   block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
9534b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
954d7d60843SJed Brown   *row = &block->row;
955d7d60843SJed Brown   *col = &block->col;
956d7d60843SJed Brown   *val = block->vals;
957d7d60843SJed Brown   stash->recvframe_i++;
958d7d60843SJed Brown   *flg = 1;
959d7d60843SJed Brown   PetscFunctionReturn(0);
960d7d60843SJed Brown }
961d7d60843SJed Brown 
962*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
963*d71ae5a4SJacob Faibussowitsch {
964d7d60843SJed Brown   PetscFunctionBegin;
9659566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
96660f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
967d2b3fd65SBarry Smith     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
9683575f486SJed Brown   } else { /* No reuse, so collect everything. */
9699566063dSJacob Faibussowitsch     PetscCall(MatStashScatterDestroy_BTS(stash));
97097da8949SJed Brown   }
971d7d60843SJed Brown 
972d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
973d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
974d7d60843SJed Brown      Also update the oldmax, only if it increases */
975d7d60843SJed Brown   if (stash->n) {
976d7d60843SJed Brown     PetscInt bs2     = stash->bs * stash->bs;
977d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
978d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
979d7d60843SJed Brown   }
980d7d60843SJed Brown 
981d7d60843SJed Brown   stash->nmax       = 0;
982d7d60843SJed Brown   stash->n          = 0;
983d7d60843SJed Brown   stash->reallocs   = -1;
984d7d60843SJed Brown   stash->nprocessed = 0;
985d7d60843SJed Brown 
9869566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
987d7d60843SJed Brown 
988f4259b30SLisandro Dalcin   stash->space = NULL;
989d7d60843SJed Brown 
990d7d60843SJed Brown   PetscFunctionReturn(0);
991d7d60843SJed Brown }
992d7d60843SJed Brown 
993*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
994*d71ae5a4SJacob Faibussowitsch {
995d7d60843SJed Brown   PetscFunctionBegin;
9969566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
9979566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
998d7d60843SJed Brown   stash->recvframes = NULL;
9999566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
100048a46eb9SPierre Jolivet   if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
1001d7d60843SJed Brown   stash->nsendranks = 0;
1002d7d60843SJed Brown   stash->nrecvranks = 0;
10039566063dSJacob Faibussowitsch   PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
10049566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->sendreqs));
10059566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvreqs));
10069566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvranks));
10079566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvhdr));
10089566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
1009d7d60843SJed Brown   PetscFunctionReturn(0);
1010d7d60843SJed Brown }
10111667be42SBarry Smith #endif
1012