xref: /petsc/src/mat/utils/matstash.c (revision a678f235327a9a2ea6d25b8ff1d7476fa135b6b8)
1af0996ceSBarry Smith #include <petsc/private/matimpl.h>
25bd3b8fbSHong Zhang 
3bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000
44c1ff481SSatish Balay 
5ac2b2aa0SJed Brown static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
6d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
875d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
1275d39b6aSBarry Smith #endif
13d7d60843SJed Brown 
149417f4adSLois Curfman McInnes /*
158798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
164c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
174c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
189417f4adSLois Curfman McInnes 
194c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
204c1ff481SSatish Balay 
214c1ff481SSatish Balay   Input Parameters:
224c1ff481SSatish Balay   comm - communicator, required for scatters.
234c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
244c1ff481SSatish Balay 
252fe279fdSBarry Smith   Output Parameter:
264c1ff481SSatish Balay   stash    - the newly created stash
279417f4adSLois Curfman McInnes */
28d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
29d71ae5a4SJacob Faibussowitsch {
30533163c2SBarry Smith   PetscInt  max, *opt, nopt, i;
31ace3abfcSBarry Smith   PetscBool flg;
32bc5ccf88SSatish Balay 
333a40ed3dSBarry Smith   PetscFunctionBegin;
34bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35752ec6e0SSatish Balay   stash->comm = comm;
368865f1eaSKarl Rupp 
379566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
389566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
399566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
42533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
43533163c2SBarry Smith 
44434d7ff9SSatish Balay   nopt = stash->size;
459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nopt, &opt));
469566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
47434d7ff9SSatish Balay   if (flg) {
48434d7ff9SSatish Balay     if (nopt == 1) max = opt[0];
49434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
50434d7ff9SSatish Balay     else if (stash->rank < nopt) max = opt[stash->rank];
51f4ab19daSSatish Balay     else max = 0; /* Use default */
52434d7ff9SSatish Balay     stash->umax = max;
53434d7ff9SSatish Balay   } else {
54434d7ff9SSatish Balay     stash->umax = 0;
55434d7ff9SSatish Balay   }
569566063dSJacob Faibussowitsch   PetscCall(PetscFree(opt));
574c1ff481SSatish Balay   if (bs <= 0) bs = 1;
58a2d1c673SSatish Balay 
594c1ff481SSatish Balay   stash->bs         = bs;
609417f4adSLois Curfman McInnes   stash->nmax       = 0;
61434d7ff9SSatish Balay   stash->oldnmax    = 0;
629417f4adSLois Curfman McInnes   stash->n          = 0;
634c1ff481SSatish Balay   stash->reallocs   = -1;
64f4259b30SLisandro Dalcin   stash->space_head = NULL;
65f4259b30SLisandro Dalcin   stash->space      = NULL;
669417f4adSLois Curfman McInnes 
67f4259b30SLisandro Dalcin   stash->send_waits  = NULL;
68f4259b30SLisandro Dalcin   stash->recv_waits  = NULL;
69f4259b30SLisandro Dalcin   stash->send_status = NULL;
70bc5ccf88SSatish Balay   stash->nsends      = 0;
71bc5ccf88SSatish Balay   stash->nrecvs      = 0;
72f4259b30SLisandro Dalcin   stash->svalues     = NULL;
73f4259b30SLisandro Dalcin   stash->rvalues     = NULL;
74f4259b30SLisandro Dalcin   stash->rindices    = NULL;
75a2d1c673SSatish Balay   stash->nprocessed  = 0;
7667318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
77d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
788865f1eaSKarl Rupp 
799566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
801667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
81ca723aa4SStefano Zampini   flg = PETSC_FALSE;
829566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
83b30fb036SBarry Smith   if (!flg) {
84d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
85d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
87d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88ac2b2aa0SJed Brown   } else {
891667be42SBarry Smith #endif
90ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
91ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
92ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
93ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
941667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
95ac2b2aa0SJed Brown   }
961667be42SBarry Smith #endif
973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
989417f4adSLois Curfman McInnes }
999417f4adSLois Curfman McInnes 
1004c1ff481SSatish Balay /*
1018798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1024c1ff481SSatish Balay */
103d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashDestroy_Private(MatStash *stash)
104d71ae5a4SJacob Faibussowitsch {
105bc5ccf88SSatish Balay   PetscFunctionBegin;
1069566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1079566063dSJacob Faibussowitsch   if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
108f4259b30SLisandro Dalcin   stash->space = NULL;
1099566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->flg_v));
1103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
111bc5ccf88SSatish Balay }
112bc5ccf88SSatish Balay 
1134c1ff481SSatish Balay /*
11467318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1154c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
11667318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1174c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1184c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1194c1ff481SSatish Balay    so that the same value can be used the next time through.
1204c1ff481SSatish Balay */
121d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
122d71ae5a4SJacob Faibussowitsch {
123ac2b2aa0SJed Brown   PetscFunctionBegin;
1249566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterEnd)(stash));
1253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
126ac2b2aa0SJed Brown }
127ac2b2aa0SJed Brown 
128d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
129d71ae5a4SJacob Faibussowitsch {
130533163c2SBarry Smith   PetscInt    nsends = stash->nsends, bs2, oldnmax, i;
131a2d1c673SSatish Balay   MPI_Status *send_status;
132a2d1c673SSatish Balay 
1333a40ed3dSBarry Smith   PetscFunctionBegin;
134533163c2SBarry Smith   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
135a2d1c673SSatish Balay   /* wait on sends */
136a2d1c673SSatish Balay   if (nsends) {
1379566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * nsends, &send_status));
1389566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
1399566063dSJacob Faibussowitsch     PetscCall(PetscFree(send_status));
140a2d1c673SSatish Balay   }
141a2d1c673SSatish Balay 
142c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
143434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
144434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
145b9b97703SBarry Smith   if (stash->n) {
14694b769a5SSatish Balay     bs2     = stash->bs * stash->bs;
1478a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
148434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
149b9b97703SBarry Smith   }
150434d7ff9SSatish Balay 
151d07ff455SSatish Balay   stash->nmax       = 0;
152d07ff455SSatish Balay   stash->n          = 0;
1534c1ff481SSatish Balay   stash->reallocs   = -1;
154a2d1c673SSatish Balay   stash->nprocessed = 0;
1558865f1eaSKarl Rupp 
1569566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1578865f1eaSKarl Rupp 
158f4259b30SLisandro Dalcin   stash->space = NULL;
1598865f1eaSKarl Rupp 
1609566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->send_waits));
1619566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recv_waits));
1629566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->svalues, stash->sindices));
1639566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues[0]));
1649566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues));
1659566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices[0]));
1669566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices));
1673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1689417f4adSLois Curfman McInnes }
1699417f4adSLois Curfman McInnes 
1704c1ff481SSatish Balay /*
1716aad120cSJose E. Roman    MatStashGetInfo_Private - Gets the relevant statistics of the stash
1724c1ff481SSatish Balay 
1734c1ff481SSatish Balay    Input Parameters:
1744c1ff481SSatish Balay    stash    - the stash
17594b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1764c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1774c1ff481SSatish Balay 
1784c1ff481SSatish Balay */
179d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
180d71ae5a4SJacob Faibussowitsch {
181c1ac3661SBarry Smith   PetscInt bs2 = stash->bs * stash->bs;
18294b769a5SSatish Balay 
1833a40ed3dSBarry Smith   PetscFunctionBegin;
1841ecfd215SBarry Smith   if (nstash) *nstash = stash->n * bs2;
1851ecfd215SBarry Smith   if (reallocs) {
186434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
187434d7ff9SSatish Balay     else *reallocs = stash->reallocs;
1881ecfd215SBarry Smith   }
1893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
190bc5ccf88SSatish Balay }
1914c1ff481SSatish Balay 
1924c1ff481SSatish Balay /*
1938798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1944c1ff481SSatish Balay 
1954c1ff481SSatish Balay    Input Parameters:
1964c1ff481SSatish Balay    stash  - the stash
1974c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1984c1ff481SSatish Balay             this value is used while allocating memory.
1994c1ff481SSatish Balay */
200d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
201d71ae5a4SJacob Faibussowitsch {
202bc5ccf88SSatish Balay   PetscFunctionBegin;
203434d7ff9SSatish Balay   stash->umax = max;
2043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20597530c3fSBarry Smith }
20697530c3fSBarry Smith 
2078798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2084c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2094c1ff481SSatish Balay    being inserted into the stash.
2104c1ff481SSatish Balay 
2114c1ff481SSatish Balay    Input Parameters:
2124c1ff481SSatish Balay    stash - the stash
2134c1ff481SSatish Balay    incr  - the minimum increase requested
2144c1ff481SSatish Balay 
21511a5261eSBarry Smith    Note:
2164c1ff481SSatish Balay    This routine doubles the currently used memory.
2174c1ff481SSatish Balay  */
218d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
219d71ae5a4SJacob Faibussowitsch {
2205bd3b8fbSHong Zhang   PetscInt newnmax, bs2 = stash->bs * stash->bs;
2219417f4adSLois Curfman McInnes 
2223a40ed3dSBarry Smith   PetscFunctionBegin;
2239417f4adSLois Curfman McInnes   /* allocate a larger stash */
224c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
225434d7ff9SSatish Balay     if (stash->umax) newnmax = stash->umax / bs2;
226434d7ff9SSatish Balay     else newnmax = DEFAULT_STASH_SIZE / bs2;
227*a678f235SPierre Jolivet   } else if (!stash->nmax) { /* reusing stash */
228434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
229434d7ff9SSatish Balay     else newnmax = stash->oldnmax / bs2;
230434d7ff9SSatish Balay   } else newnmax = stash->nmax * 2;
2314c1ff481SSatish Balay   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
232d07ff455SSatish Balay 
23375cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
2349566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
235*a678f235SPierre Jolivet   if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
236b087b6d6SSatish Balay     stash->space_head = stash->space;
23775cae7c1SHong Zhang   }
238b087b6d6SSatish Balay 
239bc5ccf88SSatish Balay   stash->reallocs++;
24075cae7c1SHong Zhang   stash->nmax = newnmax;
2413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
242bc5ccf88SSatish Balay }
243bc5ccf88SSatish Balay /*
2448798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2454c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2464c1ff481SSatish Balay   can be inserted with a single call to this function.
2474c1ff481SSatish Balay 
2484c1ff481SSatish Balay   Input Parameters:
2494c1ff481SSatish Balay   stash  - the stash
2504c1ff481SSatish Balay   row    - the global row correspoiding to the values
2514c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2524c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2534c1ff481SSatish Balay   values - the values inserted
254bc5ccf88SSatish Balay */
255d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
256d71ae5a4SJacob Faibussowitsch {
257b400d20cSBarry Smith   PetscInt           i, k, cnt = 0;
25875cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
259bc5ccf88SSatish Balay 
260bc5ccf88SSatish Balay   PetscFunctionBegin;
2614c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
26248a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
26375cae7c1SHong Zhang   space = stash->space;
26475cae7c1SHong Zhang   k     = space->local_used;
2654c1ff481SSatish Balay   for (i = 0; i < n; i++) {
26614069ce9SStefano Zampini     if (ignorezeroentries && values && values[i] == 0.0) continue;
26775cae7c1SHong Zhang     space->idx[k] = row;
26875cae7c1SHong Zhang     space->idy[k] = idxn[i];
26914069ce9SStefano Zampini     space->val[k] = values ? values[i] : 0.0;
27075cae7c1SHong Zhang     k++;
271b400d20cSBarry Smith     cnt++;
2729417f4adSLois Curfman McInnes   }
273b400d20cSBarry Smith   stash->n += cnt;
274b400d20cSBarry Smith   space->local_used += cnt;
275b400d20cSBarry Smith   space->local_remaining -= cnt;
2763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
277a2d1c673SSatish Balay }
27875cae7c1SHong Zhang 
2794c1ff481SSatish Balay /*
2808798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2814c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2824c1ff481SSatish Balay   can be inserted with a single call to this function.
283a2d1c673SSatish Balay 
2844c1ff481SSatish Balay   Input Parameters:
2854c1ff481SSatish Balay   stash   - the stash
2864c1ff481SSatish Balay   row     - the global row correspoiding to the values
2874c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2884c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2894c1ff481SSatish Balay   values  - the values inserted
2904c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2914c1ff481SSatish Balay             this happens because the input is columnoriented.
2924c1ff481SSatish Balay */
293d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
294d71ae5a4SJacob Faibussowitsch {
29550e9ab7cSBarry Smith   PetscInt           i, k, cnt = 0;
29675cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
297a2d1c673SSatish Balay 
2984c1ff481SSatish Balay   PetscFunctionBegin;
2994c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
30048a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
30175cae7c1SHong Zhang   space = stash->space;
30275cae7c1SHong Zhang   k     = space->local_used;
3034c1ff481SSatish Balay   for (i = 0; i < n; i++) {
30414069ce9SStefano Zampini     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
30575cae7c1SHong Zhang     space->idx[k] = row;
30675cae7c1SHong Zhang     space->idy[k] = idxn[i];
30714069ce9SStefano Zampini     space->val[k] = values ? values[i * stepval] : 0.0;
30875cae7c1SHong Zhang     k++;
309b400d20cSBarry Smith     cnt++;
3104c1ff481SSatish Balay   }
311b400d20cSBarry Smith   stash->n += cnt;
312b400d20cSBarry Smith   space->local_used += cnt;
313b400d20cSBarry Smith   space->local_remaining -= cnt;
3143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3154c1ff481SSatish Balay }
3164c1ff481SSatish Balay 
3174c1ff481SSatish Balay /*
3188798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3194c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3204c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3214c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3224c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3234c1ff481SSatish Balay 
3244c1ff481SSatish Balay   Input Parameters:
3254c1ff481SSatish Balay   stash  - the stash
3264c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3274c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3284c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3294c1ff481SSatish Balay            values. Each block is of size bs*bs.
3304c1ff481SSatish Balay   values - the values inserted
3314c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
332a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3334c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3344c1ff481SSatish Balay */
335d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
336d71ae5a4SJacob Faibussowitsch {
33775cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
33854f21887SBarry Smith   const PetscScalar *vals;
33954f21887SBarry Smith   PetscScalar       *array;
34075cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
341a2d1c673SSatish Balay 
342a2d1c673SSatish Balay   PetscFunctionBegin;
34348a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
34475cae7c1SHong Zhang   space = stash->space;
34575cae7c1SHong Zhang   l     = space->local_used;
34675cae7c1SHong Zhang   bs2   = bs * bs;
3474c1ff481SSatish Balay   for (i = 0; i < n; i++) {
34875cae7c1SHong Zhang     space->idx[l] = row;
34975cae7c1SHong Zhang     space->idy[l] = idxn[i];
35075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
35175cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
352da81f932SPierre Jolivet        function call */
35375cae7c1SHong Zhang     array = space->val + bs2 * l;
35475cae7c1SHong Zhang     vals  = values + idx * bs2 * n + bs * i;
35575cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
35614069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
35775cae7c1SHong Zhang       array++;
35875cae7c1SHong Zhang       vals += cmax * bs;
35975cae7c1SHong Zhang     }
36075cae7c1SHong Zhang     l++;
361a2d1c673SSatish Balay   }
3625bd3b8fbSHong Zhang   stash->n += n;
36375cae7c1SHong Zhang   space->local_used += n;
36475cae7c1SHong Zhang   space->local_remaining -= n;
3653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3664c1ff481SSatish Balay }
3674c1ff481SSatish Balay 
3684c1ff481SSatish Balay /*
3698798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3704c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3714c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3724c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3734c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3744c1ff481SSatish Balay 
3754c1ff481SSatish Balay   Input Parameters:
3764c1ff481SSatish Balay   stash  - the stash
3774c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3784c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3794c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3804c1ff481SSatish Balay            values. Each block is of size bs*bs.
3814c1ff481SSatish Balay   values - the values inserted
3824c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
383a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3844c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3854c1ff481SSatish Balay */
386d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
387d71ae5a4SJacob Faibussowitsch {
38875cae7c1SHong Zhang   PetscInt           i, j, k, bs2, bs = stash->bs, l;
38954f21887SBarry Smith   const PetscScalar *vals;
39054f21887SBarry Smith   PetscScalar       *array;
39175cae7c1SHong Zhang   PetscMatStashSpace space = stash->space;
3924c1ff481SSatish Balay 
3934c1ff481SSatish Balay   PetscFunctionBegin;
39448a46eb9SPierre Jolivet   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
39575cae7c1SHong Zhang   space = stash->space;
39675cae7c1SHong Zhang   l     = space->local_used;
39775cae7c1SHong Zhang   bs2   = bs * bs;
3984c1ff481SSatish Balay   for (i = 0; i < n; i++) {
39975cae7c1SHong Zhang     space->idx[l] = row;
40075cae7c1SHong Zhang     space->idy[l] = idxn[i];
40175cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40275cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
403da81f932SPierre Jolivet      function call */
40475cae7c1SHong Zhang     array = space->val + bs2 * l;
40575cae7c1SHong Zhang     vals  = values + idx * bs2 * n + bs * i;
40675cae7c1SHong Zhang     for (j = 0; j < bs; j++) {
40714069ce9SStefano Zampini       for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
40875cae7c1SHong Zhang       array += bs;
40975cae7c1SHong Zhang       vals += rmax * bs;
41075cae7c1SHong Zhang     }
4115bd3b8fbSHong Zhang     l++;
412a2d1c673SSatish Balay   }
4135bd3b8fbSHong Zhang   stash->n += n;
41475cae7c1SHong Zhang   space->local_used += n;
41575cae7c1SHong Zhang   space->local_remaining -= n;
4163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4179417f4adSLois Curfman McInnes }
4184c1ff481SSatish Balay /*
4198798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4204c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4214c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4224c1ff481SSatish Balay   processors.
423bc5ccf88SSatish Balay 
4244c1ff481SSatish Balay   Input Parameters:
4254c1ff481SSatish Balay   stash  - the stash
4264c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4274c1ff481SSatish Balay            for each node.
4284c1ff481SSatish Balay 
42911a5261eSBarry Smith   Note:
43095452b02SPatrick Sanan     The 'owners' array in the cased of the blocked-stash has the
4314c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4324c1ff481SSatish Balay   the proper global indices.
4334c1ff481SSatish Balay */
434d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
435d71ae5a4SJacob Faibussowitsch {
436ac2b2aa0SJed Brown   PetscFunctionBegin;
4379566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterBegin)(mat, stash, owners));
4383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
439ac2b2aa0SJed Brown }
440ac2b2aa0SJed Brown 
441d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
442d71ae5a4SJacob Faibussowitsch {
443c1ac3661SBarry Smith   PetscInt          *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
444fe09c992SBarry Smith   PetscInt           size = stash->size, nsends;
44575cae7c1SHong Zhang   PetscInt           count, *sindices, **rindices, i, j, idx, lastidx, l;
44654f21887SBarry Smith   PetscScalar      **rvalues, *svalues;
447bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
448563fb871SSatish Balay   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
44976ec1555SBarry Smith   PetscMPIInt       *sizes, *nlengths, nreceives;
4505bd3b8fbSHong Zhang   PetscInt          *sp_idx, *sp_idy;
45154f21887SBarry Smith   PetscScalar       *sp_val;
4525bd3b8fbSHong Zhang   PetscMatStashSpace space, space_next;
453bc5ccf88SSatish Balay 
454bc5ccf88SSatish Balay   PetscFunctionBegin;
4554b4eb8d3SJed Brown   { /* make sure all processors are either in INSERTMODE or ADDMODE */
4564b4eb8d3SJed Brown     InsertMode addv;
4571c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
45808401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
4594b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4604b4eb8d3SJed Brown   }
4614b4eb8d3SJed Brown 
4624c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
46375cae7c1SHong Zhang 
464bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
4659566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &nlengths));
4669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n + 1, &owner));
467a2d1c673SSatish Balay 
46875cae7c1SHong Zhang   i = j   = 0;
4697357eb19SBarry Smith   lastidx = -1;
4705bd3b8fbSHong Zhang   space   = stash->space_head;
4716c4ed002SBarry Smith   while (space) {
47275cae7c1SHong Zhang     space_next = space->next;
4735bd3b8fbSHong Zhang     sp_idx     = space->idx;
47475cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
4757357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4765bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4777357eb19SBarry Smith       lastidx = idx;
4787357eb19SBarry Smith       for (; j < size; j++) {
4794c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j + 1]) {
4809371c9d4SSatish Balay           nlengths[j]++;
4819371c9d4SSatish Balay           owner[i] = j;
4829371c9d4SSatish Balay           break;
483bc5ccf88SSatish Balay         }
484bc5ccf88SSatish Balay       }
48575cae7c1SHong Zhang       i++;
48675cae7c1SHong Zhang     }
48775cae7c1SHong Zhang     space = space_next;
488bc5ccf88SSatish Balay   }
489071fcb05SBarry Smith 
490563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
4919566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size, &sizes));
492563fb871SSatish Balay   for (i = 0, nsends = 0; i < size; i++) {
4938865f1eaSKarl Rupp     if (nlengths[i]) {
4949371c9d4SSatish Balay       sizes[i] = 1;
4959371c9d4SSatish Balay       nsends++;
4968865f1eaSKarl Rupp     }
497563fb871SSatish Balay   }
498bc5ccf88SSatish Balay 
4999371c9d4SSatish Balay   {
5009371c9d4SSatish Balay     PetscMPIInt *onodes, *olengths;
501563fb871SSatish Balay     /* Determine the number of messages to expect, their lengths, from from-ids */
5029566063dSJacob Faibussowitsch     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
5039566063dSJacob Faibussowitsch     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
504563fb871SSatish Balay     /* since clubbing row,col - lengths are multiplied by 2 */
505563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] *= 2;
5069566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
507563fb871SSatish Balay     /* values are size 'bs2' lengths (and remove earlier factor 2 */
508563fb871SSatish Balay     for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
5099566063dSJacob Faibussowitsch     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
5109566063dSJacob Faibussowitsch     PetscCall(PetscFree(onodes));
5119371c9d4SSatish Balay     PetscCall(PetscFree(olengths));
5129371c9d4SSatish Balay   }
513bc5ccf88SSatish Balay 
514bc5ccf88SSatish Balay   /* do sends:
515bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
516bc5ccf88SSatish Balay          the ith processor
517bc5ccf88SSatish Balay   */
5189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
5199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
5209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &startv, size, &starti));
521a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
5229371c9d4SSatish Balay   startv[0] = 0;
5239371c9d4SSatish Balay   starti[0] = 0;
524bc5ccf88SSatish Balay   for (i = 1; i < size; i++) {
525563fb871SSatish Balay     startv[i] = startv[i - 1] + nlengths[i - 1];
526533163c2SBarry Smith     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
527bc5ccf88SSatish Balay   }
52875cae7c1SHong Zhang 
52975cae7c1SHong Zhang   i     = 0;
5305bd3b8fbSHong Zhang   space = stash->space_head;
5316c4ed002SBarry Smith   while (space) {
53275cae7c1SHong Zhang     space_next = space->next;
5335bd3b8fbSHong Zhang     sp_idx     = space->idx;
5345bd3b8fbSHong Zhang     sp_idy     = space->idy;
5355bd3b8fbSHong Zhang     sp_val     = space->val;
53675cae7c1SHong Zhang     for (l = 0; l < space->local_used; l++) {
537bc5ccf88SSatish Balay       j = owner[i];
538a2d1c673SSatish Balay       if (bs2 == 1) {
5395bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
540a2d1c673SSatish Balay       } else {
541c1ac3661SBarry Smith         PetscInt     k;
54254f21887SBarry Smith         PetscScalar *buf1, *buf2;
5434c1ff481SSatish Balay         buf1 = svalues + bs2 * startv[j];
544b087b6d6SSatish Balay         buf2 = space->val + bs2 * l;
5458865f1eaSKarl Rupp         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
546a2d1c673SSatish Balay       }
5475bd3b8fbSHong Zhang       sindices[starti[j]]               = sp_idx[l];
5485bd3b8fbSHong Zhang       sindices[starti[j] + nlengths[j]] = sp_idy[l];
549bc5ccf88SSatish Balay       startv[j]++;
550bc5ccf88SSatish Balay       starti[j]++;
55175cae7c1SHong Zhang       i++;
55275cae7c1SHong Zhang     }
55375cae7c1SHong Zhang     space = space_next;
554bc5ccf88SSatish Balay   }
555bc5ccf88SSatish Balay   startv[0] = 0;
5568865f1eaSKarl Rupp   for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
557e5d0e772SSatish Balay 
558bc5ccf88SSatish Balay   for (i = 0, count = 0; i < size; i++) {
55976ec1555SBarry Smith     if (sizes[i]) {
5609566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
5619566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
562bc5ccf88SSatish Balay     }
563b85c94c3SSatish Balay   }
5646cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5659566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends));
566e5d0e772SSatish Balay   for (i = 0; i < size; i++) {
56748a46eb9SPierre Jolivet     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
568e5d0e772SSatish Balay   }
569e5d0e772SSatish Balay #endif
5709566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
5719566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
5729566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv, starti));
5739566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
574a2d1c673SSatish Balay 
575563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
5769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
577563fb871SSatish Balay 
578563fb871SSatish Balay   for (i = 0; i < nreceives; i++) {
579563fb871SSatish Balay     recv_waits[2 * i]     = recv_waits1[i];
580563fb871SSatish Balay     recv_waits[2 * i + 1] = recv_waits2[i];
581563fb871SSatish Balay   }
582563fb871SSatish Balay   stash->recv_waits = recv_waits;
5838865f1eaSKarl Rupp 
5849566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
5859566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
586563fb871SSatish Balay 
587c05d87d6SBarry Smith   stash->svalues         = svalues;
588c05d87d6SBarry Smith   stash->sindices        = sindices;
589c05d87d6SBarry Smith   stash->rvalues         = rvalues;
590c05d87d6SBarry Smith   stash->rindices        = rindices;
591c05d87d6SBarry Smith   stash->send_waits      = send_waits;
592c05d87d6SBarry Smith   stash->nsends          = nsends;
593c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
59467318a8aSJed Brown   stash->reproduce_count = 0;
5953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
596bc5ccf88SSatish Balay }
597bc5ccf88SSatish Balay 
598a2d1c673SSatish Balay /*
5998798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6008798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6014c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6024c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6034c1ff481SSatish Balay 
6042fe279fdSBarry Smith    Input Parameter:
6054c1ff481SSatish Balay    stash - the stash
6064c1ff481SSatish Balay 
6074c1ff481SSatish Balay    Output Parameters:
6084c1ff481SSatish Balay    nvals - the number of entries in the current message.
6094c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6104c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6114c1ff481SSatish Balay    vals  - the values
6124c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6134c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6144c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
615a2d1c673SSatish Balay */
616d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
617d71ae5a4SJacob Faibussowitsch {
618ac2b2aa0SJed Brown   PetscFunctionBegin;
6199566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
6203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
621ac2b2aa0SJed Brown }
622ac2b2aa0SJed Brown 
623d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
624d71ae5a4SJacob Faibussowitsch {
625533163c2SBarry Smith   PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
626fe09c992SBarry Smith   PetscInt    bs2;
627a2d1c673SSatish Balay   MPI_Status  recv_status;
628ace3abfcSBarry Smith   PetscBool   match_found = PETSC_FALSE;
629bc5ccf88SSatish Balay 
630bc5ccf88SSatish Balay   PetscFunctionBegin;
631a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
632a2d1c673SSatish Balay   /* Return if no more messages to process */
6333ba16761SJacob Faibussowitsch   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);
634a2d1c673SSatish Balay 
6354c1ff481SSatish Balay   bs2 = stash->bs * stash->bs;
63667318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
637a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
638a2d1c673SSatish Balay   while (!match_found) {
63967318a8aSJed Brown     if (stash->reproduce) {
64067318a8aSJed Brown       i = stash->reproduce_count++;
6419566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
64267318a8aSJed Brown     } else {
6439566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
64467318a8aSJed Brown     }
64508401ef6SPierre Jolivet     PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");
646533163c2SBarry Smith 
64767318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
648a2d1c673SSatish Balay     if (i % 2) {
6499566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
650c1dc657dSBarry Smith       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
651a2d1c673SSatish Balay       *nvals                            = *nvals / bs2;
652563fb871SSatish Balay     } else {
6539566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
654563fb871SSatish Balay       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
655563fb871SSatish Balay       *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
656bc5ccf88SSatish Balay     }
657a2d1c673SSatish Balay 
658cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
659c1dc657dSBarry Smith     i1 = flg_v[2 * recv_status.MPI_SOURCE];
660c1dc657dSBarry Smith     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
661a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
662563fb871SSatish Balay       *rows = stash->rindices[i2];
663a2d1c673SSatish Balay       *cols = *rows + *nvals;
664563fb871SSatish Balay       *vals = stash->rvalues[i1];
665a2d1c673SSatish Balay       *flg  = 1;
666a2d1c673SSatish Balay       stash->nprocessed++;
66735d8aa7fSBarry Smith       match_found = PETSC_TRUE;
668bc5ccf88SSatish Balay     }
669bc5ccf88SSatish Balay   }
6703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
671bc5ccf88SSatish Balay }
672d7d60843SJed Brown 
673e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
674d7d60843SJed Brown typedef struct {
675d7d60843SJed Brown   PetscInt    row;
676d7d60843SJed Brown   PetscInt    col;
677d7d60843SJed Brown   PetscScalar vals[1]; /* Actually an array of length bs2 */
678d7d60843SJed Brown } MatStashBlock;
679d7d60843SJed Brown 
680d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
681d71ae5a4SJacob Faibussowitsch {
682d7d60843SJed Brown   PetscMatStashSpace space;
683d7d60843SJed Brown   PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
684d7d60843SJed Brown   PetscScalar      **valptr;
685d7d60843SJed Brown 
686d7d60843SJed Brown   PetscFunctionBegin;
6879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
688d7d60843SJed Brown   for (space = stash->space_head, cnt = 0; space; space = space->next) {
689d7d60843SJed Brown     for (i = 0; i < space->local_used; i++) {
690d7d60843SJed Brown       row[cnt]    = space->idx[i];
691d7d60843SJed Brown       col[cnt]    = space->idy[i];
692d7d60843SJed Brown       valptr[cnt] = &space->val[i * bs2];
693d7d60843SJed Brown       perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
694d7d60843SJed Brown       cnt++;
695d7d60843SJed Brown     }
696d7d60843SJed Brown   }
69708401ef6SPierre Jolivet   PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
6989566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
699d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
700d7d60843SJed Brown   for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
701d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
702d7d60843SJed Brown       PetscInt colstart;
7039566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
704d7d60843SJed Brown       for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
705d7d60843SJed Brown         PetscInt       j, l;
706d7d60843SJed Brown         MatStashBlock *block;
7079566063dSJacob Faibussowitsch         PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
708d7d60843SJed Brown         block->row = row[rowstart];
709d7d60843SJed Brown         block->col = col[colstart];
7109566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
711d7d60843SJed Brown         for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
712d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
713d7d60843SJed Brown             for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
714d7d60843SJed Brown           } else {
7159566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
716d7d60843SJed Brown           }
717d7d60843SJed Brown         }
718d7d60843SJed Brown         colstart = j;
719d7d60843SJed Brown       }
720d7d60843SJed Brown       rowstart = i;
721d7d60843SJed Brown     }
722d7d60843SJed Brown   }
7239566063dSJacob Faibussowitsch   PetscCall(PetscFree4(row, col, valptr, perm));
7243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
725d7d60843SJed Brown }
726d7d60843SJed Brown 
727d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
728d71ae5a4SJacob Faibussowitsch {
729d7d60843SJed Brown   PetscFunctionBegin;
730d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
731d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
732d7d60843SJed Brown     PetscMPIInt  blocklens[2];
733d7d60843SJed Brown     MPI_Aint     displs[2];
734d7d60843SJed Brown     MPI_Datatype types[2], stype;
735223b4c07SBarry Smith     /*
736223b4c07SBarry Smith         DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
737f41aa5efSJunchao Zhang        std::complex itself has standard layout, so does DummyBlock, recursively.
738a5b23f4aSJose E. Roman        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
739f41aa5efSJunchao Zhang        though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
740f41aa5efSJunchao Zhang        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
741f41aa5efSJunchao Zhang 
742f41aa5efSJunchao Zhang        We can test if std::complex has standard layout with the following code:
743f41aa5efSJunchao Zhang        #include <iostream>
744f41aa5efSJunchao Zhang        #include <type_traits>
745f41aa5efSJunchao Zhang        #include <complex>
746f41aa5efSJunchao Zhang        int main() {
747f41aa5efSJunchao Zhang          std::cout << std::boolalpha;
748f41aa5efSJunchao Zhang          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
749f41aa5efSJunchao Zhang        }
750f41aa5efSJunchao Zhang        Output: true
7519503c6c6SJed Brown      */
7529371c9d4SSatish Balay     struct DummyBlock {
7539371c9d4SSatish Balay       PetscInt    row, col;
7549371c9d4SSatish Balay       PetscScalar vals;
7559371c9d4SSatish Balay     };
756d7d60843SJed Brown 
7579503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
758d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
759d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
760d7d60843SJed Brown     }
7619566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
7629566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
7639566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
764d7d60843SJed Brown     blocklens[0] = 2;
765d7d60843SJed Brown     blocklens[1] = bs2;
7669503c6c6SJed Brown     displs[0]    = offsetof(struct DummyBlock, row);
7679503c6c6SJed Brown     displs[1]    = offsetof(struct DummyBlock, vals);
768d7d60843SJed Brown     types[0]     = MPIU_INT;
769d7d60843SJed Brown     types[1]     = MPIU_SCALAR;
7709566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
7719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stype));
7729566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
7739566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
7749566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_free(&stype));
775d7d60843SJed Brown   }
7763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
777d7d60843SJed Brown }
778d7d60843SJed Brown 
779da81f932SPierre Jolivet /* Callback invoked after target rank has initiated receive of rendezvous message.
780d7d60843SJed Brown  * Here we post the main sends.
781d7d60843SJed Brown  */
782d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
783d71ae5a4SJacob Faibussowitsch {
784d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
785d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)sdata;
786d7d60843SJed Brown 
787d7d60843SJed Brown   PetscFunctionBegin;
78808401ef6SPierre Jolivet   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
7899566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
790d7d60843SJed Brown   stash->sendframes[rankid].count   = hdr->count;
791d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
7923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
793d7d60843SJed Brown }
794d7d60843SJed Brown 
795223b4c07SBarry Smith /*
796223b4c07SBarry Smith     Callback invoked by target after receiving rendezvous message.
797223b4c07SBarry Smith     Here we post the main recvs.
798d7d60843SJed Brown  */
799d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
800d71ae5a4SJacob Faibussowitsch {
801d7d60843SJed Brown   MatStash       *stash = (MatStash *)ctx;
802d7d60843SJed Brown   MatStashHeader *hdr   = (MatStashHeader *)rdata;
803d7d60843SJed Brown   MatStashFrame  *frame;
804d7d60843SJed Brown 
805d7d60843SJed Brown   PetscFunctionBegin;
8069566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
8079566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
8089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
809d7d60843SJed Brown   frame->count   = hdr->count;
810d7d60843SJed Brown   frame->pending = 1;
8113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
812d7d60843SJed Brown }
813d7d60843SJed Brown 
814d7d60843SJed Brown /*
815d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
816d7d60843SJed Brown  */
817d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
818d71ae5a4SJacob Faibussowitsch {
819d7d60843SJed Brown   size_t nblocks;
820d7d60843SJed Brown   char  *sendblocks;
821d7d60843SJed Brown 
822d7d60843SJed Brown   PetscFunctionBegin;
82376bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
8244b4eb8d3SJed Brown     InsertMode addv;
8251c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
82608401ef6SPierre Jolivet     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
8274b4eb8d3SJed Brown   }
8284b4eb8d3SJed Brown 
8299566063dSJacob Faibussowitsch   PetscCall(MatStashBlockTypeSetUp(stash));
8309566063dSJacob Faibussowitsch   PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
8319566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
8329566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
83360f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
83423b7d1baSJed Brown     PetscInt i;
83523b7d1baSJed Brown     size_t   b;
83697da8949SJed Brown     for (i = 0, b = 0; i < stash->nsendranks; i++) {
83797da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
83897da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
83997da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
84097da8949SJed Brown       for (; b < nblocks; b++) {
84197da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
842aed4548fSBarry Smith         PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
84397da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
84497da8949SJed Brown         stash->sendhdr[i].count++;
84597da8949SJed Brown       }
84697da8949SJed Brown     }
84797da8949SJed Brown   } else { /* Dynamically count and pack (first time) */
84823b7d1baSJed Brown     PetscInt sendno;
84923b7d1baSJed Brown     size_t   i, rowstart;
850d7d60843SJed Brown 
851d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
852d7d60843SJed Brown     stash->nsendranks = 0;
853d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
8547e2ea869SJed Brown       PetscInt       owner;
855d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8569566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
857d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
858d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
859d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8607e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
861d7d60843SJed Brown       }
862d7d60843SJed Brown       stash->nsendranks++;
863d7d60843SJed Brown       rowstart = i;
864d7d60843SJed Brown     }
8659566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));
866d7d60843SJed Brown 
867d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
868d7d60843SJed Brown     sendno = 0;
869d7d60843SJed Brown     for (rowstart = 0; rowstart < nblocks;) {
870d7d60843SJed Brown       PetscInt       owner;
871d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
8729566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
873d7d60843SJed Brown       if (owner < 0) owner = -(owner + 2);
874d7d60843SJed Brown       stash->sendranks[sendno] = owner;
875d7d60843SJed Brown       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
876d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8777e2ea869SJed Brown         if (sendblock_i->row >= owners[owner + 1]) break;
878d7d60843SJed Brown       }
879d7d60843SJed Brown       stash->sendframes[sendno].buffer  = sendblock_rowstart;
880d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
881d7d60843SJed Brown       stash->sendhdr[sendno].count      = i - rowstart;
882d7d60843SJed Brown       sendno++;
883d7d60843SJed Brown       rowstart = i;
884d7d60843SJed Brown     }
88508401ef6SPierre Jolivet     PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
886d7d60843SJed Brown   }
887d7d60843SJed Brown 
8884b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
8894b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
8904b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
89123b7d1baSJed Brown     size_t i;
8924b4eb8d3SJed Brown     for (i = 0; i < nblocks; i++) {
8934b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
8944b4eb8d3SJed Brown       sendblock_i->row           = -(sendblock_i->row + 1);
8954b4eb8d3SJed Brown     }
8964b4eb8d3SJed Brown   }
8974b4eb8d3SJed Brown 
89860f34548SJunchao Zhang   if (stash->first_assembly_done) {
89997da8949SJed Brown     PetscMPIInt i, tag;
9009566063dSJacob Faibussowitsch     PetscCall(PetscCommGetNewTag(stash->comm, &tag));
90148a46eb9SPierre Jolivet     for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
90248a46eb9SPierre Jolivet     for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
90397da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
90497da8949SJed Brown   } else {
9059371c9d4SSatish Balay     PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
9069566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
90797da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
90897da8949SJed Brown   }
909d7d60843SJed Brown 
9109566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
911d7d60843SJed Brown   stash->recvframe_active    = NULL;
912d7d60843SJed Brown   stash->recvframe_i         = 0;
913d7d60843SJed Brown   stash->some_i              = 0;
914d7d60843SJed Brown   stash->some_count          = 0;
915d7d60843SJed Brown   stash->recvcount           = 0;
91660f34548SJunchao Zhang   stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9174b4eb8d3SJed Brown   stash->insertmode          = &mat->insertmode;
9183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
919d7d60843SJed Brown }
920d7d60843SJed Brown 
921d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
922d71ae5a4SJacob Faibussowitsch {
923d7d60843SJed Brown   MatStashBlock *block;
924d7d60843SJed Brown 
925d7d60843SJed Brown   PetscFunctionBegin;
926d7d60843SJed Brown   *flg = 0;
927d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
928d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
9293ba16761SJacob Faibussowitsch       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
9309566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
931d7d60843SJed Brown       stash->some_i = 0;
932d7d60843SJed Brown     }
933d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
934d7d60843SJed Brown     stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
935d7d60843SJed Brown     if (stash->use_status) {                                  /* Count what was actually sent */
9369566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count));
937d7d60843SJed Brown     }
9384b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9394b4eb8d3SJed Brown       block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
9404b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
941aed4548fSBarry Smith       PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
942aed4548fSBarry Smith       PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
9434b4eb8d3SJed Brown     }
944d7d60843SJed Brown     stash->some_i++;
945d7d60843SJed Brown     stash->recvcount++;
946d7d60843SJed Brown     stash->recvframe_i = 0;
947d7d60843SJed Brown   }
948d7d60843SJed Brown   *n    = 1;
949d7d60843SJed Brown   block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
9504b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
951d7d60843SJed Brown   *row = &block->row;
952d7d60843SJed Brown   *col = &block->col;
953d7d60843SJed Brown   *val = block->vals;
954d7d60843SJed Brown   stash->recvframe_i++;
955d7d60843SJed Brown   *flg = 1;
9563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
957d7d60843SJed Brown }
958d7d60843SJed Brown 
959d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
960d71ae5a4SJacob Faibussowitsch {
961d7d60843SJed Brown   PetscFunctionBegin;
9629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
96360f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
964d2b3fd65SBarry Smith     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
9653575f486SJed Brown   } else { /* No reuse, so collect everything. */
9669566063dSJacob Faibussowitsch     PetscCall(MatStashScatterDestroy_BTS(stash));
96797da8949SJed Brown   }
968d7d60843SJed Brown 
969d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
970d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
971d7d60843SJed Brown      Also update the oldmax, only if it increases */
972d7d60843SJed Brown   if (stash->n) {
973d7d60843SJed Brown     PetscInt bs2     = stash->bs * stash->bs;
974d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
975d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
976d7d60843SJed Brown   }
977d7d60843SJed Brown 
978d7d60843SJed Brown   stash->nmax       = 0;
979d7d60843SJed Brown   stash->n          = 0;
980d7d60843SJed Brown   stash->reallocs   = -1;
981d7d60843SJed Brown   stash->nprocessed = 0;
982d7d60843SJed Brown 
9839566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
984d7d60843SJed Brown 
985f4259b30SLisandro Dalcin   stash->space = NULL;
986d7d60843SJed Brown 
9873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
988d7d60843SJed Brown }
989d7d60843SJed Brown 
990d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
991d71ae5a4SJacob Faibussowitsch {
992d7d60843SJed Brown   PetscFunctionBegin;
9939566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
9949566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
995d7d60843SJed Brown   stash->recvframes = NULL;
9969566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
99748a46eb9SPierre Jolivet   if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
998d7d60843SJed Brown   stash->nsendranks = 0;
999d7d60843SJed Brown   stash->nrecvranks = 0;
10009566063dSJacob Faibussowitsch   PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
10019566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->sendreqs));
10029566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvreqs));
10039566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvranks));
10049566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvhdr));
10059566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
10063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1007d7d60843SJed Brown }
10081667be42SBarry Smith #endif
1009