xref: /petsc/src/mat/utils/matstash.c (revision d2b3fd6591b60d272352aa484bf62400afcb5848)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
1375d39b6aSBarry Smith #endif
14d7d60843SJed Brown 
159417f4adSLois Curfman McInnes /*
168798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
174c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
184c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
199417f4adSLois Curfman McInnes 
204c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Input Parameters:
234c1ff481SSatish Balay   comm - communicator, required for scatters.
244c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
254c1ff481SSatish Balay 
264c1ff481SSatish Balay   Output Parameters:
274c1ff481SSatish Balay   stash    - the newly created stash
289417f4adSLois Curfman McInnes */
29c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
309417f4adSLois Curfman McInnes {
31533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
32ace3abfcSBarry Smith   PetscBool      flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
378865f1eaSKarl Rupp 
389566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm,&stash->tag1));
399566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(stash->comm,&stash->tag2));
409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(stash->comm,&stash->size));
419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(stash->comm,&stash->rank));
429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2*stash->size,&stash->flg_v));
43533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
44533163c2SBarry Smith 
45434d7ff9SSatish Balay   nopt = stash->size;
469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nopt,&opt));
479566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg));
48434d7ff9SSatish Balay   if (flg) {
49434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
50434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
51434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
52f4ab19daSSatish Balay     else                          max = 0; /* Use default */
53434d7ff9SSatish Balay     stash->umax = max;
54434d7ff9SSatish Balay   } else {
55434d7ff9SSatish Balay     stash->umax = 0;
56434d7ff9SSatish Balay   }
579566063dSJacob Faibussowitsch   PetscCall(PetscFree(opt));
584c1ff481SSatish Balay   if (bs <= 0) bs = 1;
59a2d1c673SSatish Balay 
604c1ff481SSatish Balay   stash->bs         = bs;
619417f4adSLois Curfman McInnes   stash->nmax       = 0;
62434d7ff9SSatish Balay   stash->oldnmax    = 0;
639417f4adSLois Curfman McInnes   stash->n          = 0;
644c1ff481SSatish Balay   stash->reallocs   = -1;
65f4259b30SLisandro Dalcin   stash->space_head = NULL;
66f4259b30SLisandro Dalcin   stash->space      = NULL;
679417f4adSLois Curfman McInnes 
68f4259b30SLisandro Dalcin   stash->send_waits  = NULL;
69f4259b30SLisandro Dalcin   stash->recv_waits  = NULL;
70f4259b30SLisandro Dalcin   stash->send_status = NULL;
71bc5ccf88SSatish Balay   stash->nsends      = 0;
72bc5ccf88SSatish Balay   stash->nrecvs      = 0;
73f4259b30SLisandro Dalcin   stash->svalues     = NULL;
74f4259b30SLisandro Dalcin   stash->rvalues     = NULL;
75f4259b30SLisandro Dalcin   stash->rindices    = NULL;
76a2d1c673SSatish Balay   stash->nprocessed  = 0;
7767318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
78d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
798865f1eaSKarl Rupp 
809566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL));
811667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
82ca723aa4SStefano Zampini   flg  = PETSC_FALSE;
839566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL));
84b30fb036SBarry Smith   if (!flg) {
85d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
86d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
87d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
88d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
89ac2b2aa0SJed Brown   } else {
901667be42SBarry Smith #endif
91ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
951667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
96ac2b2aa0SJed Brown   }
971667be42SBarry Smith #endif
983a40ed3dSBarry Smith   PetscFunctionReturn(0);
999417f4adSLois Curfman McInnes }
1009417f4adSLois Curfman McInnes 
1014c1ff481SSatish Balay /*
1028798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1034c1ff481SSatish Balay */
104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1059417f4adSLois Curfman McInnes {
106bc5ccf88SSatish Balay   PetscFunctionBegin;
1079566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1089566063dSJacob Faibussowitsch   if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
1098865f1eaSKarl Rupp 
110f4259b30SLisandro Dalcin   stash->space = NULL;
1118865f1eaSKarl Rupp 
1129566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->flg_v));
113bc5ccf88SSatish Balay   PetscFunctionReturn(0);
114bc5ccf88SSatish Balay }
115bc5ccf88SSatish Balay 
1164c1ff481SSatish Balay /*
11767318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1184c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
11967318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1204c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1214c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1224c1ff481SSatish Balay    so that the same value can be used the next time through.
1234c1ff481SSatish Balay */
124dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
125bc5ccf88SSatish Balay {
126ac2b2aa0SJed Brown   PetscFunctionBegin;
1279566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterEnd)(stash));
128ac2b2aa0SJed Brown   PetscFunctionReturn(0);
129ac2b2aa0SJed Brown }
130ac2b2aa0SJed Brown 
131d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
132ac2b2aa0SJed Brown {
133533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
134a2d1c673SSatish Balay   MPI_Status     *send_status;
135a2d1c673SSatish Balay 
1363a40ed3dSBarry Smith   PetscFunctionBegin;
137533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
138a2d1c673SSatish Balay   /* wait on sends */
139a2d1c673SSatish Balay   if (nsends) {
1409566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2*nsends,&send_status));
1419566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitall(2*nsends,stash->send_waits,send_status));
1429566063dSJacob Faibussowitsch     PetscCall(PetscFree(send_status));
143a2d1c673SSatish Balay   }
144a2d1c673SSatish Balay 
145c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
146434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
147434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
148b9b97703SBarry Smith   if (stash->n) {
14994b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1508a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
151434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
152b9b97703SBarry Smith   }
153434d7ff9SSatish Balay 
154d07ff455SSatish Balay   stash->nmax       = 0;
155d07ff455SSatish Balay   stash->n          = 0;
1564c1ff481SSatish Balay   stash->reallocs   = -1;
157a2d1c673SSatish Balay   stash->nprocessed = 0;
1588865f1eaSKarl Rupp 
1599566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
1608865f1eaSKarl Rupp 
161f4259b30SLisandro Dalcin   stash->space = NULL;
1628865f1eaSKarl Rupp 
1639566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->send_waits));
1649566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recv_waits));
1659566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->svalues,stash->sindices));
1669566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues[0]));
1679566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rvalues));
1689566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices[0]));
1699566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->rindices));
1703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1719417f4adSLois Curfman McInnes }
1729417f4adSLois Curfman McInnes 
1734c1ff481SSatish Balay /*
1748798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1754c1ff481SSatish Balay 
1764c1ff481SSatish Balay    Input Parameters:
1774c1ff481SSatish Balay    stash    - the stash
17894b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1794c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1804c1ff481SSatish Balay 
1814c1ff481SSatish Balay */
182c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
18397530c3fSBarry Smith {
184c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
18594b769a5SSatish Balay 
1863a40ed3dSBarry Smith   PetscFunctionBegin;
1871ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1881ecfd215SBarry Smith   if (reallocs) {
189434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
190434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1911ecfd215SBarry Smith   }
192bc5ccf88SSatish Balay   PetscFunctionReturn(0);
193bc5ccf88SSatish Balay }
1944c1ff481SSatish Balay 
1954c1ff481SSatish Balay /*
1968798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1974c1ff481SSatish Balay 
1984c1ff481SSatish Balay    Input Parameters:
1994c1ff481SSatish Balay    stash  - the stash
2004c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2014c1ff481SSatish Balay             this value is used while allocating memory.
2024c1ff481SSatish Balay */
203c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
204bc5ccf88SSatish Balay {
205bc5ccf88SSatish Balay   PetscFunctionBegin;
206434d7ff9SSatish Balay   stash->umax = max;
2073a40ed3dSBarry Smith   PetscFunctionReturn(0);
20897530c3fSBarry Smith }
20997530c3fSBarry Smith 
2108798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2114c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2124c1ff481SSatish Balay    being inserted into the stash.
2134c1ff481SSatish Balay 
2144c1ff481SSatish Balay    Input Parameters:
2154c1ff481SSatish Balay    stash - the stash
2164c1ff481SSatish Balay    incr  - the minimum increase requested
2174c1ff481SSatish Balay 
2184c1ff481SSatish Balay    Notes:
2194c1ff481SSatish Balay    This routine doubles the currently used memory.
2204c1ff481SSatish Balay  */
221c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2229417f4adSLois Curfman McInnes {
2235bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2249417f4adSLois Curfman McInnes 
2253a40ed3dSBarry Smith   PetscFunctionBegin;
2269417f4adSLois Curfman McInnes   /* allocate a larger stash */
227c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
228434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
229434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
230c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
231434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
232434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
233434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2344c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
235d07ff455SSatish Balay 
23675cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
2379566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceGet(bs2,newnmax,&stash->space));
238b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
239b087b6d6SSatish Balay     stash->space_head = stash->space;
24075cae7c1SHong Zhang   }
241b087b6d6SSatish Balay 
242bc5ccf88SSatish Balay   stash->reallocs++;
24375cae7c1SHong Zhang   stash->nmax = newnmax;
244bc5ccf88SSatish Balay   PetscFunctionReturn(0);
245bc5ccf88SSatish Balay }
246bc5ccf88SSatish Balay /*
2478798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2484c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2494c1ff481SSatish Balay   can be inserted with a single call to this function.
2504c1ff481SSatish Balay 
2514c1ff481SSatish Balay   Input Parameters:
2524c1ff481SSatish Balay   stash  - the stash
2534c1ff481SSatish Balay   row    - the global row correspoiding to the values
2544c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2554c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2564c1ff481SSatish Balay   values - the values inserted
257bc5ccf88SSatish Balay */
258ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
259bc5ccf88SSatish Balay {
260b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
26175cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
262bc5ccf88SSatish Balay 
263bc5ccf88SSatish Balay   PetscFunctionBegin;
2644c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
26575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2669566063dSJacob Faibussowitsch     PetscCall(MatStashExpand_Private(stash,n));
2679417f4adSLois Curfman McInnes   }
26875cae7c1SHong Zhang   space = stash->space;
26975cae7c1SHong Zhang   k     = space->local_used;
2704c1ff481SSatish Balay   for (i=0; i<n; i++) {
27114069ce9SStefano Zampini     if (ignorezeroentries && values && values[i] == 0.0) continue;
27275cae7c1SHong Zhang     space->idx[k] = row;
27375cae7c1SHong Zhang     space->idy[k] = idxn[i];
27414069ce9SStefano Zampini     space->val[k] = values ? values[i] : 0.0;
27575cae7c1SHong Zhang     k++;
276b400d20cSBarry Smith     cnt++;
2779417f4adSLois Curfman McInnes   }
278b400d20cSBarry Smith   stash->n               += cnt;
279b400d20cSBarry Smith   space->local_used      += cnt;
280b400d20cSBarry Smith   space->local_remaining -= cnt;
281a2d1c673SSatish Balay   PetscFunctionReturn(0);
282a2d1c673SSatish Balay }
28375cae7c1SHong Zhang 
2844c1ff481SSatish Balay /*
2858798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2864c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2874c1ff481SSatish Balay   can be inserted with a single call to this function.
288a2d1c673SSatish Balay 
2894c1ff481SSatish Balay   Input Parameters:
2904c1ff481SSatish Balay   stash   - the stash
2914c1ff481SSatish Balay   row     - the global row correspoiding to the values
2924c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2934c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2944c1ff481SSatish Balay   values  - the values inserted
2954c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2964c1ff481SSatish Balay             this happens because the input is columnoriented.
2974c1ff481SSatish Balay */
298ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
299a2d1c673SSatish Balay {
30050e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
30175cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
302a2d1c673SSatish Balay 
3034c1ff481SSatish Balay   PetscFunctionBegin;
3044c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
30575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3069566063dSJacob Faibussowitsch     PetscCall(MatStashExpand_Private(stash,n));
3074c1ff481SSatish Balay   }
30875cae7c1SHong Zhang   space = stash->space;
30975cae7c1SHong Zhang   k     = space->local_used;
3104c1ff481SSatish Balay   for (i=0; i<n; i++) {
31114069ce9SStefano Zampini     if (ignorezeroentries && values && values[i*stepval] == 0.0) continue;
31275cae7c1SHong Zhang     space->idx[k] = row;
31375cae7c1SHong Zhang     space->idy[k] = idxn[i];
31414069ce9SStefano Zampini     space->val[k] = values ? values[i*stepval] : 0.0;
31575cae7c1SHong Zhang     k++;
316b400d20cSBarry Smith     cnt++;
3174c1ff481SSatish Balay   }
318b400d20cSBarry Smith   stash->n               += cnt;
319b400d20cSBarry Smith   space->local_used      += cnt;
320b400d20cSBarry Smith   space->local_remaining -= cnt;
3214c1ff481SSatish Balay   PetscFunctionReturn(0);
3224c1ff481SSatish Balay }
3234c1ff481SSatish Balay 
3244c1ff481SSatish Balay /*
3258798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3264c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3274c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3284c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3294c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3304c1ff481SSatish Balay 
3314c1ff481SSatish Balay   Input Parameters:
3324c1ff481SSatish Balay   stash  - the stash
3334c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3344c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3354c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3364c1ff481SSatish Balay            values. Each block is of size bs*bs.
3374c1ff481SSatish Balay   values - the values inserted
3384c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
339a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3404c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3414c1ff481SSatish Balay */
34254f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3434c1ff481SSatish Balay {
34475cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
34554f21887SBarry Smith   const PetscScalar  *vals;
34654f21887SBarry Smith   PetscScalar        *array;
34775cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
348a2d1c673SSatish Balay 
349a2d1c673SSatish Balay   PetscFunctionBegin;
35075cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3519566063dSJacob Faibussowitsch     PetscCall(MatStashExpand_Private(stash,n));
352a2d1c673SSatish Balay   }
35375cae7c1SHong Zhang   space = stash->space;
35475cae7c1SHong Zhang   l     = space->local_used;
35575cae7c1SHong Zhang   bs2   = bs*bs;
3564c1ff481SSatish Balay   for (i=0; i<n; i++) {
35775cae7c1SHong Zhang     space->idx[l] = row;
35875cae7c1SHong Zhang     space->idy[l] = idxn[i];
35975cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
36075cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
36175cae7c1SHong Zhang        funtion call */
36275cae7c1SHong Zhang     array = space->val + bs2*l;
36375cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
36475cae7c1SHong Zhang     for (j=0; j<bs; j++) {
36514069ce9SStefano Zampini       for (k=0; k<bs; k++) array[k*bs] = values ? vals[k] : 0.0;
36675cae7c1SHong Zhang       array++;
36775cae7c1SHong Zhang       vals += cmax*bs;
36875cae7c1SHong Zhang     }
36975cae7c1SHong Zhang     l++;
370a2d1c673SSatish Balay   }
3715bd3b8fbSHong Zhang   stash->n               += n;
37275cae7c1SHong Zhang   space->local_used      += n;
37375cae7c1SHong Zhang   space->local_remaining -= n;
3744c1ff481SSatish Balay   PetscFunctionReturn(0);
3754c1ff481SSatish Balay }
3764c1ff481SSatish Balay 
3774c1ff481SSatish Balay /*
3788798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3794c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3804c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3814c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3824c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3834c1ff481SSatish Balay 
3844c1ff481SSatish Balay   Input Parameters:
3854c1ff481SSatish Balay   stash  - the stash
3864c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3874c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3884c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3894c1ff481SSatish Balay            values. Each block is of size bs*bs.
3904c1ff481SSatish Balay   values - the values inserted
3914c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
392a5b23f4aSJose E. Roman   cmax   - the number of block-columns on the original block.
3934c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3944c1ff481SSatish Balay */
39554f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3964c1ff481SSatish Balay {
39775cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
39854f21887SBarry Smith   const PetscScalar  *vals;
39954f21887SBarry Smith   PetscScalar        *array;
40075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4014c1ff481SSatish Balay 
4024c1ff481SSatish Balay   PetscFunctionBegin;
40375cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4049566063dSJacob Faibussowitsch     PetscCall(MatStashExpand_Private(stash,n));
4054c1ff481SSatish Balay   }
40675cae7c1SHong Zhang   space = stash->space;
40775cae7c1SHong Zhang   l     = space->local_used;
40875cae7c1SHong Zhang   bs2   = bs*bs;
4094c1ff481SSatish Balay   for (i=0; i<n; i++) {
41075cae7c1SHong Zhang     space->idx[l] = row;
41175cae7c1SHong Zhang     space->idy[l] = idxn[i];
41275cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
41375cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
41475cae7c1SHong Zhang      funtion call */
41575cae7c1SHong Zhang     array = space->val + bs2*l;
41675cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
41775cae7c1SHong Zhang     for (j=0; j<bs; j++) {
41814069ce9SStefano Zampini       for (k=0; k<bs; k++) array[k] = values ? vals[k] : 0.0;
41975cae7c1SHong Zhang       array += bs;
42075cae7c1SHong Zhang       vals  += rmax*bs;
42175cae7c1SHong Zhang     }
4225bd3b8fbSHong Zhang     l++;
423a2d1c673SSatish Balay   }
4245bd3b8fbSHong Zhang   stash->n               += n;
42575cae7c1SHong Zhang   space->local_used      += n;
42675cae7c1SHong Zhang   space->local_remaining -= n;
4273a40ed3dSBarry Smith   PetscFunctionReturn(0);
4289417f4adSLois Curfman McInnes }
4294c1ff481SSatish Balay /*
4308798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4314c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4324c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4334c1ff481SSatish Balay   processors.
434bc5ccf88SSatish Balay 
4354c1ff481SSatish Balay   Input Parameters:
4364c1ff481SSatish Balay   stash  - the stash
4374c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4384c1ff481SSatish Balay            for each node.
4394c1ff481SSatish Balay 
44095452b02SPatrick Sanan   Notes:
44195452b02SPatrick Sanan     The 'owners' array in the cased of the blocked-stash has the
4424c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4434c1ff481SSatish Balay   the proper global indices.
4444c1ff481SSatish Balay */
4451e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
446bc5ccf88SSatish Balay {
447ac2b2aa0SJed Brown   PetscFunctionBegin;
4489566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterBegin)(mat,stash,owners));
449ac2b2aa0SJed Brown   PetscFunctionReturn(0);
450ac2b2aa0SJed Brown }
451ac2b2aa0SJed Brown 
452ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
453ac2b2aa0SJed Brown {
454c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
455fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
45675cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
45754f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
458bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
459563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
46076ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4615bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
46254f21887SBarry Smith   PetscScalar        *sp_val;
4635bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
464bc5ccf88SSatish Balay 
465bc5ccf88SSatish Balay   PetscFunctionBegin;
4664b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
4674b4eb8d3SJed Brown     InsertMode addv;
4681c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
4692c71b3e2SJacob Faibussowitsch     PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
4704b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4714b4eb8d3SJed Brown   }
4724b4eb8d3SJed Brown 
4734c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
47475cae7c1SHong Zhang 
475bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
4769566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size,&nlengths));
4779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(stash->n+1,&owner));
478a2d1c673SSatish Balay 
47975cae7c1SHong Zhang   i       = j    = 0;
4807357eb19SBarry Smith   lastidx = -1;
4815bd3b8fbSHong Zhang   space   = stash->space_head;
4826c4ed002SBarry Smith   while (space) {
48375cae7c1SHong Zhang     space_next = space->next;
4845bd3b8fbSHong Zhang     sp_idx     = space->idx;
48575cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
4867357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4875bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4887357eb19SBarry Smith       lastidx = idx;
4897357eb19SBarry Smith       for (; j<size; j++) {
4904c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
491563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
492bc5ccf88SSatish Balay         }
493bc5ccf88SSatish Balay       }
49475cae7c1SHong Zhang       i++;
49575cae7c1SHong Zhang     }
49675cae7c1SHong Zhang     space = space_next;
497bc5ccf88SSatish Balay   }
498071fcb05SBarry Smith 
499563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
5009566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(size,&sizes));
501563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5028865f1eaSKarl Rupp     if (nlengths[i]) {
50376ec1555SBarry Smith       sizes[i] = 1; nsends++;
5048865f1eaSKarl Rupp     }
505563fb871SSatish Balay   }
506bc5ccf88SSatish Balay 
50754f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
508563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
5099566063dSJacob Faibussowitsch    PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
5109566063dSJacob Faibussowitsch    PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
511563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
512563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
5139566063dSJacob Faibussowitsch    PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
514563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
515563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
5169566063dSJacob Faibussowitsch    PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
5179566063dSJacob Faibussowitsch    PetscCall(PetscFree(onodes));
5189566063dSJacob Faibussowitsch    PetscCall(PetscFree(olengths));}
519bc5ccf88SSatish Balay 
520bc5ccf88SSatish Balay   /* do sends:
521bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
522bc5ccf88SSatish Balay          the ith processor
523bc5ccf88SSatish Balay   */
5249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
5259566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2*nsends,&send_waits));
5269566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size,&startv,size,&starti));
527a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
528bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
529bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
530563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
531533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
532bc5ccf88SSatish Balay   }
53375cae7c1SHong Zhang 
53475cae7c1SHong Zhang   i     = 0;
5355bd3b8fbSHong Zhang   space = stash->space_head;
5366c4ed002SBarry Smith   while (space) {
53775cae7c1SHong Zhang     space_next = space->next;
5385bd3b8fbSHong Zhang     sp_idx     = space->idx;
5395bd3b8fbSHong Zhang     sp_idy     = space->idy;
5405bd3b8fbSHong Zhang     sp_val     = space->val;
54175cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
542bc5ccf88SSatish Balay       j = owner[i];
543a2d1c673SSatish Balay       if (bs2 == 1) {
5445bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
545a2d1c673SSatish Balay       } else {
546c1ac3661SBarry Smith         PetscInt    k;
54754f21887SBarry Smith         PetscScalar *buf1,*buf2;
5484c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
549b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5508865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
551a2d1c673SSatish Balay       }
5525bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5535bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
554bc5ccf88SSatish Balay       startv[j]++;
555bc5ccf88SSatish Balay       starti[j]++;
55675cae7c1SHong Zhang       i++;
55775cae7c1SHong Zhang     }
55875cae7c1SHong Zhang     space = space_next;
559bc5ccf88SSatish Balay   }
560bc5ccf88SSatish Balay   startv[0] = 0;
5618865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
562e5d0e772SSatish Balay 
563bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
56476ec1555SBarry Smith     if (sizes[i]) {
5659566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
5669566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
567bc5ccf88SSatish Balay     }
568b85c94c3SSatish Balay   }
5696cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5709566063dSJacob Faibussowitsch   PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT " \n",nsends));
571e5d0e772SSatish Balay   for (i=0; i<size; i++) {
57276ec1555SBarry Smith     if (sizes[i]) {
5739566063dSJacob Faibussowitsch       PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
574e5d0e772SSatish Balay     }
575e5d0e772SSatish Balay   }
576e5d0e772SSatish Balay #endif
5779566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlengths));
5789566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
5799566063dSJacob Faibussowitsch   PetscCall(PetscFree2(startv,starti));
5809566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
581a2d1c673SSatish Balay 
582563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
5839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2*nreceives,&recv_waits));
584563fb871SSatish Balay 
585563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
586563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
587563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
588563fb871SSatish Balay   }
589563fb871SSatish Balay   stash->recv_waits = recv_waits;
5908865f1eaSKarl Rupp 
5919566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits1));
5929566063dSJacob Faibussowitsch   PetscCall(PetscFree(recv_waits2));
593563fb871SSatish Balay 
594c05d87d6SBarry Smith   stash->svalues         = svalues;
595c05d87d6SBarry Smith   stash->sindices        = sindices;
596c05d87d6SBarry Smith   stash->rvalues         = rvalues;
597c05d87d6SBarry Smith   stash->rindices        = rindices;
598c05d87d6SBarry Smith   stash->send_waits      = send_waits;
599c05d87d6SBarry Smith   stash->nsends          = nsends;
600c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
60167318a8aSJed Brown   stash->reproduce_count = 0;
602bc5ccf88SSatish Balay   PetscFunctionReturn(0);
603bc5ccf88SSatish Balay }
604bc5ccf88SSatish Balay 
605a2d1c673SSatish Balay /*
6068798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6078798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6084c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6094c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6104c1ff481SSatish Balay 
6114c1ff481SSatish Balay    Input Parameters:
6124c1ff481SSatish Balay    stash - the stash
6134c1ff481SSatish Balay 
6144c1ff481SSatish Balay    Output Parameters:
6154c1ff481SSatish Balay    nvals - the number of entries in the current message.
6164c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6174c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6184c1ff481SSatish Balay    vals  - the values
6194c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6204c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6214c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
622a2d1c673SSatish Balay */
62354f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
624bc5ccf88SSatish Balay {
625ac2b2aa0SJed Brown   PetscFunctionBegin;
6269566063dSJacob Faibussowitsch   PetscCall((*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg));
627ac2b2aa0SJed Brown   PetscFunctionReturn(0);
628ac2b2aa0SJed Brown }
629ac2b2aa0SJed Brown 
630d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
631ac2b2aa0SJed Brown {
632533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
633fe09c992SBarry Smith   PetscInt       bs2;
634a2d1c673SSatish Balay   MPI_Status     recv_status;
635ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
636bc5ccf88SSatish Balay 
637bc5ccf88SSatish Balay   PetscFunctionBegin;
638a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
639a2d1c673SSatish Balay   /* Return if no more messages to process */
6408865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
641a2d1c673SSatish Balay 
6424c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
64367318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
644a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
645a2d1c673SSatish Balay   while (!match_found) {
64667318a8aSJed Brown     if (stash->reproduce) {
64767318a8aSJed Brown       i    = stash->reproduce_count++;
6489566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Wait(stash->recv_waits+i,&recv_status));
64967318a8aSJed Brown     } else {
6509566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status));
65167318a8aSJed Brown     }
6522c71b3e2SJacob Faibussowitsch     PetscCheckFalse(recv_status.MPI_SOURCE < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
653533163c2SBarry Smith 
65467318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
655a2d1c673SSatish Balay     if (i % 2) {
6569566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status,MPIU_SCALAR,nvals));
6578865f1eaSKarl Rupp 
658c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6598865f1eaSKarl Rupp 
660a2d1c673SSatish Balay       *nvals = *nvals/bs2;
661563fb871SSatish Balay     } else {
6629566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&recv_status,MPIU_INT,nvals));
6638865f1eaSKarl Rupp 
664563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
6658865f1eaSKarl Rupp 
666563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
667bc5ccf88SSatish Balay     }
668a2d1c673SSatish Balay 
669cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
670c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
671c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
672a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
673563fb871SSatish Balay       *rows = stash->rindices[i2];
674a2d1c673SSatish Balay       *cols = *rows + *nvals;
675563fb871SSatish Balay       *vals = stash->rvalues[i1];
676a2d1c673SSatish Balay       *flg  = 1;
677a2d1c673SSatish Balay       stash->nprocessed++;
67835d8aa7fSBarry Smith       match_found = PETSC_TRUE;
679bc5ccf88SSatish Balay     }
680bc5ccf88SSatish Balay   }
681bc5ccf88SSatish Balay   PetscFunctionReturn(0);
682bc5ccf88SSatish Balay }
683d7d60843SJed Brown 
684e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
685d7d60843SJed Brown typedef struct {
686d7d60843SJed Brown   PetscInt row;
687d7d60843SJed Brown   PetscInt col;
688d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
689d7d60843SJed Brown } MatStashBlock;
690d7d60843SJed Brown 
691d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
692d7d60843SJed Brown {
693d7d60843SJed Brown   PetscMatStashSpace space;
694d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
695d7d60843SJed Brown   PetscScalar **valptr;
696d7d60843SJed Brown 
697d7d60843SJed Brown   PetscFunctionBegin;
6989566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm));
699d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
700d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
701d7d60843SJed Brown       row[cnt] = space->idx[i];
702d7d60843SJed Brown       col[cnt] = space->idy[i];
703d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
704d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
705d7d60843SJed Brown       cnt++;
706d7d60843SJed Brown     }
707d7d60843SJed Brown   }
7082c71b3e2SJacob Faibussowitsch   PetscCheckFalse(cnt != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries",n,cnt);
7099566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArrayPair(n,row,col,perm));
710d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
711d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
712d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
713d7d60843SJed Brown       PetscInt colstart;
7149566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]));
715d7d60843SJed Brown       for (colstart=rowstart; colstart<i;) { /* Compress multiple insertions to the same location */
716d7d60843SJed Brown         PetscInt j,l;
717d7d60843SJed Brown         MatStashBlock *block;
7189566063dSJacob Faibussowitsch         PetscCall(PetscSegBufferGet(stash->segsendblocks,1,&block));
719d7d60843SJed Brown         block->row = row[rowstart];
720d7d60843SJed Brown         block->col = col[colstart];
7219566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(block->vals,valptr[perm[colstart]],bs2));
722d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
723d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
724d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
725d7d60843SJed Brown           } else {
7269566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(block->vals,valptr[perm[j]],bs2));
727d7d60843SJed Brown           }
728d7d60843SJed Brown         }
729d7d60843SJed Brown         colstart = j;
730d7d60843SJed Brown       }
731d7d60843SJed Brown       rowstart = i;
732d7d60843SJed Brown     }
733d7d60843SJed Brown   }
7349566063dSJacob Faibussowitsch   PetscCall(PetscFree4(row,col,valptr,perm));
735d7d60843SJed Brown   PetscFunctionReturn(0);
736d7d60843SJed Brown }
737d7d60843SJed Brown 
738d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
739d7d60843SJed Brown {
740d7d60843SJed Brown   PetscFunctionBegin;
741d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
742d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
743d7d60843SJed Brown     PetscMPIInt  blocklens[2];
744d7d60843SJed Brown     MPI_Aint     displs[2];
745d7d60843SJed Brown     MPI_Datatype types[2],stype;
746f41aa5efSJunchao Zhang     /* Note that DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
747f41aa5efSJunchao Zhang        std::complex itself has standard layout, so does DummyBlock, recursively.
748a5b23f4aSJose E. Roman        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
749f41aa5efSJunchao Zhang        though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
750f41aa5efSJunchao Zhang        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
751f41aa5efSJunchao Zhang 
752f41aa5efSJunchao Zhang        We can test if std::complex has standard layout with the following code:
753f41aa5efSJunchao Zhang        #include <iostream>
754f41aa5efSJunchao Zhang        #include <type_traits>
755f41aa5efSJunchao Zhang        #include <complex>
756f41aa5efSJunchao Zhang        int main() {
757f41aa5efSJunchao Zhang          std::cout << std::boolalpha;
758f41aa5efSJunchao Zhang          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
759f41aa5efSJunchao Zhang        }
760f41aa5efSJunchao Zhang        Output: true
7619503c6c6SJed Brown      */
762f41aa5efSJunchao Zhang     struct DummyBlock {PetscInt row,col; PetscScalar vals;};
763d7d60843SJed Brown 
7649503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
765d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
766d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
767d7d60843SJed Brown     }
7689566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks));
7699566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks));
7709566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe));
771d7d60843SJed Brown     blocklens[0] = 2;
772d7d60843SJed Brown     blocklens[1] = bs2;
7739503c6c6SJed Brown     displs[0] = offsetof(struct DummyBlock,row);
7749503c6c6SJed Brown     displs[1] = offsetof(struct DummyBlock,vals);
775d7d60843SJed Brown     types[0] = MPIU_INT;
776d7d60843SJed Brown     types[1] = MPIU_SCALAR;
7779566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_struct(2,blocklens,displs,types,&stype));
7789566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stype));
7799566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype));
7809566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
7819566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_free(&stype));
782d7d60843SJed Brown   }
783d7d60843SJed Brown   PetscFunctionReturn(0);
784d7d60843SJed Brown }
785d7d60843SJed Brown 
786d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
787d7d60843SJed Brown  * Here we post the main sends.
788d7d60843SJed Brown  */
789d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
790d7d60843SJed Brown {
791d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
792d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
793d7d60843SJed Brown 
794d7d60843SJed Brown   PetscFunctionBegin;
7952c71b3e2SJacob Faibussowitsch   PetscCheckFalse(rank != stash->sendranks[rankid],comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
7969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]));
797d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
798d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
799d7d60843SJed Brown   PetscFunctionReturn(0);
800d7d60843SJed Brown }
801d7d60843SJed Brown 
802d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
803d7d60843SJed Brown  * Here we post the main recvs.
804d7d60843SJed Brown  */
805d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
806d7d60843SJed Brown {
807d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
808d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
809d7d60843SJed Brown   MatStashFrame *frame;
810d7d60843SJed Brown 
811d7d60843SJed Brown   PetscFunctionBegin;
8129566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvframe,1,&frame));
8139566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer));
8149566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]));
815d7d60843SJed Brown   frame->count = hdr->count;
816d7d60843SJed Brown   frame->pending = 1;
817d7d60843SJed Brown   PetscFunctionReturn(0);
818d7d60843SJed Brown }
819d7d60843SJed Brown 
820d7d60843SJed Brown /*
821d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
822d7d60843SJed Brown  */
823d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
824d7d60843SJed Brown {
825d7d60843SJed Brown   PetscErrorCode ierr;
826d7d60843SJed Brown   size_t nblocks;
827d7d60843SJed Brown   char *sendblocks;
828d7d60843SJed Brown 
829d7d60843SJed Brown   PetscFunctionBegin;
83076bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
8314b4eb8d3SJed Brown     InsertMode addv;
8321c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
8332c71b3e2SJacob Faibussowitsch     PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
8344b4eb8d3SJed Brown   }
8354b4eb8d3SJed Brown 
8369566063dSJacob Faibussowitsch   PetscCall(MatStashBlockTypeSetUp(stash));
8379566063dSJacob Faibussowitsch   PetscCall(MatStashSortCompress_Private(stash,mat->insertmode));
8389566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferGetSize(stash->segsendblocks,&nblocks));
8399566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks));
84060f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
84123b7d1baSJed Brown     PetscInt i;
84223b7d1baSJed Brown     size_t b;
84397da8949SJed Brown     for (i=0,b=0; i<stash->nsendranks; i++) {
84497da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
84597da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
84697da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
84797da8949SJed Brown       for (; b<nblocks; b++) {
84897da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
8492c71b3e2SJacob Faibussowitsch         PetscCheckFalse(sendblock_b->row < owners[stash->sendranks[i]],stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
85097da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
85197da8949SJed Brown         stash->sendhdr[i].count++;
85297da8949SJed Brown       }
85397da8949SJed Brown     }
85497da8949SJed Brown   } else {                      /* Dynamically count and pack (first time) */
85523b7d1baSJed Brown     PetscInt sendno;
85623b7d1baSJed Brown     size_t i,rowstart;
857d7d60843SJed Brown 
858d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
859d7d60843SJed Brown     stash->nsendranks = 0;
860d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks;) {
8617e2ea869SJed Brown       PetscInt owner;
862d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
8639566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner));
864d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
865d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
866d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8677e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
868d7d60843SJed Brown       }
869d7d60843SJed Brown       stash->nsendranks++;
870d7d60843SJed Brown       rowstart = i;
871d7d60843SJed Brown     }
8729566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes));
873d7d60843SJed Brown 
874d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
875d7d60843SJed Brown     sendno = 0;
876d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks;) {
877d7d60843SJed Brown       PetscInt owner;
878d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
8799566063dSJacob Faibussowitsch       PetscCall(PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner));
880d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
881d7d60843SJed Brown       stash->sendranks[sendno] = owner;
882d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
883d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8847e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
885d7d60843SJed Brown       }
886d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
887d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
888d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
889d7d60843SJed Brown       sendno++;
890d7d60843SJed Brown       rowstart = i;
891d7d60843SJed Brown     }
8922c71b3e2SJacob Faibussowitsch     PetscCheckFalse(sendno != stash->nsendranks,stash->comm,PETSC_ERR_PLIB,"BTS counted %d sendranks, but %" PetscInt_FMT " sends",stash->nsendranks,sendno);
893d7d60843SJed Brown   }
894d7d60843SJed Brown 
8954b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
8964b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
8974b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
89823b7d1baSJed Brown     size_t i;
8994b4eb8d3SJed Brown     for (i=0; i<nblocks; i++) {
9004b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9014b4eb8d3SJed Brown       sendblock_i->row = -(sendblock_i->row+1);
9024b4eb8d3SJed Brown     }
9034b4eb8d3SJed Brown   }
9044b4eb8d3SJed Brown 
90560f34548SJunchao Zhang   if (stash->first_assembly_done) {
90697da8949SJed Brown     PetscMPIInt i,tag;
9079566063dSJacob Faibussowitsch     PetscCall(PetscCommGetNewTag(stash->comm,&tag));
90897da8949SJed Brown     for (i=0; i<stash->nrecvranks; i++) {
9099566063dSJacob Faibussowitsch       PetscCall(MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash));
91097da8949SJed Brown     }
91197da8949SJed Brown     for (i=0; i<stash->nsendranks; i++) {
9129566063dSJacob Faibussowitsch       PetscCall(MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash));
91397da8949SJed Brown     }
91497da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
91597da8949SJed Brown   } else {
916e0ddb6e8SJed Brown     ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
917e0ddb6e8SJed Brown                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
9189566063dSJacob Faibussowitsch                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);PetscCall(ierr);
9199566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses));
92097da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
92197da8949SJed Brown   }
922d7d60843SJed Brown 
9239566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes));
924d7d60843SJed Brown   stash->recvframe_active     = NULL;
925d7d60843SJed Brown   stash->recvframe_i          = 0;
926d7d60843SJed Brown   stash->some_i               = 0;
927d7d60843SJed Brown   stash->some_count           = 0;
928d7d60843SJed Brown   stash->recvcount            = 0;
92960f34548SJunchao Zhang   stash->first_assembly_done  = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9304b4eb8d3SJed Brown   stash->insertmode           = &mat->insertmode;
931d7d60843SJed Brown   PetscFunctionReturn(0);
932d7d60843SJed Brown }
933d7d60843SJed Brown 
934d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
935d7d60843SJed Brown {
936d7d60843SJed Brown   MatStashBlock *block;
937d7d60843SJed Brown 
938d7d60843SJed Brown   PetscFunctionBegin;
939d7d60843SJed Brown   *flg = 0;
940d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
941d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
942d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
9439566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE));
944d7d60843SJed Brown       stash->some_i = 0;
945d7d60843SJed Brown     }
946d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
947d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
948d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
9499566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count));
950d7d60843SJed Brown     }
9514b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9524b4eb8d3SJed Brown       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
9534b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
9542c71b3e2SJacob Faibussowitsch       PetscCheckFalse(*stash->insertmode == INSERT_VALUES && block->row >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9552c71b3e2SJacob Faibussowitsch       PetscCheckFalse(*stash->insertmode == ADD_VALUES && block->row < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9564b4eb8d3SJed Brown     }
957d7d60843SJed Brown     stash->some_i++;
958d7d60843SJed Brown     stash->recvcount++;
959d7d60843SJed Brown     stash->recvframe_i = 0;
960d7d60843SJed Brown   }
961d7d60843SJed Brown   *n = 1;
962d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
9634b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
964d7d60843SJed Brown   *row = &block->row;
965d7d60843SJed Brown   *col = &block->col;
966d7d60843SJed Brown   *val = block->vals;
967d7d60843SJed Brown   stash->recvframe_i++;
968d7d60843SJed Brown   *flg = 1;
969d7d60843SJed Brown   PetscFunctionReturn(0);
970d7d60843SJed Brown }
971d7d60843SJed Brown 
972d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
973d7d60843SJed Brown {
974d7d60843SJed Brown   PetscFunctionBegin;
9759566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE));
97660f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
977*d2b3fd65SBarry Smith     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks,NULL));
9783575f486SJed Brown   } else {                      /* No reuse, so collect everything. */
9799566063dSJacob Faibussowitsch     PetscCall(MatStashScatterDestroy_BTS(stash));
98097da8949SJed Brown   }
981d7d60843SJed Brown 
982d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
983d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
984d7d60843SJed Brown      Also update the oldmax, only if it increases */
985d7d60843SJed Brown   if (stash->n) {
986d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
987d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
988d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
989d7d60843SJed Brown   }
990d7d60843SJed Brown 
991d7d60843SJed Brown   stash->nmax       = 0;
992d7d60843SJed Brown   stash->n          = 0;
993d7d60843SJed Brown   stash->reallocs   = -1;
994d7d60843SJed Brown   stash->nprocessed = 0;
995d7d60843SJed Brown 
9969566063dSJacob Faibussowitsch   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
997d7d60843SJed Brown 
998f4259b30SLisandro Dalcin   stash->space = NULL;
999d7d60843SJed Brown 
1000d7d60843SJed Brown   PetscFunctionReturn(0);
1001d7d60843SJed Brown }
1002d7d60843SJed Brown 
100360f34548SJunchao Zhang PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1004d7d60843SJed Brown {
1005d7d60843SJed Brown   PetscFunctionBegin;
10069566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
10079566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
1008d7d60843SJed Brown   stash->recvframes = NULL;
10099566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
1010d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
10119566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Type_free(&stash->blocktype));
1012d7d60843SJed Brown   }
1013d7d60843SJed Brown   stash->nsendranks = 0;
1014d7d60843SJed Brown   stash->nrecvranks = 0;
10159566063dSJacob Faibussowitsch   PetscCall(PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes));
10169566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->sendreqs));
10179566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvreqs));
10189566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvranks));
10199566063dSJacob Faibussowitsch   PetscCall(PetscFree(stash->recvhdr));
10209566063dSJacob Faibussowitsch   PetscCall(PetscFree2(stash->some_indices,stash->some_statuses));
1021d7d60843SJed Brown   PetscFunctionReturn(0);
1022d7d60843SJed Brown }
10231667be42SBarry Smith #endif
1024