xref: /petsc/src/mat/utils/matstash.c (revision 820f2d463ad7e8160c4f507e09c88f621629a79d)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
1375d39b6aSBarry Smith #endif
14d7d60843SJed Brown 
159417f4adSLois Curfman McInnes /*
168798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
174c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
184c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
199417f4adSLois Curfman McInnes 
204c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Input Parameters:
234c1ff481SSatish Balay   comm - communicator, required for scatters.
244c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
254c1ff481SSatish Balay 
264c1ff481SSatish Balay   Output Parameters:
274c1ff481SSatish Balay   stash    - the newly created stash
289417f4adSLois Curfman McInnes */
29c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
309417f4adSLois Curfman McInnes {
31dfbe8321SBarry Smith   PetscErrorCode ierr;
32533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
33ace3abfcSBarry Smith   PetscBool      flg;
34bc5ccf88SSatish Balay 
353a40ed3dSBarry Smith   PetscFunctionBegin;
36bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
37752ec6e0SSatish Balay   stash->comm = comm;
388865f1eaSKarl Rupp 
39752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
41ffc4695bSBarry Smith   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRMPI(ierr);
42ffc4695bSBarry Smith   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRMPI(ierr);
43785e854fSJed Brown   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
44533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
45533163c2SBarry Smith 
46bc5ccf88SSatish Balay 
47434d7ff9SSatish Balay   nopt = stash->size;
48785e854fSJed Brown   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
49c5929fdfSBarry Smith   ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
50434d7ff9SSatish Balay   if (flg) {
51434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
52434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
53434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
54f4ab19daSSatish Balay     else                          max = 0; /* Use default */
55434d7ff9SSatish Balay     stash->umax = max;
56434d7ff9SSatish Balay   } else {
57434d7ff9SSatish Balay     stash->umax = 0;
58434d7ff9SSatish Balay   }
59606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
604c1ff481SSatish Balay   if (bs <= 0) bs = 1;
61a2d1c673SSatish Balay 
624c1ff481SSatish Balay   stash->bs         = bs;
639417f4adSLois Curfman McInnes   stash->nmax       = 0;
64434d7ff9SSatish Balay   stash->oldnmax    = 0;
659417f4adSLois Curfman McInnes   stash->n          = 0;
664c1ff481SSatish Balay   stash->reallocs   = -1;
67f4259b30SLisandro Dalcin   stash->space_head = NULL;
68f4259b30SLisandro Dalcin   stash->space      = NULL;
699417f4adSLois Curfman McInnes 
70f4259b30SLisandro Dalcin   stash->send_waits  = NULL;
71f4259b30SLisandro Dalcin   stash->recv_waits  = NULL;
72f4259b30SLisandro Dalcin   stash->send_status = NULL;
73bc5ccf88SSatish Balay   stash->nsends      = 0;
74bc5ccf88SSatish Balay   stash->nrecvs      = 0;
75f4259b30SLisandro Dalcin   stash->svalues     = NULL;
76f4259b30SLisandro Dalcin   stash->rvalues     = NULL;
77f4259b30SLisandro Dalcin   stash->rindices    = NULL;
78a2d1c673SSatish Balay   stash->nprocessed  = 0;
7967318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
80d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
818865f1eaSKarl Rupp 
82c5929fdfSBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
831667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
84ca723aa4SStefano Zampini   flg  = PETSC_FALSE;
85b30fb036SBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
86b30fb036SBarry Smith   if (!flg) {
87d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
88d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
89d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
90d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
91ac2b2aa0SJed Brown   } else {
921667be42SBarry Smith #endif
93ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
94ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
95ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
96ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
971667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
98ac2b2aa0SJed Brown   }
991667be42SBarry Smith #endif
1003a40ed3dSBarry Smith   PetscFunctionReturn(0);
1019417f4adSLois Curfman McInnes }
1029417f4adSLois Curfman McInnes 
1034c1ff481SSatish Balay /*
1048798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1054c1ff481SSatish Balay */
106dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1079417f4adSLois Curfman McInnes {
108dfbe8321SBarry Smith   PetscErrorCode ierr;
109a2d1c673SSatish Balay 
110bc5ccf88SSatish Balay   PetscFunctionBegin;
1116bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
112ac2b2aa0SJed Brown   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
1138865f1eaSKarl Rupp 
114f4259b30SLisandro Dalcin   stash->space = NULL;
1158865f1eaSKarl Rupp 
116533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
117bc5ccf88SSatish Balay   PetscFunctionReturn(0);
118bc5ccf88SSatish Balay }
119bc5ccf88SSatish Balay 
1204c1ff481SSatish Balay /*
12167318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1224c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
12367318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1244c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1254c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1264c1ff481SSatish Balay    so that the same value can be used the next time through.
1274c1ff481SSatish Balay */
128dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129bc5ccf88SSatish Balay {
1306849ba73SBarry Smith   PetscErrorCode ierr;
131ac2b2aa0SJed Brown 
132ac2b2aa0SJed Brown   PetscFunctionBegin;
133ac2b2aa0SJed Brown   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134ac2b2aa0SJed Brown   PetscFunctionReturn(0);
135ac2b2aa0SJed Brown }
136ac2b2aa0SJed Brown 
137d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
138ac2b2aa0SJed Brown {
139ac2b2aa0SJed Brown   PetscErrorCode ierr;
140533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
141a2d1c673SSatish Balay   MPI_Status     *send_status;
142a2d1c673SSatish Balay 
1433a40ed3dSBarry Smith   PetscFunctionBegin;
144533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
145a2d1c673SSatish Balay   /* wait on sends */
146a2d1c673SSatish Balay   if (nsends) {
147785e854fSJed Brown     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
148ffc4695bSBarry Smith     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRMPI(ierr);
149606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
150a2d1c673SSatish Balay   }
151a2d1c673SSatish Balay 
152c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
153434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
154434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
155b9b97703SBarry Smith   if (stash->n) {
15694b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1578a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
158434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
159b9b97703SBarry Smith   }
160434d7ff9SSatish Balay 
161d07ff455SSatish Balay   stash->nmax       = 0;
162d07ff455SSatish Balay   stash->n          = 0;
1634c1ff481SSatish Balay   stash->reallocs   = -1;
164a2d1c673SSatish Balay   stash->nprocessed = 0;
1658865f1eaSKarl Rupp 
1666bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1678865f1eaSKarl Rupp 
168f4259b30SLisandro Dalcin   stash->space = NULL;
1698865f1eaSKarl Rupp 
170606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
171606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
172c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
173c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
174606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
175c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
176563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1773a40ed3dSBarry Smith   PetscFunctionReturn(0);
1789417f4adSLois Curfman McInnes }
1799417f4adSLois Curfman McInnes 
1804c1ff481SSatish Balay /*
1818798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1824c1ff481SSatish Balay 
1834c1ff481SSatish Balay    Input Parameters:
1844c1ff481SSatish Balay    stash    - the stash
18594b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1864c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1874c1ff481SSatish Balay 
1884c1ff481SSatish Balay */
189c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
19097530c3fSBarry Smith {
191c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
19294b769a5SSatish Balay 
1933a40ed3dSBarry Smith   PetscFunctionBegin;
1941ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1951ecfd215SBarry Smith   if (reallocs) {
196434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
197434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1981ecfd215SBarry Smith   }
199bc5ccf88SSatish Balay   PetscFunctionReturn(0);
200bc5ccf88SSatish Balay }
2014c1ff481SSatish Balay 
2024c1ff481SSatish Balay /*
2038798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
2044c1ff481SSatish Balay 
2054c1ff481SSatish Balay    Input Parameters:
2064c1ff481SSatish Balay    stash  - the stash
2074c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2084c1ff481SSatish Balay             this value is used while allocating memory.
2094c1ff481SSatish Balay */
210c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
211bc5ccf88SSatish Balay {
212bc5ccf88SSatish Balay   PetscFunctionBegin;
213434d7ff9SSatish Balay   stash->umax = max;
2143a40ed3dSBarry Smith   PetscFunctionReturn(0);
21597530c3fSBarry Smith }
21697530c3fSBarry Smith 
2178798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2184c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2194c1ff481SSatish Balay    being inserted into the stash.
2204c1ff481SSatish Balay 
2214c1ff481SSatish Balay    Input Parameters:
2224c1ff481SSatish Balay    stash - the stash
2234c1ff481SSatish Balay    incr  - the minimum increase requested
2244c1ff481SSatish Balay 
2254c1ff481SSatish Balay    Notes:
2264c1ff481SSatish Balay    This routine doubles the currently used memory.
2274c1ff481SSatish Balay  */
228c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2299417f4adSLois Curfman McInnes {
2306849ba73SBarry Smith   PetscErrorCode ierr;
2315bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2329417f4adSLois Curfman McInnes 
2333a40ed3dSBarry Smith   PetscFunctionBegin;
2349417f4adSLois Curfman McInnes   /* allocate a larger stash */
235c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
236434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
237434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
238c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
239434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
240434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
241434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2424c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
243d07ff455SSatish Balay 
24475cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
24575cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
246b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
247b087b6d6SSatish Balay     stash->space_head = stash->space;
24875cae7c1SHong Zhang   }
249b087b6d6SSatish Balay 
250bc5ccf88SSatish Balay   stash->reallocs++;
25175cae7c1SHong Zhang   stash->nmax = newnmax;
252bc5ccf88SSatish Balay   PetscFunctionReturn(0);
253bc5ccf88SSatish Balay }
254bc5ccf88SSatish Balay /*
2558798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2564c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2574c1ff481SSatish Balay   can be inserted with a single call to this function.
2584c1ff481SSatish Balay 
2594c1ff481SSatish Balay   Input Parameters:
2604c1ff481SSatish Balay   stash  - the stash
2614c1ff481SSatish Balay   row    - the global row correspoiding to the values
2624c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2634c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2644c1ff481SSatish Balay   values - the values inserted
265bc5ccf88SSatish Balay */
266ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
267bc5ccf88SSatish Balay {
268dfbe8321SBarry Smith   PetscErrorCode     ierr;
269b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
27075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
271bc5ccf88SSatish Balay 
272bc5ccf88SSatish Balay   PetscFunctionBegin;
2734c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
27475cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2758798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2769417f4adSLois Curfman McInnes   }
27775cae7c1SHong Zhang   space = stash->space;
27875cae7c1SHong Zhang   k     = space->local_used;
2794c1ff481SSatish Balay   for (i=0; i<n; i++) {
28014069ce9SStefano Zampini     if (ignorezeroentries && values && values[i] == 0.0) continue;
28175cae7c1SHong Zhang     space->idx[k] = row;
28275cae7c1SHong Zhang     space->idy[k] = idxn[i];
28314069ce9SStefano Zampini     space->val[k] = values ? values[i] : 0.0;
28475cae7c1SHong Zhang     k++;
285b400d20cSBarry Smith     cnt++;
2869417f4adSLois Curfman McInnes   }
287b400d20cSBarry Smith   stash->n               += cnt;
288b400d20cSBarry Smith   space->local_used      += cnt;
289b400d20cSBarry Smith   space->local_remaining -= cnt;
290a2d1c673SSatish Balay   PetscFunctionReturn(0);
291a2d1c673SSatish Balay }
29275cae7c1SHong Zhang 
2934c1ff481SSatish Balay /*
2948798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2954c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2964c1ff481SSatish Balay   can be inserted with a single call to this function.
297a2d1c673SSatish Balay 
2984c1ff481SSatish Balay   Input Parameters:
2994c1ff481SSatish Balay   stash   - the stash
3004c1ff481SSatish Balay   row     - the global row correspoiding to the values
3014c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
3024c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
3034c1ff481SSatish Balay   values  - the values inserted
3044c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3054c1ff481SSatish Balay             this happens because the input is columnoriented.
3064c1ff481SSatish Balay */
307ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
308a2d1c673SSatish Balay {
309dfbe8321SBarry Smith   PetscErrorCode     ierr;
31050e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
31175cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
312a2d1c673SSatish Balay 
3134c1ff481SSatish Balay   PetscFunctionBegin;
3144c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
31575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3168798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3174c1ff481SSatish Balay   }
31875cae7c1SHong Zhang   space = stash->space;
31975cae7c1SHong Zhang   k     = space->local_used;
3204c1ff481SSatish Balay   for (i=0; i<n; i++) {
32114069ce9SStefano Zampini     if (ignorezeroentries && values && values[i*stepval] == 0.0) continue;
32275cae7c1SHong Zhang     space->idx[k] = row;
32375cae7c1SHong Zhang     space->idy[k] = idxn[i];
32414069ce9SStefano Zampini     space->val[k] = values ? values[i*stepval] : 0.0;
32575cae7c1SHong Zhang     k++;
326b400d20cSBarry Smith     cnt++;
3274c1ff481SSatish Balay   }
328b400d20cSBarry Smith   stash->n               += cnt;
329b400d20cSBarry Smith   space->local_used      += cnt;
330b400d20cSBarry Smith   space->local_remaining -= cnt;
3314c1ff481SSatish Balay   PetscFunctionReturn(0);
3324c1ff481SSatish Balay }
3334c1ff481SSatish Balay 
3344c1ff481SSatish Balay /*
3358798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3364c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3374c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3384c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3394c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3404c1ff481SSatish Balay 
3414c1ff481SSatish Balay   Input Parameters:
3424c1ff481SSatish Balay   stash  - the stash
3434c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3444c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3454c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3464c1ff481SSatish Balay            values. Each block is of size bs*bs.
3474c1ff481SSatish Balay   values - the values inserted
3484c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3494c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3504c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3514c1ff481SSatish Balay */
35254f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3534c1ff481SSatish Balay {
354dfbe8321SBarry Smith   PetscErrorCode     ierr;
35575cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
35654f21887SBarry Smith   const PetscScalar  *vals;
35754f21887SBarry Smith   PetscScalar        *array;
35875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
359a2d1c673SSatish Balay 
360a2d1c673SSatish Balay   PetscFunctionBegin;
36175cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3628798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
363a2d1c673SSatish Balay   }
36475cae7c1SHong Zhang   space = stash->space;
36575cae7c1SHong Zhang   l     = space->local_used;
36675cae7c1SHong Zhang   bs2   = bs*bs;
3674c1ff481SSatish Balay   for (i=0; i<n; i++) {
36875cae7c1SHong Zhang     space->idx[l] = row;
36975cae7c1SHong Zhang     space->idy[l] = idxn[i];
37075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
37175cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
37275cae7c1SHong Zhang        funtion call */
37375cae7c1SHong Zhang     array = space->val + bs2*l;
37475cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
37575cae7c1SHong Zhang     for (j=0; j<bs; j++) {
37614069ce9SStefano Zampini       for (k=0; k<bs; k++) array[k*bs] = values ? vals[k] : 0.0;
37775cae7c1SHong Zhang       array++;
37875cae7c1SHong Zhang       vals += cmax*bs;
37975cae7c1SHong Zhang     }
38075cae7c1SHong Zhang     l++;
381a2d1c673SSatish Balay   }
3825bd3b8fbSHong Zhang   stash->n               += n;
38375cae7c1SHong Zhang   space->local_used      += n;
38475cae7c1SHong Zhang   space->local_remaining -= n;
3854c1ff481SSatish Balay   PetscFunctionReturn(0);
3864c1ff481SSatish Balay }
3874c1ff481SSatish Balay 
3884c1ff481SSatish Balay /*
3898798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3904c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3914c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3924c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3934c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3944c1ff481SSatish Balay 
3954c1ff481SSatish Balay   Input Parameters:
3964c1ff481SSatish Balay   stash  - the stash
3974c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3984c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3994c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
4004c1ff481SSatish Balay            values. Each block is of size bs*bs.
4014c1ff481SSatish Balay   values - the values inserted
4024c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4034c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4044c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4054c1ff481SSatish Balay */
40654f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4074c1ff481SSatish Balay {
408dfbe8321SBarry Smith   PetscErrorCode     ierr;
40975cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
41054f21887SBarry Smith   const PetscScalar  *vals;
41154f21887SBarry Smith   PetscScalar        *array;
41275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4134c1ff481SSatish Balay 
4144c1ff481SSatish Balay   PetscFunctionBegin;
41575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4168798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4174c1ff481SSatish Balay   }
41875cae7c1SHong Zhang   space = stash->space;
41975cae7c1SHong Zhang   l     = space->local_used;
42075cae7c1SHong Zhang   bs2   = bs*bs;
4214c1ff481SSatish Balay   for (i=0; i<n; i++) {
42275cae7c1SHong Zhang     space->idx[l] = row;
42375cae7c1SHong Zhang     space->idy[l] = idxn[i];
42475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
42575cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
42675cae7c1SHong Zhang      funtion call */
42775cae7c1SHong Zhang     array = space->val + bs2*l;
42875cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
42975cae7c1SHong Zhang     for (j=0; j<bs; j++) {
43014069ce9SStefano Zampini       for (k=0; k<bs; k++) array[k] = values ? vals[k] : 0.0;
43175cae7c1SHong Zhang       array += bs;
43275cae7c1SHong Zhang       vals  += rmax*bs;
43375cae7c1SHong Zhang     }
4345bd3b8fbSHong Zhang     l++;
435a2d1c673SSatish Balay   }
4365bd3b8fbSHong Zhang   stash->n               += n;
43775cae7c1SHong Zhang   space->local_used      += n;
43875cae7c1SHong Zhang   space->local_remaining -= n;
4393a40ed3dSBarry Smith   PetscFunctionReturn(0);
4409417f4adSLois Curfman McInnes }
4414c1ff481SSatish Balay /*
4428798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4434c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4444c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4454c1ff481SSatish Balay   processors.
446bc5ccf88SSatish Balay 
4474c1ff481SSatish Balay   Input Parameters:
4484c1ff481SSatish Balay   stash  - the stash
4494c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4504c1ff481SSatish Balay            for each node.
4514c1ff481SSatish Balay 
45295452b02SPatrick Sanan   Notes:
45395452b02SPatrick Sanan     The 'owners' array in the cased of the blocked-stash has the
4544c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4554c1ff481SSatish Balay   the proper global indices.
4564c1ff481SSatish Balay */
4571e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
458bc5ccf88SSatish Balay {
459ac2b2aa0SJed Brown   PetscErrorCode ierr;
460ac2b2aa0SJed Brown 
461ac2b2aa0SJed Brown   PetscFunctionBegin;
462ac2b2aa0SJed Brown   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
463ac2b2aa0SJed Brown   PetscFunctionReturn(0);
464ac2b2aa0SJed Brown }
465ac2b2aa0SJed Brown 
466ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
467ac2b2aa0SJed Brown {
468c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
469fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
4706849ba73SBarry Smith   PetscErrorCode     ierr;
47175cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
47254f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
473bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
474563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
47576ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4765bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
47754f21887SBarry Smith   PetscScalar        *sp_val;
4785bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
479bc5ccf88SSatish Balay 
480bc5ccf88SSatish Balay   PetscFunctionBegin;
4814b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
4824b4eb8d3SJed Brown     InsertMode addv;
483*820f2d46SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
4844b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
4854b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4864b4eb8d3SJed Brown   }
4874b4eb8d3SJed Brown 
4884c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
48975cae7c1SHong Zhang 
490bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
4911795a4d1SJed Brown   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
492037dbc42SBarry Smith   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
493a2d1c673SSatish Balay 
49475cae7c1SHong Zhang   i       = j    = 0;
4957357eb19SBarry Smith   lastidx = -1;
4965bd3b8fbSHong Zhang   space   = stash->space_head;
4976c4ed002SBarry Smith   while (space) {
49875cae7c1SHong Zhang     space_next = space->next;
4995bd3b8fbSHong Zhang     sp_idx     = space->idx;
50075cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
5017357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
5025bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
5037357eb19SBarry Smith       lastidx = idx;
5047357eb19SBarry Smith       for (; j<size; j++) {
5054c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
506563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
507bc5ccf88SSatish Balay         }
508bc5ccf88SSatish Balay       }
50975cae7c1SHong Zhang       i++;
51075cae7c1SHong Zhang     }
51175cae7c1SHong Zhang     space = space_next;
512bc5ccf88SSatish Balay   }
513071fcb05SBarry Smith 
514563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
515071fcb05SBarry Smith   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
516563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5178865f1eaSKarl Rupp     if (nlengths[i]) {
51876ec1555SBarry Smith       sizes[i] = 1; nsends++;
5198865f1eaSKarl Rupp     }
520563fb871SSatish Balay   }
521bc5ccf88SSatish Balay 
52254f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
523563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
52476ec1555SBarry Smith    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
525563fb871SSatish Balay    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
526563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
527563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
528563fb871SSatish Balay    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
529563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
530563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
531563fb871SSatish Balay    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
532563fb871SSatish Balay    ierr = PetscFree(onodes);CHKERRQ(ierr);
5338865f1eaSKarl Rupp    ierr = PetscFree(olengths);CHKERRQ(ierr);}
534bc5ccf88SSatish Balay 
535bc5ccf88SSatish Balay   /* do sends:
536bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
537bc5ccf88SSatish Balay          the ith processor
538bc5ccf88SSatish Balay   */
539dcca6d9dSJed Brown   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
540785e854fSJed Brown   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
541dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
542a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
543bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
544bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
545563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
546533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
547bc5ccf88SSatish Balay   }
54875cae7c1SHong Zhang 
54975cae7c1SHong Zhang   i     = 0;
5505bd3b8fbSHong Zhang   space = stash->space_head;
5516c4ed002SBarry Smith   while (space) {
55275cae7c1SHong Zhang     space_next = space->next;
5535bd3b8fbSHong Zhang     sp_idx     = space->idx;
5545bd3b8fbSHong Zhang     sp_idy     = space->idy;
5555bd3b8fbSHong Zhang     sp_val     = space->val;
55675cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
557bc5ccf88SSatish Balay       j = owner[i];
558a2d1c673SSatish Balay       if (bs2 == 1) {
5595bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
560a2d1c673SSatish Balay       } else {
561c1ac3661SBarry Smith         PetscInt    k;
56254f21887SBarry Smith         PetscScalar *buf1,*buf2;
5634c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
564b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5658865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
566a2d1c673SSatish Balay       }
5675bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5685bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
569bc5ccf88SSatish Balay       startv[j]++;
570bc5ccf88SSatish Balay       starti[j]++;
57175cae7c1SHong Zhang       i++;
57275cae7c1SHong Zhang     }
57375cae7c1SHong Zhang     space = space_next;
574bc5ccf88SSatish Balay   }
575bc5ccf88SSatish Balay   startv[0] = 0;
5768865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
577e5d0e772SSatish Balay 
578bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
57976ec1555SBarry Smith     if (sizes[i]) {
580ffc4695bSBarry Smith       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
581ffc4695bSBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
582bc5ccf88SSatish Balay     }
583b85c94c3SSatish Balay   }
5846cf91177SBarry Smith #if defined(PETSC_USE_INFO)
58593157e10SBarry Smith   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
586e5d0e772SSatish Balay   for (i=0; i<size; i++) {
58776ec1555SBarry Smith     if (sizes[i]) {
58830c47e72SSatish Balay       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
589e5d0e772SSatish Balay     }
590e5d0e772SSatish Balay   }
591e5d0e772SSatish Balay #endif
592c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
593606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
594c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
59576ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
596a2d1c673SSatish Balay 
597563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
598785e854fSJed Brown   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
599563fb871SSatish Balay 
600563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
601563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
602563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
603563fb871SSatish Balay   }
604563fb871SSatish Balay   stash->recv_waits = recv_waits;
6058865f1eaSKarl Rupp 
606563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
607563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
608563fb871SSatish Balay 
609c05d87d6SBarry Smith   stash->svalues         = svalues;
610c05d87d6SBarry Smith   stash->sindices        = sindices;
611c05d87d6SBarry Smith   stash->rvalues         = rvalues;
612c05d87d6SBarry Smith   stash->rindices        = rindices;
613c05d87d6SBarry Smith   stash->send_waits      = send_waits;
614c05d87d6SBarry Smith   stash->nsends          = nsends;
615c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
61667318a8aSJed Brown   stash->reproduce_count = 0;
617bc5ccf88SSatish Balay   PetscFunctionReturn(0);
618bc5ccf88SSatish Balay }
619bc5ccf88SSatish Balay 
620a2d1c673SSatish Balay /*
6218798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6228798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6234c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6244c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6254c1ff481SSatish Balay 
6264c1ff481SSatish Balay    Input Parameters:
6274c1ff481SSatish Balay    stash - the stash
6284c1ff481SSatish Balay 
6294c1ff481SSatish Balay    Output Parameters:
6304c1ff481SSatish Balay    nvals - the number of entries in the current message.
6314c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6324c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6334c1ff481SSatish Balay    vals  - the values
6344c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6354c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6364c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
637a2d1c673SSatish Balay */
63854f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
639bc5ccf88SSatish Balay {
6406849ba73SBarry Smith   PetscErrorCode ierr;
641ac2b2aa0SJed Brown 
642ac2b2aa0SJed Brown   PetscFunctionBegin;
643ac2b2aa0SJed Brown   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
644ac2b2aa0SJed Brown   PetscFunctionReturn(0);
645ac2b2aa0SJed Brown }
646ac2b2aa0SJed Brown 
647d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
648ac2b2aa0SJed Brown {
649ac2b2aa0SJed Brown   PetscErrorCode ierr;
650533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
651fe09c992SBarry Smith   PetscInt       bs2;
652a2d1c673SSatish Balay   MPI_Status     recv_status;
653ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
654bc5ccf88SSatish Balay 
655bc5ccf88SSatish Balay   PetscFunctionBegin;
656a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
657a2d1c673SSatish Balay   /* Return if no more messages to process */
6588865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
659a2d1c673SSatish Balay 
6604c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
66167318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
662a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
663a2d1c673SSatish Balay   while (!match_found) {
66467318a8aSJed Brown     if (stash->reproduce) {
66567318a8aSJed Brown       i    = stash->reproduce_count++;
666ffc4695bSBarry Smith       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRMPI(ierr);
66767318a8aSJed Brown     } else {
668ffc4695bSBarry Smith       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRMPI(ierr);
66967318a8aSJed Brown     }
670e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
671533163c2SBarry Smith 
67267318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
673a2d1c673SSatish Balay     if (i % 2) {
674ffc4695bSBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRMPI(ierr);
6758865f1eaSKarl Rupp 
676c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6778865f1eaSKarl Rupp 
678a2d1c673SSatish Balay       *nvals = *nvals/bs2;
679563fb871SSatish Balay     } else {
680ffc4695bSBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRMPI(ierr);
6818865f1eaSKarl Rupp 
682563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
6838865f1eaSKarl Rupp 
684563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
685bc5ccf88SSatish Balay     }
686a2d1c673SSatish Balay 
687cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
688c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
689c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
690a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
691563fb871SSatish Balay       *rows = stash->rindices[i2];
692a2d1c673SSatish Balay       *cols = *rows + *nvals;
693563fb871SSatish Balay       *vals = stash->rvalues[i1];
694a2d1c673SSatish Balay       *flg  = 1;
695a2d1c673SSatish Balay       stash->nprocessed++;
69635d8aa7fSBarry Smith       match_found = PETSC_TRUE;
697bc5ccf88SSatish Balay     }
698bc5ccf88SSatish Balay   }
699bc5ccf88SSatish Balay   PetscFunctionReturn(0);
700bc5ccf88SSatish Balay }
701d7d60843SJed Brown 
702e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
703d7d60843SJed Brown typedef struct {
704d7d60843SJed Brown   PetscInt row;
705d7d60843SJed Brown   PetscInt col;
706d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
707d7d60843SJed Brown } MatStashBlock;
708d7d60843SJed Brown 
709d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
710d7d60843SJed Brown {
711d7d60843SJed Brown   PetscErrorCode ierr;
712d7d60843SJed Brown   PetscMatStashSpace space;
713d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
714d7d60843SJed Brown   PetscScalar **valptr;
715d7d60843SJed Brown 
716d7d60843SJed Brown   PetscFunctionBegin;
717d7d60843SJed Brown   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
718d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
719d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
720d7d60843SJed Brown       row[cnt] = space->idx[i];
721d7d60843SJed Brown       col[cnt] = space->idy[i];
722d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
723d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
724d7d60843SJed Brown       cnt++;
725d7d60843SJed Brown     }
726d7d60843SJed Brown   }
727d7d60843SJed Brown   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
728d7d60843SJed Brown   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
729d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
730d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
731d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
732d7d60843SJed Brown       PetscInt colstart;
733d7d60843SJed Brown       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
734d7d60843SJed Brown       for (colstart=rowstart; colstart<i;) { /* Compress multiple insertions to the same location */
735d7d60843SJed Brown         PetscInt j,l;
736d7d60843SJed Brown         MatStashBlock *block;
737d7d60843SJed Brown         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
738d7d60843SJed Brown         block->row = row[rowstart];
739d7d60843SJed Brown         block->col = col[colstart];
740580bdb30SBarry Smith         ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr);
741d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
742d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
743d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
744d7d60843SJed Brown           } else {
745580bdb30SBarry Smith             ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr);
746d7d60843SJed Brown           }
747d7d60843SJed Brown         }
748d7d60843SJed Brown         colstart = j;
749d7d60843SJed Brown       }
750d7d60843SJed Brown       rowstart = i;
751d7d60843SJed Brown     }
752d7d60843SJed Brown   }
753d7d60843SJed Brown   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
754d7d60843SJed Brown   PetscFunctionReturn(0);
755d7d60843SJed Brown }
756d7d60843SJed Brown 
757d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
758d7d60843SJed Brown {
759d7d60843SJed Brown   PetscErrorCode ierr;
760d7d60843SJed Brown 
761d7d60843SJed Brown   PetscFunctionBegin;
762d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
763d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
764d7d60843SJed Brown     PetscMPIInt  blocklens[2];
765d7d60843SJed Brown     MPI_Aint     displs[2];
766d7d60843SJed Brown     MPI_Datatype types[2],stype;
767f41aa5efSJunchao Zhang     /* Note that DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
768f41aa5efSJunchao Zhang        std::complex itself has standard layout, so does DummyBlock, recursively.
769f41aa5efSJunchao Zhang        To be compatiable with C++ std::complex, complex implementations on GPUs must also have standard layout,
770f41aa5efSJunchao Zhang        though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
771f41aa5efSJunchao Zhang        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
772f41aa5efSJunchao Zhang 
773f41aa5efSJunchao Zhang        We can test if std::complex has standard layout with the following code:
774f41aa5efSJunchao Zhang        #include <iostream>
775f41aa5efSJunchao Zhang        #include <type_traits>
776f41aa5efSJunchao Zhang        #include <complex>
777f41aa5efSJunchao Zhang        int main() {
778f41aa5efSJunchao Zhang          std::cout << std::boolalpha;
779f41aa5efSJunchao Zhang          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
780f41aa5efSJunchao Zhang        }
781f41aa5efSJunchao Zhang        Output: true
7829503c6c6SJed Brown      */
783f41aa5efSJunchao Zhang     struct DummyBlock {PetscInt row,col; PetscScalar vals;};
784d7d60843SJed Brown 
7859503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
786d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
787d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
788d7d60843SJed Brown     }
789d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
790d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
791d7d60843SJed Brown     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
792d7d60843SJed Brown     blocklens[0] = 2;
793d7d60843SJed Brown     blocklens[1] = bs2;
7949503c6c6SJed Brown     displs[0] = offsetof(struct DummyBlock,row);
7959503c6c6SJed Brown     displs[1] = offsetof(struct DummyBlock,vals);
796d7d60843SJed Brown     types[0] = MPIU_INT;
797d7d60843SJed Brown     types[1] = MPIU_SCALAR;
798ffc4695bSBarry Smith     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRMPI(ierr);
799ffc4695bSBarry Smith     ierr = MPI_Type_commit(&stype);CHKERRMPI(ierr);
800ffc4695bSBarry Smith     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRMPI(ierr);
801ffc4695bSBarry Smith     ierr = MPI_Type_commit(&stash->blocktype);CHKERRMPI(ierr);
802ffc4695bSBarry Smith     ierr = MPI_Type_free(&stype);CHKERRMPI(ierr);
803d7d60843SJed Brown   }
804d7d60843SJed Brown   PetscFunctionReturn(0);
805d7d60843SJed Brown }
806d7d60843SJed Brown 
807d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
808d7d60843SJed Brown  * Here we post the main sends.
809d7d60843SJed Brown  */
810d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
811d7d60843SJed Brown {
812d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
813d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
814d7d60843SJed Brown   PetscErrorCode ierr;
815d7d60843SJed Brown 
816d7d60843SJed Brown   PetscFunctionBegin;
817d7d60843SJed Brown   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
818ffc4695bSBarry Smith   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRMPI(ierr);
819d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
820d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
821d7d60843SJed Brown   PetscFunctionReturn(0);
822d7d60843SJed Brown }
823d7d60843SJed Brown 
824d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
825d7d60843SJed Brown  * Here we post the main recvs.
826d7d60843SJed Brown  */
827d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
828d7d60843SJed Brown {
829d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
830d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
831d7d60843SJed Brown   MatStashFrame *frame;
832d7d60843SJed Brown   PetscErrorCode ierr;
833d7d60843SJed Brown 
834d7d60843SJed Brown   PetscFunctionBegin;
835d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
836d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
837ffc4695bSBarry Smith   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRMPI(ierr);
838d7d60843SJed Brown   frame->count = hdr->count;
839d7d60843SJed Brown   frame->pending = 1;
840d7d60843SJed Brown   PetscFunctionReturn(0);
841d7d60843SJed Brown }
842d7d60843SJed Brown 
843d7d60843SJed Brown /*
844d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
845d7d60843SJed Brown  */
846d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
847d7d60843SJed Brown {
848d7d60843SJed Brown   PetscErrorCode ierr;
849d7d60843SJed Brown   size_t nblocks;
850d7d60843SJed Brown   char *sendblocks;
851d7d60843SJed Brown 
852d7d60843SJed Brown   PetscFunctionBegin;
85376bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
8544b4eb8d3SJed Brown     InsertMode addv;
855*820f2d46SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
8564b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
8574b4eb8d3SJed Brown   }
8584b4eb8d3SJed Brown 
859d7d60843SJed Brown   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
860d7d60843SJed Brown   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
861d7d60843SJed Brown   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
862d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
86360f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
86423b7d1baSJed Brown     PetscInt i;
86523b7d1baSJed Brown     size_t b;
86697da8949SJed Brown     for (i=0,b=0; i<stash->nsendranks; i++) {
86797da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
86897da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
86997da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
87097da8949SJed Brown       for (; b<nblocks; b++) {
87197da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
87297da8949SJed Brown         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
87397da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
87497da8949SJed Brown         stash->sendhdr[i].count++;
87597da8949SJed Brown       }
87697da8949SJed Brown     }
87797da8949SJed Brown   } else {                      /* Dynamically count and pack (first time) */
87823b7d1baSJed Brown     PetscInt sendno;
87923b7d1baSJed Brown     size_t i,rowstart;
880d7d60843SJed Brown 
881d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
882d7d60843SJed Brown     stash->nsendranks = 0;
883d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks;) {
8847e2ea869SJed Brown       PetscInt owner;
885d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
886d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
887d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
888d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
889d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8907e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
891d7d60843SJed Brown       }
892d7d60843SJed Brown       stash->nsendranks++;
893d7d60843SJed Brown       rowstart = i;
894d7d60843SJed Brown     }
895d7d60843SJed Brown     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
896d7d60843SJed Brown 
897d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
898d7d60843SJed Brown     sendno = 0;
899d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks;) {
900d7d60843SJed Brown       PetscInt owner;
901d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
902d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
903d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
904d7d60843SJed Brown       stash->sendranks[sendno] = owner;
905d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
906d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9077e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
908d7d60843SJed Brown       }
909d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
910d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
911d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
912d7d60843SJed Brown       sendno++;
913d7d60843SJed Brown       rowstart = i;
914d7d60843SJed Brown     }
915d7d60843SJed Brown     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
916d7d60843SJed Brown   }
917d7d60843SJed Brown 
9184b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
9194b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
9204b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
92123b7d1baSJed Brown     size_t i;
9224b4eb8d3SJed Brown     for (i=0; i<nblocks; i++) {
9234b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9244b4eb8d3SJed Brown       sendblock_i->row = -(sendblock_i->row+1);
9254b4eb8d3SJed Brown     }
9264b4eb8d3SJed Brown   }
9274b4eb8d3SJed Brown 
92860f34548SJunchao Zhang   if (stash->first_assembly_done) {
92997da8949SJed Brown     PetscMPIInt i,tag;
93097da8949SJed Brown     ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
93197da8949SJed Brown     for (i=0; i<stash->nrecvranks; i++) {
93297da8949SJed Brown       ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
93397da8949SJed Brown     }
93497da8949SJed Brown     for (i=0; i<stash->nsendranks; i++) {
93597da8949SJed Brown       ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
93697da8949SJed Brown     }
93797da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
93897da8949SJed Brown   } else {
939e0ddb6e8SJed Brown     ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
940e0ddb6e8SJed Brown                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
941d7d60843SJed Brown                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
942b5ddc6f1SJed Brown     ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
94397da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
94497da8949SJed Brown   }
945d7d60843SJed Brown 
946d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
947d7d60843SJed Brown   stash->recvframe_active     = NULL;
948d7d60843SJed Brown   stash->recvframe_i          = 0;
949d7d60843SJed Brown   stash->some_i               = 0;
950d7d60843SJed Brown   stash->some_count           = 0;
951d7d60843SJed Brown   stash->recvcount            = 0;
95260f34548SJunchao Zhang   stash->first_assembly_done  = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9534b4eb8d3SJed Brown   stash->insertmode           = &mat->insertmode;
954d7d60843SJed Brown   PetscFunctionReturn(0);
955d7d60843SJed Brown }
956d7d60843SJed Brown 
957d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
958d7d60843SJed Brown {
959d7d60843SJed Brown   PetscErrorCode ierr;
960d7d60843SJed Brown   MatStashBlock *block;
961d7d60843SJed Brown 
962d7d60843SJed Brown   PetscFunctionBegin;
963d7d60843SJed Brown   *flg = 0;
964d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
965d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
966d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
967ffc4695bSBarry Smith       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
968d7d60843SJed Brown       stash->some_i = 0;
969d7d60843SJed Brown     }
970d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
971d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
972d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
973ffc4695bSBarry Smith       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRMPI(ierr);
974d7d60843SJed Brown     }
9754b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9764b4eb8d3SJed Brown       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
9774b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
9784b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9794b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9804b4eb8d3SJed Brown     }
981d7d60843SJed Brown     stash->some_i++;
982d7d60843SJed Brown     stash->recvcount++;
983d7d60843SJed Brown     stash->recvframe_i = 0;
984d7d60843SJed Brown   }
985d7d60843SJed Brown   *n = 1;
986d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
9874b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
988d7d60843SJed Brown   *row = &block->row;
989d7d60843SJed Brown   *col = &block->col;
990d7d60843SJed Brown   *val = block->vals;
991d7d60843SJed Brown   stash->recvframe_i++;
992d7d60843SJed Brown   *flg = 1;
993d7d60843SJed Brown   PetscFunctionReturn(0);
994d7d60843SJed Brown }
995d7d60843SJed Brown 
996d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
997d7d60843SJed Brown {
998d7d60843SJed Brown   PetscErrorCode ierr;
999d7d60843SJed Brown 
1000d7d60843SJed Brown   PetscFunctionBegin;
1001ffc4695bSBarry Smith   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
100260f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
10033575f486SJed Brown     void *dummy;
10043575f486SJed Brown     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
10053575f486SJed Brown   } else {                      /* No reuse, so collect everything. */
1006d7d60843SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
100797da8949SJed Brown   }
1008d7d60843SJed Brown 
1009d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
1010d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
1011d7d60843SJed Brown      Also update the oldmax, only if it increases */
1012d7d60843SJed Brown   if (stash->n) {
1013d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
1014d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1015d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1016d7d60843SJed Brown   }
1017d7d60843SJed Brown 
1018d7d60843SJed Brown   stash->nmax       = 0;
1019d7d60843SJed Brown   stash->n          = 0;
1020d7d60843SJed Brown   stash->reallocs   = -1;
1021d7d60843SJed Brown   stash->nprocessed = 0;
1022d7d60843SJed Brown 
1023d7d60843SJed Brown   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1024d7d60843SJed Brown 
1025f4259b30SLisandro Dalcin   stash->space = NULL;
1026d7d60843SJed Brown 
1027d7d60843SJed Brown   PetscFunctionReturn(0);
1028d7d60843SJed Brown }
1029d7d60843SJed Brown 
103060f34548SJunchao Zhang PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1031d7d60843SJed Brown {
1032d7d60843SJed Brown   PetscErrorCode ierr;
1033d7d60843SJed Brown 
1034d7d60843SJed Brown   PetscFunctionBegin;
1035d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1036d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1037d7d60843SJed Brown   stash->recvframes = NULL;
1038d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1039d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
1040ffc4695bSBarry Smith     ierr = MPI_Type_free(&stash->blocktype);CHKERRMPI(ierr);
1041d7d60843SJed Brown   }
1042d7d60843SJed Brown   stash->nsendranks = 0;
1043d7d60843SJed Brown   stash->nrecvranks = 0;
1044d7d60843SJed Brown   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1045d7d60843SJed Brown   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1046d7d60843SJed Brown   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1047d7d60843SJed Brown   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1048d7d60843SJed Brown   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1049d7d60843SJed Brown   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1050d7d60843SJed Brown   PetscFunctionReturn(0);
1051d7d60843SJed Brown }
10521667be42SBarry Smith #endif
1053