xref: /petsc/src/mat/utils/matstash.c (revision 071fcb05f2a6aea8aef2c2090580530b9e9df77e)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
1375d39b6aSBarry Smith #endif
14d7d60843SJed Brown 
159417f4adSLois Curfman McInnes /*
168798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
174c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
184c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
199417f4adSLois Curfman McInnes 
204c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Input Parameters:
234c1ff481SSatish Balay   comm - communicator, required for scatters.
244c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
254c1ff481SSatish Balay 
264c1ff481SSatish Balay   Output Parameters:
274c1ff481SSatish Balay   stash    - the newly created stash
289417f4adSLois Curfman McInnes */
29c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
309417f4adSLois Curfman McInnes {
31dfbe8321SBarry Smith   PetscErrorCode ierr;
32533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
33ace3abfcSBarry Smith   PetscBool      flg;
34bc5ccf88SSatish Balay 
353a40ed3dSBarry Smith   PetscFunctionBegin;
36bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
37752ec6e0SSatish Balay   stash->comm = comm;
388865f1eaSKarl Rupp 
39752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
41a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
42a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
43785e854fSJed Brown   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
44533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
45533163c2SBarry Smith 
46bc5ccf88SSatish Balay 
47434d7ff9SSatish Balay   nopt = stash->size;
48785e854fSJed Brown   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
49c5929fdfSBarry Smith   ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
50434d7ff9SSatish Balay   if (flg) {
51434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
52434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
53434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
54f4ab19daSSatish Balay     else                          max = 0; /* Use default */
55434d7ff9SSatish Balay     stash->umax = max;
56434d7ff9SSatish Balay   } else {
57434d7ff9SSatish Balay     stash->umax = 0;
58434d7ff9SSatish Balay   }
59606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
604c1ff481SSatish Balay   if (bs <= 0) bs = 1;
61a2d1c673SSatish Balay 
624c1ff481SSatish Balay   stash->bs         = bs;
639417f4adSLois Curfman McInnes   stash->nmax       = 0;
64434d7ff9SSatish Balay   stash->oldnmax    = 0;
659417f4adSLois Curfman McInnes   stash->n          = 0;
664c1ff481SSatish Balay   stash->reallocs   = -1;
6775cae7c1SHong Zhang   stash->space_head = 0;
6875cae7c1SHong Zhang   stash->space      = 0;
699417f4adSLois Curfman McInnes 
70bc5ccf88SSatish Balay   stash->send_waits  = 0;
71bc5ccf88SSatish Balay   stash->recv_waits  = 0;
72a2d1c673SSatish Balay   stash->send_status = 0;
73bc5ccf88SSatish Balay   stash->nsends      = 0;
74bc5ccf88SSatish Balay   stash->nrecvs      = 0;
75bc5ccf88SSatish Balay   stash->svalues     = 0;
76bc5ccf88SSatish Balay   stash->rvalues     = 0;
77563fb871SSatish Balay   stash->rindices    = 0;
78a2d1c673SSatish Balay   stash->nprocessed  = 0;
7967318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
80d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
818865f1eaSKarl Rupp 
82c5929fdfSBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
831667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
84b30fb036SBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
85b30fb036SBarry Smith   if (!flg) {
86d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
87d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
89d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90ac2b2aa0SJed Brown   } else {
911667be42SBarry Smith #endif
92ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
93ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
94ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
95ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
961667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
97ac2b2aa0SJed Brown   }
981667be42SBarry Smith #endif
993a40ed3dSBarry Smith   PetscFunctionReturn(0);
1009417f4adSLois Curfman McInnes }
1019417f4adSLois Curfman McInnes 
1024c1ff481SSatish Balay /*
1038798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1044c1ff481SSatish Balay */
105dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1069417f4adSLois Curfman McInnes {
107dfbe8321SBarry Smith   PetscErrorCode ierr;
108a2d1c673SSatish Balay 
109bc5ccf88SSatish Balay   PetscFunctionBegin;
1106bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
111ac2b2aa0SJed Brown   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
1128865f1eaSKarl Rupp 
11382740460SHong Zhang   stash->space = 0;
1148865f1eaSKarl Rupp 
115533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
116bc5ccf88SSatish Balay   PetscFunctionReturn(0);
117bc5ccf88SSatish Balay }
118bc5ccf88SSatish Balay 
1194c1ff481SSatish Balay /*
12067318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1214c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
12267318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1234c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1244c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1254c1ff481SSatish Balay    so that the same value can be used the next time through.
1264c1ff481SSatish Balay */
127dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
128bc5ccf88SSatish Balay {
1296849ba73SBarry Smith   PetscErrorCode ierr;
130ac2b2aa0SJed Brown 
131ac2b2aa0SJed Brown   PetscFunctionBegin;
132ac2b2aa0SJed Brown   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
133ac2b2aa0SJed Brown   PetscFunctionReturn(0);
134ac2b2aa0SJed Brown }
135ac2b2aa0SJed Brown 
136ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
137ac2b2aa0SJed Brown {
138ac2b2aa0SJed Brown   PetscErrorCode ierr;
139533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
140a2d1c673SSatish Balay   MPI_Status     *send_status;
141a2d1c673SSatish Balay 
1423a40ed3dSBarry Smith   PetscFunctionBegin;
143533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
144a2d1c673SSatish Balay   /* wait on sends */
145a2d1c673SSatish Balay   if (nsends) {
146785e854fSJed Brown     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
147a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
148606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
149a2d1c673SSatish Balay   }
150a2d1c673SSatish Balay 
151c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
152434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
153434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
154b9b97703SBarry Smith   if (stash->n) {
15594b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1568a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
157434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
158b9b97703SBarry Smith   }
159434d7ff9SSatish Balay 
160d07ff455SSatish Balay   stash->nmax       = 0;
161d07ff455SSatish Balay   stash->n          = 0;
1624c1ff481SSatish Balay   stash->reallocs   = -1;
163a2d1c673SSatish Balay   stash->nprocessed = 0;
1648865f1eaSKarl Rupp 
1656bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1668865f1eaSKarl Rupp 
16782740460SHong Zhang   stash->space = 0;
1688865f1eaSKarl Rupp 
169606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
170606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
171c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
172c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
173606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
174c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
175563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1779417f4adSLois Curfman McInnes }
1789417f4adSLois Curfman McInnes 
1794c1ff481SSatish Balay /*
1808798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1814c1ff481SSatish Balay 
1824c1ff481SSatish Balay    Input Parameters:
1834c1ff481SSatish Balay    stash    - the stash
18494b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1854c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1864c1ff481SSatish Balay 
1874c1ff481SSatish Balay */
188c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
18997530c3fSBarry Smith {
190c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
19194b769a5SSatish Balay 
1923a40ed3dSBarry Smith   PetscFunctionBegin;
1931ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1941ecfd215SBarry Smith   if (reallocs) {
195434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
196434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1971ecfd215SBarry Smith   }
198bc5ccf88SSatish Balay   PetscFunctionReturn(0);
199bc5ccf88SSatish Balay }
2004c1ff481SSatish Balay 
2014c1ff481SSatish Balay /*
2028798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
2034c1ff481SSatish Balay 
2044c1ff481SSatish Balay    Input Parameters:
2054c1ff481SSatish Balay    stash  - the stash
2064c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2074c1ff481SSatish Balay             this value is used while allocating memory.
2084c1ff481SSatish Balay */
209c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
210bc5ccf88SSatish Balay {
211bc5ccf88SSatish Balay   PetscFunctionBegin;
212434d7ff9SSatish Balay   stash->umax = max;
2133a40ed3dSBarry Smith   PetscFunctionReturn(0);
21497530c3fSBarry Smith }
21597530c3fSBarry Smith 
2168798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2174c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2184c1ff481SSatish Balay    being inserted into the stash.
2194c1ff481SSatish Balay 
2204c1ff481SSatish Balay    Input Parameters:
2214c1ff481SSatish Balay    stash - the stash
2224c1ff481SSatish Balay    incr  - the minimum increase requested
2234c1ff481SSatish Balay 
2244c1ff481SSatish Balay    Notes:
2254c1ff481SSatish Balay    This routine doubles the currently used memory.
2264c1ff481SSatish Balay  */
227c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2289417f4adSLois Curfman McInnes {
2296849ba73SBarry Smith   PetscErrorCode ierr;
2305bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2319417f4adSLois Curfman McInnes 
2323a40ed3dSBarry Smith   PetscFunctionBegin;
2339417f4adSLois Curfman McInnes   /* allocate a larger stash */
234c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
235434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
236434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
237c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
238434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
239434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
240434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2414c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
242d07ff455SSatish Balay 
24375cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
24475cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
245b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
246b087b6d6SSatish Balay     stash->space_head = stash->space;
24775cae7c1SHong Zhang   }
248b087b6d6SSatish Balay 
249bc5ccf88SSatish Balay   stash->reallocs++;
25075cae7c1SHong Zhang   stash->nmax = newnmax;
251bc5ccf88SSatish Balay   PetscFunctionReturn(0);
252bc5ccf88SSatish Balay }
253bc5ccf88SSatish Balay /*
2548798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2554c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2564c1ff481SSatish Balay   can be inserted with a single call to this function.
2574c1ff481SSatish Balay 
2584c1ff481SSatish Balay   Input Parameters:
2594c1ff481SSatish Balay   stash  - the stash
2604c1ff481SSatish Balay   row    - the global row correspoiding to the values
2614c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2624c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2634c1ff481SSatish Balay   values - the values inserted
264bc5ccf88SSatish Balay */
265ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
266bc5ccf88SSatish Balay {
267dfbe8321SBarry Smith   PetscErrorCode     ierr;
268b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
26975cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
270bc5ccf88SSatish Balay 
271bc5ccf88SSatish Balay   PetscFunctionBegin;
2724c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
27375cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2748798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2759417f4adSLois Curfman McInnes   }
27675cae7c1SHong Zhang   space = stash->space;
27775cae7c1SHong Zhang   k     = space->local_used;
2784c1ff481SSatish Balay   for (i=0; i<n; i++) {
27988c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
28075cae7c1SHong Zhang     space->idx[k] = row;
28175cae7c1SHong Zhang     space->idy[k] = idxn[i];
28275cae7c1SHong Zhang     space->val[k] = values[i];
28375cae7c1SHong Zhang     k++;
284b400d20cSBarry Smith     cnt++;
2859417f4adSLois Curfman McInnes   }
286b400d20cSBarry Smith   stash->n               += cnt;
287b400d20cSBarry Smith   space->local_used      += cnt;
288b400d20cSBarry Smith   space->local_remaining -= cnt;
289a2d1c673SSatish Balay   PetscFunctionReturn(0);
290a2d1c673SSatish Balay }
29175cae7c1SHong Zhang 
2924c1ff481SSatish Balay /*
2938798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2944c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2954c1ff481SSatish Balay   can be inserted with a single call to this function.
296a2d1c673SSatish Balay 
2974c1ff481SSatish Balay   Input Parameters:
2984c1ff481SSatish Balay   stash   - the stash
2994c1ff481SSatish Balay   row     - the global row correspoiding to the values
3004c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
3014c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
3024c1ff481SSatish Balay   values  - the values inserted
3034c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3044c1ff481SSatish Balay             this happens because the input is columnoriented.
3054c1ff481SSatish Balay */
306ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
307a2d1c673SSatish Balay {
308dfbe8321SBarry Smith   PetscErrorCode     ierr;
30950e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
31075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
311a2d1c673SSatish Balay 
3124c1ff481SSatish Balay   PetscFunctionBegin;
3134c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
31475cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3158798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3164c1ff481SSatish Balay   }
31775cae7c1SHong Zhang   space = stash->space;
31875cae7c1SHong Zhang   k     = space->local_used;
3194c1ff481SSatish Balay   for (i=0; i<n; i++) {
32088c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
32175cae7c1SHong Zhang     space->idx[k] = row;
32275cae7c1SHong Zhang     space->idy[k] = idxn[i];
32375cae7c1SHong Zhang     space->val[k] = values[i*stepval];
32475cae7c1SHong Zhang     k++;
325b400d20cSBarry Smith     cnt++;
3264c1ff481SSatish Balay   }
327b400d20cSBarry Smith   stash->n               += cnt;
328b400d20cSBarry Smith   space->local_used      += cnt;
329b400d20cSBarry Smith   space->local_remaining -= cnt;
3304c1ff481SSatish Balay   PetscFunctionReturn(0);
3314c1ff481SSatish Balay }
3324c1ff481SSatish Balay 
3334c1ff481SSatish Balay /*
3348798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3354c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3364c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3374c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3384c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3394c1ff481SSatish Balay 
3404c1ff481SSatish Balay   Input Parameters:
3414c1ff481SSatish Balay   stash  - the stash
3424c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3434c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3444c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3454c1ff481SSatish Balay            values. Each block is of size bs*bs.
3464c1ff481SSatish Balay   values - the values inserted
3474c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3484c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3494c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3504c1ff481SSatish Balay */
35154f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3524c1ff481SSatish Balay {
353dfbe8321SBarry Smith   PetscErrorCode     ierr;
35475cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
35554f21887SBarry Smith   const PetscScalar  *vals;
35654f21887SBarry Smith   PetscScalar        *array;
35775cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
358a2d1c673SSatish Balay 
359a2d1c673SSatish Balay   PetscFunctionBegin;
36075cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3618798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
362a2d1c673SSatish Balay   }
36375cae7c1SHong Zhang   space = stash->space;
36475cae7c1SHong Zhang   l     = space->local_used;
36575cae7c1SHong Zhang   bs2   = bs*bs;
3664c1ff481SSatish Balay   for (i=0; i<n; i++) {
36775cae7c1SHong Zhang     space->idx[l] = row;
36875cae7c1SHong Zhang     space->idy[l] = idxn[i];
36975cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
37075cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
37175cae7c1SHong Zhang        funtion call */
37275cae7c1SHong Zhang     array = space->val + bs2*l;
37375cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
37475cae7c1SHong Zhang     for (j=0; j<bs; j++) {
37575cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
37675cae7c1SHong Zhang       array++;
37775cae7c1SHong Zhang       vals += cmax*bs;
37875cae7c1SHong Zhang     }
37975cae7c1SHong Zhang     l++;
380a2d1c673SSatish Balay   }
3815bd3b8fbSHong Zhang   stash->n               += n;
38275cae7c1SHong Zhang   space->local_used      += n;
38375cae7c1SHong Zhang   space->local_remaining -= n;
3844c1ff481SSatish Balay   PetscFunctionReturn(0);
3854c1ff481SSatish Balay }
3864c1ff481SSatish Balay 
3874c1ff481SSatish Balay /*
3888798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3894c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3904c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3914c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3924c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3934c1ff481SSatish Balay 
3944c1ff481SSatish Balay   Input Parameters:
3954c1ff481SSatish Balay   stash  - the stash
3964c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3974c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3984c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3994c1ff481SSatish Balay            values. Each block is of size bs*bs.
4004c1ff481SSatish Balay   values - the values inserted
4014c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4024c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4034c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4044c1ff481SSatish Balay */
40554f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4064c1ff481SSatish Balay {
407dfbe8321SBarry Smith   PetscErrorCode     ierr;
40875cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
40954f21887SBarry Smith   const PetscScalar  *vals;
41054f21887SBarry Smith   PetscScalar        *array;
41175cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4124c1ff481SSatish Balay 
4134c1ff481SSatish Balay   PetscFunctionBegin;
41475cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4158798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4164c1ff481SSatish Balay   }
41775cae7c1SHong Zhang   space = stash->space;
41875cae7c1SHong Zhang   l     = space->local_used;
41975cae7c1SHong Zhang   bs2   = bs*bs;
4204c1ff481SSatish Balay   for (i=0; i<n; i++) {
42175cae7c1SHong Zhang     space->idx[l] = row;
42275cae7c1SHong Zhang     space->idy[l] = idxn[i];
42375cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
42475cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
42575cae7c1SHong Zhang      funtion call */
42675cae7c1SHong Zhang     array = space->val + bs2*l;
42775cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
42875cae7c1SHong Zhang     for (j=0; j<bs; j++) {
4298865f1eaSKarl Rupp       for (k=0; k<bs; k++) array[k] = vals[k];
43075cae7c1SHong Zhang       array += bs;
43175cae7c1SHong Zhang       vals  += rmax*bs;
43275cae7c1SHong Zhang     }
4335bd3b8fbSHong Zhang     l++;
434a2d1c673SSatish Balay   }
4355bd3b8fbSHong Zhang   stash->n               += n;
43675cae7c1SHong Zhang   space->local_used      += n;
43775cae7c1SHong Zhang   space->local_remaining -= n;
4383a40ed3dSBarry Smith   PetscFunctionReturn(0);
4399417f4adSLois Curfman McInnes }
4404c1ff481SSatish Balay /*
4418798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4424c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4434c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4444c1ff481SSatish Balay   processors.
445bc5ccf88SSatish Balay 
4464c1ff481SSatish Balay   Input Parameters:
4474c1ff481SSatish Balay   stash  - the stash
4484c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4494c1ff481SSatish Balay            for each node.
4504c1ff481SSatish Balay 
45195452b02SPatrick Sanan   Notes:
45295452b02SPatrick Sanan     The 'owners' array in the cased of the blocked-stash has the
4534c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4544c1ff481SSatish Balay   the proper global indices.
4554c1ff481SSatish Balay */
4561e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
457bc5ccf88SSatish Balay {
458ac2b2aa0SJed Brown   PetscErrorCode ierr;
459ac2b2aa0SJed Brown 
460ac2b2aa0SJed Brown   PetscFunctionBegin;
461ac2b2aa0SJed Brown   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
462ac2b2aa0SJed Brown   PetscFunctionReturn(0);
463ac2b2aa0SJed Brown }
464ac2b2aa0SJed Brown 
465ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
466ac2b2aa0SJed Brown {
467c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
468fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
4696849ba73SBarry Smith   PetscErrorCode     ierr;
47075cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
47154f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
472bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
473563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
47476ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4755bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
47654f21887SBarry Smith   PetscScalar        *sp_val;
4775bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
478bc5ccf88SSatish Balay 
479bc5ccf88SSatish Balay   PetscFunctionBegin;
4804b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
4814b4eb8d3SJed Brown     InsertMode addv;
482b2566f29SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
4834b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
4844b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4854b4eb8d3SJed Brown   }
4864b4eb8d3SJed Brown 
4874c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
48875cae7c1SHong Zhang 
489bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
4901795a4d1SJed Brown   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
491037dbc42SBarry Smith   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
492a2d1c673SSatish Balay 
49375cae7c1SHong Zhang   i       = j    = 0;
4947357eb19SBarry Smith   lastidx = -1;
4955bd3b8fbSHong Zhang   space   = stash->space_head;
4966c4ed002SBarry Smith   while (space) {
49775cae7c1SHong Zhang     space_next = space->next;
4985bd3b8fbSHong Zhang     sp_idx     = space->idx;
49975cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
5007357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
5015bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
5027357eb19SBarry Smith       lastidx = idx;
5037357eb19SBarry Smith       for (; j<size; j++) {
5044c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
505563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
506bc5ccf88SSatish Balay         }
507bc5ccf88SSatish Balay       }
50875cae7c1SHong Zhang       i++;
50975cae7c1SHong Zhang     }
51075cae7c1SHong Zhang     space = space_next;
511bc5ccf88SSatish Balay   }
512*071fcb05SBarry Smith 
513563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
514*071fcb05SBarry Smith   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
515563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5168865f1eaSKarl Rupp     if (nlengths[i]) {
51776ec1555SBarry Smith       sizes[i] = 1; nsends++;
5188865f1eaSKarl Rupp     }
519563fb871SSatish Balay   }
520bc5ccf88SSatish Balay 
52154f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
522563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
52376ec1555SBarry Smith    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
524563fb871SSatish Balay    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
525563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
526563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
527563fb871SSatish Balay    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
528563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
529563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
530563fb871SSatish Balay    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
531563fb871SSatish Balay    ierr = PetscFree(onodes);CHKERRQ(ierr);
5328865f1eaSKarl Rupp    ierr = PetscFree(olengths);CHKERRQ(ierr);}
533bc5ccf88SSatish Balay 
534bc5ccf88SSatish Balay   /* do sends:
535bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
536bc5ccf88SSatish Balay          the ith processor
537bc5ccf88SSatish Balay   */
538dcca6d9dSJed Brown   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
539785e854fSJed Brown   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
540dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
541a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
542bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
543bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
544563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
545533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
546bc5ccf88SSatish Balay   }
54775cae7c1SHong Zhang 
54875cae7c1SHong Zhang   i     = 0;
5495bd3b8fbSHong Zhang   space = stash->space_head;
5506c4ed002SBarry Smith   while (space) {
55175cae7c1SHong Zhang     space_next = space->next;
5525bd3b8fbSHong Zhang     sp_idx     = space->idx;
5535bd3b8fbSHong Zhang     sp_idy     = space->idy;
5545bd3b8fbSHong Zhang     sp_val     = space->val;
55575cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
556bc5ccf88SSatish Balay       j = owner[i];
557a2d1c673SSatish Balay       if (bs2 == 1) {
5585bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
559a2d1c673SSatish Balay       } else {
560c1ac3661SBarry Smith         PetscInt    k;
56154f21887SBarry Smith         PetscScalar *buf1,*buf2;
5624c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
563b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5648865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
565a2d1c673SSatish Balay       }
5665bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5675bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
568bc5ccf88SSatish Balay       startv[j]++;
569bc5ccf88SSatish Balay       starti[j]++;
57075cae7c1SHong Zhang       i++;
57175cae7c1SHong Zhang     }
57275cae7c1SHong Zhang     space = space_next;
573bc5ccf88SSatish Balay   }
574bc5ccf88SSatish Balay   startv[0] = 0;
5758865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
576e5d0e772SSatish Balay 
577bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
57876ec1555SBarry Smith     if (sizes[i]) {
579563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
580a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
581bc5ccf88SSatish Balay     }
582b85c94c3SSatish Balay   }
5836cf91177SBarry Smith #if defined(PETSC_USE_INFO)
58493157e10SBarry Smith   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
585e5d0e772SSatish Balay   for (i=0; i<size; i++) {
58676ec1555SBarry Smith     if (sizes[i]) {
58730c47e72SSatish Balay       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
588e5d0e772SSatish Balay     }
589e5d0e772SSatish Balay   }
590e5d0e772SSatish Balay #endif
591c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
592606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
593c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
59476ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
595a2d1c673SSatish Balay 
596563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
597785e854fSJed Brown   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
598563fb871SSatish Balay 
599563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
600563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
601563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
602563fb871SSatish Balay   }
603563fb871SSatish Balay   stash->recv_waits = recv_waits;
6048865f1eaSKarl Rupp 
605563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
606563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
607563fb871SSatish Balay 
608c05d87d6SBarry Smith   stash->svalues         = svalues;
609c05d87d6SBarry Smith   stash->sindices        = sindices;
610c05d87d6SBarry Smith   stash->rvalues         = rvalues;
611c05d87d6SBarry Smith   stash->rindices        = rindices;
612c05d87d6SBarry Smith   stash->send_waits      = send_waits;
613c05d87d6SBarry Smith   stash->nsends          = nsends;
614c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
61567318a8aSJed Brown   stash->reproduce_count = 0;
616bc5ccf88SSatish Balay   PetscFunctionReturn(0);
617bc5ccf88SSatish Balay }
618bc5ccf88SSatish Balay 
619a2d1c673SSatish Balay /*
6208798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6218798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6224c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6234c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6244c1ff481SSatish Balay 
6254c1ff481SSatish Balay    Input Parameters:
6264c1ff481SSatish Balay    stash - the stash
6274c1ff481SSatish Balay 
6284c1ff481SSatish Balay    Output Parameters:
6294c1ff481SSatish Balay    nvals - the number of entries in the current message.
6304c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6314c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6324c1ff481SSatish Balay    vals  - the values
6334c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6344c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6354c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
636a2d1c673SSatish Balay */
63754f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
638bc5ccf88SSatish Balay {
6396849ba73SBarry Smith   PetscErrorCode ierr;
640ac2b2aa0SJed Brown 
641ac2b2aa0SJed Brown   PetscFunctionBegin;
642ac2b2aa0SJed Brown   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
643ac2b2aa0SJed Brown   PetscFunctionReturn(0);
644ac2b2aa0SJed Brown }
645ac2b2aa0SJed Brown 
646ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
647ac2b2aa0SJed Brown {
648ac2b2aa0SJed Brown   PetscErrorCode ierr;
649533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
650fe09c992SBarry Smith   PetscInt       bs2;
651a2d1c673SSatish Balay   MPI_Status     recv_status;
652ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
653bc5ccf88SSatish Balay 
654bc5ccf88SSatish Balay   PetscFunctionBegin;
655a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
656a2d1c673SSatish Balay   /* Return if no more messages to process */
6578865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
658a2d1c673SSatish Balay 
6594c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
66067318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
661a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
662a2d1c673SSatish Balay   while (!match_found) {
66367318a8aSJed Brown     if (stash->reproduce) {
66467318a8aSJed Brown       i    = stash->reproduce_count++;
66567318a8aSJed Brown       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
66667318a8aSJed Brown     } else {
667a2d1c673SSatish Balay       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
66867318a8aSJed Brown     }
669e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
670533163c2SBarry Smith 
67167318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
672a2d1c673SSatish Balay     if (i % 2) {
673a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
6748865f1eaSKarl Rupp 
675c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6768865f1eaSKarl Rupp 
677a2d1c673SSatish Balay       *nvals = *nvals/bs2;
678563fb871SSatish Balay     } else {
679563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
6808865f1eaSKarl Rupp 
681563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
6828865f1eaSKarl Rupp 
683563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
684bc5ccf88SSatish Balay     }
685a2d1c673SSatish Balay 
686cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
687c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
688c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
689a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
690563fb871SSatish Balay       *rows = stash->rindices[i2];
691a2d1c673SSatish Balay       *cols = *rows + *nvals;
692563fb871SSatish Balay       *vals = stash->rvalues[i1];
693a2d1c673SSatish Balay       *flg  = 1;
694a2d1c673SSatish Balay       stash->nprocessed++;
69535d8aa7fSBarry Smith       match_found = PETSC_TRUE;
696bc5ccf88SSatish Balay     }
697bc5ccf88SSatish Balay   }
698bc5ccf88SSatish Balay   PetscFunctionReturn(0);
699bc5ccf88SSatish Balay }
700d7d60843SJed Brown 
701e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI)
702d7d60843SJed Brown typedef struct {
703d7d60843SJed Brown   PetscInt row;
704d7d60843SJed Brown   PetscInt col;
705d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
706d7d60843SJed Brown } MatStashBlock;
707d7d60843SJed Brown 
708d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
709d7d60843SJed Brown {
710d7d60843SJed Brown   PetscErrorCode ierr;
711d7d60843SJed Brown   PetscMatStashSpace space;
712d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
713d7d60843SJed Brown   PetscScalar **valptr;
714d7d60843SJed Brown 
715d7d60843SJed Brown   PetscFunctionBegin;
716d7d60843SJed Brown   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
717d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
718d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
719d7d60843SJed Brown       row[cnt] = space->idx[i];
720d7d60843SJed Brown       col[cnt] = space->idy[i];
721d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
722d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
723d7d60843SJed Brown       cnt++;
724d7d60843SJed Brown     }
725d7d60843SJed Brown   }
726d7d60843SJed Brown   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
727d7d60843SJed Brown   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
728d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
729d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
730d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
731d7d60843SJed Brown       PetscInt colstart;
732d7d60843SJed Brown       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
733d7d60843SJed Brown       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
734d7d60843SJed Brown         PetscInt j,l;
735d7d60843SJed Brown         MatStashBlock *block;
736d7d60843SJed Brown         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
737d7d60843SJed Brown         block->row = row[rowstart];
738d7d60843SJed Brown         block->col = col[colstart];
739580bdb30SBarry Smith         ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr);
740d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
741d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
742d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
743d7d60843SJed Brown           } else {
744580bdb30SBarry Smith             ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr);
745d7d60843SJed Brown           }
746d7d60843SJed Brown         }
747d7d60843SJed Brown         colstart = j;
748d7d60843SJed Brown       }
749d7d60843SJed Brown       rowstart = i;
750d7d60843SJed Brown     }
751d7d60843SJed Brown   }
752d7d60843SJed Brown   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
753d7d60843SJed Brown   PetscFunctionReturn(0);
754d7d60843SJed Brown }
755d7d60843SJed Brown 
756d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
757d7d60843SJed Brown {
758d7d60843SJed Brown   PetscErrorCode ierr;
759d7d60843SJed Brown 
760d7d60843SJed Brown   PetscFunctionBegin;
761d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
762d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
763d7d60843SJed Brown     PetscMPIInt  blocklens[2];
764d7d60843SJed Brown     MPI_Aint     displs[2];
765d7d60843SJed Brown     MPI_Datatype types[2],stype;
7669503c6c6SJed Brown     /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
7679503c6c6SJed Brown      * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
7689503c6c6SJed Brown      * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
7699503c6c6SJed Brown      */
7709503c6c6SJed Brown     struct DummyBlock {PetscInt row,col; PetscReal vals;};
771d7d60843SJed Brown 
7729503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
773d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
774d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
775d7d60843SJed Brown     }
776d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
777d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
778d7d60843SJed Brown     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
779d7d60843SJed Brown     blocklens[0] = 2;
780d7d60843SJed Brown     blocklens[1] = bs2;
7819503c6c6SJed Brown     displs[0] = offsetof(struct DummyBlock,row);
7829503c6c6SJed Brown     displs[1] = offsetof(struct DummyBlock,vals);
783d7d60843SJed Brown     types[0] = MPIU_INT;
784d7d60843SJed Brown     types[1] = MPIU_SCALAR;
785d7d60843SJed Brown     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
786d7d60843SJed Brown     ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
787d7d60843SJed Brown     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
788d7d60843SJed Brown     ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
789d7d60843SJed Brown     ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
790d7d60843SJed Brown   }
791d7d60843SJed Brown   PetscFunctionReturn(0);
792d7d60843SJed Brown }
793d7d60843SJed Brown 
794d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
795d7d60843SJed Brown  * Here we post the main sends.
796d7d60843SJed Brown  */
797d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
798d7d60843SJed Brown {
799d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
800d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
801d7d60843SJed Brown   PetscErrorCode ierr;
802d7d60843SJed Brown 
803d7d60843SJed Brown   PetscFunctionBegin;
804d7d60843SJed Brown   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
805d7d60843SJed Brown   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
806d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
807d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
808d7d60843SJed Brown   PetscFunctionReturn(0);
809d7d60843SJed Brown }
810d7d60843SJed Brown 
811d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
812d7d60843SJed Brown  * Here we post the main recvs.
813d7d60843SJed Brown  */
814d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
815d7d60843SJed Brown {
816d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
817d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
818d7d60843SJed Brown   MatStashFrame *frame;
819d7d60843SJed Brown   PetscErrorCode ierr;
820d7d60843SJed Brown 
821d7d60843SJed Brown   PetscFunctionBegin;
822d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
823d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
824d7d60843SJed Brown   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
825d7d60843SJed Brown   frame->count = hdr->count;
826d7d60843SJed Brown   frame->pending = 1;
827d7d60843SJed Brown   PetscFunctionReturn(0);
828d7d60843SJed Brown }
829d7d60843SJed Brown 
830d7d60843SJed Brown /*
831d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
832d7d60843SJed Brown  */
833d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
834d7d60843SJed Brown {
835d7d60843SJed Brown   PetscErrorCode ierr;
836d7d60843SJed Brown   size_t nblocks;
837d7d60843SJed Brown   char *sendblocks;
838d7d60843SJed Brown 
839d7d60843SJed Brown   PetscFunctionBegin;
8404b4eb8d3SJed Brown #if defined(PETSC_USE_DEBUG)
8414b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
8424b4eb8d3SJed Brown     InsertMode addv;
843b2566f29SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
8444b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
8454b4eb8d3SJed Brown   }
8464b4eb8d3SJed Brown #endif
8474b4eb8d3SJed Brown 
848d7d60843SJed Brown   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
849d7d60843SJed Brown   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
850d7d60843SJed Brown   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
851d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
85260f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
85323b7d1baSJed Brown     PetscInt i;
85423b7d1baSJed Brown     size_t b;
85597da8949SJed Brown     for (i=0,b=0; i<stash->nsendranks; i++) {
85697da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
85797da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
85897da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
85997da8949SJed Brown       for ( ; b<nblocks; b++) {
86097da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
86197da8949SJed Brown         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
86297da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
86397da8949SJed Brown         stash->sendhdr[i].count++;
86497da8949SJed Brown       }
86597da8949SJed Brown     }
86697da8949SJed Brown   } else {                      /* Dynamically count and pack (first time) */
86723b7d1baSJed Brown     PetscInt sendno;
86823b7d1baSJed Brown     size_t i,rowstart;
869d7d60843SJed Brown 
870d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
871d7d60843SJed Brown     stash->nsendranks = 0;
872d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
8737e2ea869SJed Brown       PetscInt owner;
874d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
875d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
876d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
877d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
878d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8797e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
880d7d60843SJed Brown       }
881d7d60843SJed Brown       stash->nsendranks++;
882d7d60843SJed Brown       rowstart = i;
883d7d60843SJed Brown     }
884d7d60843SJed Brown     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
885d7d60843SJed Brown 
886d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
887d7d60843SJed Brown     sendno = 0;
888d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
889d7d60843SJed Brown       PetscInt owner;
890d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
891d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
892d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
893d7d60843SJed Brown       stash->sendranks[sendno] = owner;
894d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
895d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8967e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
897d7d60843SJed Brown       }
898d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
899d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
900d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
901d7d60843SJed Brown       sendno++;
902d7d60843SJed Brown       rowstart = i;
903d7d60843SJed Brown     }
904d7d60843SJed Brown     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
905d7d60843SJed Brown   }
906d7d60843SJed Brown 
9074b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
9084b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
9094b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
91023b7d1baSJed Brown     size_t i;
9114b4eb8d3SJed Brown     for (i=0; i<nblocks; i++) {
9124b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9134b4eb8d3SJed Brown       sendblock_i->row = -(sendblock_i->row+1);
9144b4eb8d3SJed Brown     }
9154b4eb8d3SJed Brown   }
9164b4eb8d3SJed Brown 
91760f34548SJunchao Zhang   if (stash->first_assembly_done) {
91897da8949SJed Brown     PetscMPIInt i,tag;
91997da8949SJed Brown     ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
92097da8949SJed Brown     for (i=0; i<stash->nrecvranks; i++) {
92197da8949SJed Brown       ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
92297da8949SJed Brown     }
92397da8949SJed Brown     for (i=0; i<stash->nsendranks; i++) {
92497da8949SJed Brown       ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
92597da8949SJed Brown     }
92697da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
92797da8949SJed Brown   } else {
928e0ddb6e8SJed Brown     ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
929e0ddb6e8SJed Brown                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
930d7d60843SJed Brown                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
931b5ddc6f1SJed Brown     ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
93297da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
93397da8949SJed Brown   }
934d7d60843SJed Brown 
935d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
936d7d60843SJed Brown   stash->recvframe_active     = NULL;
937d7d60843SJed Brown   stash->recvframe_i          = 0;
938d7d60843SJed Brown   stash->some_i               = 0;
939d7d60843SJed Brown   stash->some_count           = 0;
940d7d60843SJed Brown   stash->recvcount            = 0;
94160f34548SJunchao Zhang   stash->first_assembly_done  = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
9424b4eb8d3SJed Brown   stash->insertmode           = &mat->insertmode;
943d7d60843SJed Brown   PetscFunctionReturn(0);
944d7d60843SJed Brown }
945d7d60843SJed Brown 
946d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
947d7d60843SJed Brown {
948d7d60843SJed Brown   PetscErrorCode ierr;
949d7d60843SJed Brown   MatStashBlock *block;
950d7d60843SJed Brown 
951d7d60843SJed Brown   PetscFunctionBegin;
952d7d60843SJed Brown   *flg = 0;
953d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
954d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
955d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
956d7d60843SJed Brown       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
957d7d60843SJed Brown       stash->some_i = 0;
958d7d60843SJed Brown     }
959d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
960d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
961d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
962d7d60843SJed Brown       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
963d7d60843SJed Brown     }
9644b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9654b4eb8d3SJed Brown       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
9664b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
9674b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9684b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9694b4eb8d3SJed Brown     }
970d7d60843SJed Brown     stash->some_i++;
971d7d60843SJed Brown     stash->recvcount++;
972d7d60843SJed Brown     stash->recvframe_i = 0;
973d7d60843SJed Brown   }
974d7d60843SJed Brown   *n = 1;
975d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
9764b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
977d7d60843SJed Brown   *row = &block->row;
978d7d60843SJed Brown   *col = &block->col;
979d7d60843SJed Brown   *val = block->vals;
980d7d60843SJed Brown   stash->recvframe_i++;
981d7d60843SJed Brown   *flg = 1;
982d7d60843SJed Brown   PetscFunctionReturn(0);
983d7d60843SJed Brown }
984d7d60843SJed Brown 
985d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
986d7d60843SJed Brown {
987d7d60843SJed Brown   PetscErrorCode ierr;
988d7d60843SJed Brown 
989d7d60843SJed Brown   PetscFunctionBegin;
990d7d60843SJed Brown   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
99160f34548SJunchao Zhang   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
9923575f486SJed Brown     void *dummy;
9933575f486SJed Brown     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
9943575f486SJed Brown   } else {                      /* No reuse, so collect everything. */
995d7d60843SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
99697da8949SJed Brown   }
997d7d60843SJed Brown 
998d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
999d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
1000d7d60843SJed Brown      Also update the oldmax, only if it increases */
1001d7d60843SJed Brown   if (stash->n) {
1002d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
1003d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1004d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1005d7d60843SJed Brown   }
1006d7d60843SJed Brown 
1007d7d60843SJed Brown   stash->nmax       = 0;
1008d7d60843SJed Brown   stash->n          = 0;
1009d7d60843SJed Brown   stash->reallocs   = -1;
1010d7d60843SJed Brown   stash->nprocessed = 0;
1011d7d60843SJed Brown 
1012d7d60843SJed Brown   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1013d7d60843SJed Brown 
1014d7d60843SJed Brown   stash->space = 0;
1015d7d60843SJed Brown 
1016d7d60843SJed Brown   PetscFunctionReturn(0);
1017d7d60843SJed Brown }
1018d7d60843SJed Brown 
101960f34548SJunchao Zhang PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1020d7d60843SJed Brown {
1021d7d60843SJed Brown   PetscErrorCode ierr;
1022d7d60843SJed Brown 
1023d7d60843SJed Brown   PetscFunctionBegin;
1024d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1025d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1026d7d60843SJed Brown   stash->recvframes = NULL;
1027d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1028d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
1029d7d60843SJed Brown     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1030d7d60843SJed Brown   }
1031d7d60843SJed Brown   stash->nsendranks = 0;
1032d7d60843SJed Brown   stash->nrecvranks = 0;
1033d7d60843SJed Brown   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1034d7d60843SJed Brown   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1035d7d60843SJed Brown   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1036d7d60843SJed Brown   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1037d7d60843SJed Brown   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1038d7d60843SJed Brown   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1039d7d60843SJed Brown   PetscFunctionReturn(0);
1040d7d60843SJed Brown }
10411667be42SBarry Smith #endif
1042