xref: /petsc/src/mat/utils/matstash.c (revision b30fb036e872901b890c0acec9d011b45dc369a6)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13d7d60843SJed Brown 
149417f4adSLois Curfman McInnes /*
158798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
164c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
174c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
189417f4adSLois Curfman McInnes 
194c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
204c1ff481SSatish Balay 
214c1ff481SSatish Balay   Input Parameters:
224c1ff481SSatish Balay   comm - communicator, required for scatters.
234c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
244c1ff481SSatish Balay 
254c1ff481SSatish Balay   Output Parameters:
264c1ff481SSatish Balay   stash    - the newly created stash
279417f4adSLois Curfman McInnes */
28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
299417f4adSLois Curfman McInnes {
30dfbe8321SBarry Smith   PetscErrorCode ierr;
31533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
32ace3abfcSBarry Smith   PetscBool      flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
378865f1eaSKarl Rupp 
38752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
41a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
42785e854fSJed Brown   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
43533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
44533163c2SBarry Smith 
45bc5ccf88SSatish Balay 
46434d7ff9SSatish Balay   nopt = stash->size;
47785e854fSJed Brown   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
48c5929fdfSBarry Smith   ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
49434d7ff9SSatish Balay   if (flg) {
50434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
51434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
52434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
53f4ab19daSSatish Balay     else                          max = 0; /* Use default */
54434d7ff9SSatish Balay     stash->umax = max;
55434d7ff9SSatish Balay   } else {
56434d7ff9SSatish Balay     stash->umax = 0;
57434d7ff9SSatish Balay   }
58606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
594c1ff481SSatish Balay   if (bs <= 0) bs = 1;
60a2d1c673SSatish Balay 
614c1ff481SSatish Balay   stash->bs         = bs;
629417f4adSLois Curfman McInnes   stash->nmax       = 0;
63434d7ff9SSatish Balay   stash->oldnmax    = 0;
649417f4adSLois Curfman McInnes   stash->n          = 0;
654c1ff481SSatish Balay   stash->reallocs   = -1;
6675cae7c1SHong Zhang   stash->space_head = 0;
6775cae7c1SHong Zhang   stash->space      = 0;
689417f4adSLois Curfman McInnes 
69bc5ccf88SSatish Balay   stash->send_waits  = 0;
70bc5ccf88SSatish Balay   stash->recv_waits  = 0;
71a2d1c673SSatish Balay   stash->send_status = 0;
72bc5ccf88SSatish Balay   stash->nsends      = 0;
73bc5ccf88SSatish Balay   stash->nrecvs      = 0;
74bc5ccf88SSatish Balay   stash->svalues     = 0;
75bc5ccf88SSatish Balay   stash->rvalues     = 0;
76563fb871SSatish Balay   stash->rindices    = 0;
77a2d1c673SSatish Balay   stash->nprocessed  = 0;
7867318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
79d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
808865f1eaSKarl Rupp 
81c5929fdfSBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
82*b30fb036SBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
83*b30fb036SBarry Smith   if (!flg) {
84d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
85d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
87d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88ac2b2aa0SJed Brown   } else {
89ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
90ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
91ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
92ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
93ac2b2aa0SJed Brown   }
943a40ed3dSBarry Smith   PetscFunctionReturn(0);
959417f4adSLois Curfman McInnes }
969417f4adSLois Curfman McInnes 
974c1ff481SSatish Balay /*
988798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
994c1ff481SSatish Balay */
100dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1019417f4adSLois Curfman McInnes {
102dfbe8321SBarry Smith   PetscErrorCode ierr;
103a2d1c673SSatish Balay 
104bc5ccf88SSatish Balay   PetscFunctionBegin;
1056bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
106ac2b2aa0SJed Brown   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
1078865f1eaSKarl Rupp 
10882740460SHong Zhang   stash->space = 0;
1098865f1eaSKarl Rupp 
110533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
111bc5ccf88SSatish Balay   PetscFunctionReturn(0);
112bc5ccf88SSatish Balay }
113bc5ccf88SSatish Balay 
1144c1ff481SSatish Balay /*
11567318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1164c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
11767318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1184c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1194c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1204c1ff481SSatish Balay    so that the same value can be used the next time through.
1214c1ff481SSatish Balay */
122dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
123bc5ccf88SSatish Balay {
1246849ba73SBarry Smith   PetscErrorCode ierr;
125ac2b2aa0SJed Brown 
126ac2b2aa0SJed Brown   PetscFunctionBegin;
127ac2b2aa0SJed Brown   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
128ac2b2aa0SJed Brown   PetscFunctionReturn(0);
129ac2b2aa0SJed Brown }
130ac2b2aa0SJed Brown 
131ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
132ac2b2aa0SJed Brown {
133ac2b2aa0SJed Brown   PetscErrorCode ierr;
134533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
135a2d1c673SSatish Balay   MPI_Status     *send_status;
136a2d1c673SSatish Balay 
1373a40ed3dSBarry Smith   PetscFunctionBegin;
138533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
139a2d1c673SSatish Balay   /* wait on sends */
140a2d1c673SSatish Balay   if (nsends) {
141785e854fSJed Brown     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
142a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
143606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
144a2d1c673SSatish Balay   }
145a2d1c673SSatish Balay 
146c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
147434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
148434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
149b9b97703SBarry Smith   if (stash->n) {
15094b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1518a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
152434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
153b9b97703SBarry Smith   }
154434d7ff9SSatish Balay 
155d07ff455SSatish Balay   stash->nmax       = 0;
156d07ff455SSatish Balay   stash->n          = 0;
1574c1ff481SSatish Balay   stash->reallocs   = -1;
158a2d1c673SSatish Balay   stash->nprocessed = 0;
1598865f1eaSKarl Rupp 
1606bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1618865f1eaSKarl Rupp 
16282740460SHong Zhang   stash->space = 0;
1638865f1eaSKarl Rupp 
164606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
165606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
166c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
167c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
168606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
169c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
170563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1713a40ed3dSBarry Smith   PetscFunctionReturn(0);
1729417f4adSLois Curfman McInnes }
1739417f4adSLois Curfman McInnes 
1744c1ff481SSatish Balay /*
1758798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1764c1ff481SSatish Balay 
1774c1ff481SSatish Balay    Input Parameters:
1784c1ff481SSatish Balay    stash    - the stash
17994b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1804c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1814c1ff481SSatish Balay 
1824c1ff481SSatish Balay */
183c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
18497530c3fSBarry Smith {
185c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
18694b769a5SSatish Balay 
1873a40ed3dSBarry Smith   PetscFunctionBegin;
1881ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1891ecfd215SBarry Smith   if (reallocs) {
190434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
191434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1921ecfd215SBarry Smith   }
193bc5ccf88SSatish Balay   PetscFunctionReturn(0);
194bc5ccf88SSatish Balay }
1954c1ff481SSatish Balay 
1964c1ff481SSatish Balay /*
1978798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1984c1ff481SSatish Balay 
1994c1ff481SSatish Balay    Input Parameters:
2004c1ff481SSatish Balay    stash  - the stash
2014c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2024c1ff481SSatish Balay             this value is used while allocating memory.
2034c1ff481SSatish Balay */
204c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
205bc5ccf88SSatish Balay {
206bc5ccf88SSatish Balay   PetscFunctionBegin;
207434d7ff9SSatish Balay   stash->umax = max;
2083a40ed3dSBarry Smith   PetscFunctionReturn(0);
20997530c3fSBarry Smith }
21097530c3fSBarry Smith 
2118798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2124c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2134c1ff481SSatish Balay    being inserted into the stash.
2144c1ff481SSatish Balay 
2154c1ff481SSatish Balay    Input Parameters:
2164c1ff481SSatish Balay    stash - the stash
2174c1ff481SSatish Balay    incr  - the minimum increase requested
2184c1ff481SSatish Balay 
2194c1ff481SSatish Balay    Notes:
2204c1ff481SSatish Balay    This routine doubles the currently used memory.
2214c1ff481SSatish Balay  */
222c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2239417f4adSLois Curfman McInnes {
2246849ba73SBarry Smith   PetscErrorCode ierr;
2255bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2269417f4adSLois Curfman McInnes 
2273a40ed3dSBarry Smith   PetscFunctionBegin;
2289417f4adSLois Curfman McInnes   /* allocate a larger stash */
229c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
230434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
231434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
232c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
233434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
234434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
235434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2364c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
237d07ff455SSatish Balay 
23875cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
23975cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
240b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
241b087b6d6SSatish Balay     stash->space_head = stash->space;
24275cae7c1SHong Zhang   }
243b087b6d6SSatish Balay 
244bc5ccf88SSatish Balay   stash->reallocs++;
24575cae7c1SHong Zhang   stash->nmax = newnmax;
246bc5ccf88SSatish Balay   PetscFunctionReturn(0);
247bc5ccf88SSatish Balay }
248bc5ccf88SSatish Balay /*
2498798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2504c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2514c1ff481SSatish Balay   can be inserted with a single call to this function.
2524c1ff481SSatish Balay 
2534c1ff481SSatish Balay   Input Parameters:
2544c1ff481SSatish Balay   stash  - the stash
2554c1ff481SSatish Balay   row    - the global row correspoiding to the values
2564c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2574c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2584c1ff481SSatish Balay   values - the values inserted
259bc5ccf88SSatish Balay */
260ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
261bc5ccf88SSatish Balay {
262dfbe8321SBarry Smith   PetscErrorCode     ierr;
263b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
26475cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
265bc5ccf88SSatish Balay 
266bc5ccf88SSatish Balay   PetscFunctionBegin;
2674c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
26875cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2698798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2709417f4adSLois Curfman McInnes   }
27175cae7c1SHong Zhang   space = stash->space;
27275cae7c1SHong Zhang   k     = space->local_used;
2734c1ff481SSatish Balay   for (i=0; i<n; i++) {
27488c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
27575cae7c1SHong Zhang     space->idx[k] = row;
27675cae7c1SHong Zhang     space->idy[k] = idxn[i];
27775cae7c1SHong Zhang     space->val[k] = values[i];
27875cae7c1SHong Zhang     k++;
279b400d20cSBarry Smith     cnt++;
2809417f4adSLois Curfman McInnes   }
281b400d20cSBarry Smith   stash->n               += cnt;
282b400d20cSBarry Smith   space->local_used      += cnt;
283b400d20cSBarry Smith   space->local_remaining -= cnt;
284a2d1c673SSatish Balay   PetscFunctionReturn(0);
285a2d1c673SSatish Balay }
28675cae7c1SHong Zhang 
2874c1ff481SSatish Balay /*
2888798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2894c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2904c1ff481SSatish Balay   can be inserted with a single call to this function.
291a2d1c673SSatish Balay 
2924c1ff481SSatish Balay   Input Parameters:
2934c1ff481SSatish Balay   stash   - the stash
2944c1ff481SSatish Balay   row     - the global row correspoiding to the values
2954c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2964c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2974c1ff481SSatish Balay   values  - the values inserted
2984c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2994c1ff481SSatish Balay             this happens because the input is columnoriented.
3004c1ff481SSatish Balay */
301ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
302a2d1c673SSatish Balay {
303dfbe8321SBarry Smith   PetscErrorCode     ierr;
30450e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
30575cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
306a2d1c673SSatish Balay 
3074c1ff481SSatish Balay   PetscFunctionBegin;
3084c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
30975cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3108798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3114c1ff481SSatish Balay   }
31275cae7c1SHong Zhang   space = stash->space;
31375cae7c1SHong Zhang   k     = space->local_used;
3144c1ff481SSatish Balay   for (i=0; i<n; i++) {
31588c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
31675cae7c1SHong Zhang     space->idx[k] = row;
31775cae7c1SHong Zhang     space->idy[k] = idxn[i];
31875cae7c1SHong Zhang     space->val[k] = values[i*stepval];
31975cae7c1SHong Zhang     k++;
320b400d20cSBarry Smith     cnt++;
3214c1ff481SSatish Balay   }
322b400d20cSBarry Smith   stash->n               += cnt;
323b400d20cSBarry Smith   space->local_used      += cnt;
324b400d20cSBarry Smith   space->local_remaining -= cnt;
3254c1ff481SSatish Balay   PetscFunctionReturn(0);
3264c1ff481SSatish Balay }
3274c1ff481SSatish Balay 
3284c1ff481SSatish Balay /*
3298798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3304c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3314c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3324c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3334c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3344c1ff481SSatish Balay 
3354c1ff481SSatish Balay   Input Parameters:
3364c1ff481SSatish Balay   stash  - the stash
3374c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3384c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3394c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3404c1ff481SSatish Balay            values. Each block is of size bs*bs.
3414c1ff481SSatish Balay   values - the values inserted
3424c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3434c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3444c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3454c1ff481SSatish Balay */
34654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3474c1ff481SSatish Balay {
348dfbe8321SBarry Smith   PetscErrorCode     ierr;
34975cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
35054f21887SBarry Smith   const PetscScalar  *vals;
35154f21887SBarry Smith   PetscScalar        *array;
35275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
353a2d1c673SSatish Balay 
354a2d1c673SSatish Balay   PetscFunctionBegin;
35575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3568798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
357a2d1c673SSatish Balay   }
35875cae7c1SHong Zhang   space = stash->space;
35975cae7c1SHong Zhang   l     = space->local_used;
36075cae7c1SHong Zhang   bs2   = bs*bs;
3614c1ff481SSatish Balay   for (i=0; i<n; i++) {
36275cae7c1SHong Zhang     space->idx[l] = row;
36375cae7c1SHong Zhang     space->idy[l] = idxn[i];
36475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
36575cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
36675cae7c1SHong Zhang        funtion call */
36775cae7c1SHong Zhang     array = space->val + bs2*l;
36875cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
36975cae7c1SHong Zhang     for (j=0; j<bs; j++) {
37075cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
37175cae7c1SHong Zhang       array++;
37275cae7c1SHong Zhang       vals += cmax*bs;
37375cae7c1SHong Zhang     }
37475cae7c1SHong Zhang     l++;
375a2d1c673SSatish Balay   }
3765bd3b8fbSHong Zhang   stash->n               += n;
37775cae7c1SHong Zhang   space->local_used      += n;
37875cae7c1SHong Zhang   space->local_remaining -= n;
3794c1ff481SSatish Balay   PetscFunctionReturn(0);
3804c1ff481SSatish Balay }
3814c1ff481SSatish Balay 
3824c1ff481SSatish Balay /*
3838798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3844c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3854c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3864c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3874c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3884c1ff481SSatish Balay 
3894c1ff481SSatish Balay   Input Parameters:
3904c1ff481SSatish Balay   stash  - the stash
3914c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3924c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3934c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3944c1ff481SSatish Balay            values. Each block is of size bs*bs.
3954c1ff481SSatish Balay   values - the values inserted
3964c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3974c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3984c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3994c1ff481SSatish Balay */
40054f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4014c1ff481SSatish Balay {
402dfbe8321SBarry Smith   PetscErrorCode     ierr;
40375cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
40454f21887SBarry Smith   const PetscScalar  *vals;
40554f21887SBarry Smith   PetscScalar        *array;
40675cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4074c1ff481SSatish Balay 
4084c1ff481SSatish Balay   PetscFunctionBegin;
40975cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4108798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4114c1ff481SSatish Balay   }
41275cae7c1SHong Zhang   space = stash->space;
41375cae7c1SHong Zhang   l     = space->local_used;
41475cae7c1SHong Zhang   bs2   = bs*bs;
4154c1ff481SSatish Balay   for (i=0; i<n; i++) {
41675cae7c1SHong Zhang     space->idx[l] = row;
41775cae7c1SHong Zhang     space->idy[l] = idxn[i];
41875cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
41975cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
42075cae7c1SHong Zhang      funtion call */
42175cae7c1SHong Zhang     array = space->val + bs2*l;
42275cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
42375cae7c1SHong Zhang     for (j=0; j<bs; j++) {
4248865f1eaSKarl Rupp       for (k=0; k<bs; k++) array[k] = vals[k];
42575cae7c1SHong Zhang       array += bs;
42675cae7c1SHong Zhang       vals  += rmax*bs;
42775cae7c1SHong Zhang     }
4285bd3b8fbSHong Zhang     l++;
429a2d1c673SSatish Balay   }
4305bd3b8fbSHong Zhang   stash->n               += n;
43175cae7c1SHong Zhang   space->local_used      += n;
43275cae7c1SHong Zhang   space->local_remaining -= n;
4333a40ed3dSBarry Smith   PetscFunctionReturn(0);
4349417f4adSLois Curfman McInnes }
4354c1ff481SSatish Balay /*
4368798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4374c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4384c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4394c1ff481SSatish Balay   processors.
440bc5ccf88SSatish Balay 
4414c1ff481SSatish Balay   Input Parameters:
4424c1ff481SSatish Balay   stash  - the stash
4434c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4444c1ff481SSatish Balay            for each node.
4454c1ff481SSatish Balay 
4464c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4474c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4484c1ff481SSatish Balay   the proper global indices.
4494c1ff481SSatish Balay */
4501e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
451bc5ccf88SSatish Balay {
452ac2b2aa0SJed Brown   PetscErrorCode ierr;
453ac2b2aa0SJed Brown 
454ac2b2aa0SJed Brown   PetscFunctionBegin;
455ac2b2aa0SJed Brown   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
456ac2b2aa0SJed Brown   PetscFunctionReturn(0);
457ac2b2aa0SJed Brown }
458ac2b2aa0SJed Brown 
459ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
460ac2b2aa0SJed Brown {
461c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
462fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
4636849ba73SBarry Smith   PetscErrorCode     ierr;
46475cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
46554f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
466bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
467563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
46876ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4695bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
47054f21887SBarry Smith   PetscScalar        *sp_val;
4715bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
472bc5ccf88SSatish Balay 
473bc5ccf88SSatish Balay   PetscFunctionBegin;
4744b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
4754b4eb8d3SJed Brown     InsertMode addv;
476b2566f29SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
4774b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
4784b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4794b4eb8d3SJed Brown   }
4804b4eb8d3SJed Brown 
4814c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
48275cae7c1SHong Zhang 
483bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
484037dbc42SBarry Smith   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
4851795a4d1SJed Brown   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
486037dbc42SBarry Smith   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
487a2d1c673SSatish Balay 
48875cae7c1SHong Zhang   i       = j    = 0;
4897357eb19SBarry Smith   lastidx = -1;
4905bd3b8fbSHong Zhang   space   = stash->space_head;
4916c4ed002SBarry Smith   while (space) {
49275cae7c1SHong Zhang     space_next = space->next;
4935bd3b8fbSHong Zhang     sp_idx     = space->idx;
49475cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
4957357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4965bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4977357eb19SBarry Smith       lastidx = idx;
4987357eb19SBarry Smith       for (; j<size; j++) {
4994c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
500563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
501bc5ccf88SSatish Balay         }
502bc5ccf88SSatish Balay       }
50375cae7c1SHong Zhang       i++;
50475cae7c1SHong Zhang     }
50575cae7c1SHong Zhang     space = space_next;
506bc5ccf88SSatish Balay   }
507563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
508563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5098865f1eaSKarl Rupp     if (nlengths[i]) {
51076ec1555SBarry Smith       sizes[i] = 1; nsends++;
5118865f1eaSKarl Rupp     }
512563fb871SSatish Balay   }
513bc5ccf88SSatish Balay 
51454f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
515563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
51676ec1555SBarry Smith    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
517563fb871SSatish Balay    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
518563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
519563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
520563fb871SSatish Balay    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
521563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
522563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
523563fb871SSatish Balay    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
524563fb871SSatish Balay    ierr = PetscFree(onodes);CHKERRQ(ierr);
5258865f1eaSKarl Rupp    ierr = PetscFree(olengths);CHKERRQ(ierr);}
526bc5ccf88SSatish Balay 
527bc5ccf88SSatish Balay   /* do sends:
528bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
529bc5ccf88SSatish Balay          the ith processor
530bc5ccf88SSatish Balay   */
531dcca6d9dSJed Brown   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
532785e854fSJed Brown   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
533dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
534a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
535bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
536bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
537563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
538533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
539bc5ccf88SSatish Balay   }
54075cae7c1SHong Zhang 
54175cae7c1SHong Zhang   i     = 0;
5425bd3b8fbSHong Zhang   space = stash->space_head;
5436c4ed002SBarry Smith   while (space) {
54475cae7c1SHong Zhang     space_next = space->next;
5455bd3b8fbSHong Zhang     sp_idx     = space->idx;
5465bd3b8fbSHong Zhang     sp_idy     = space->idy;
5475bd3b8fbSHong Zhang     sp_val     = space->val;
54875cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
549bc5ccf88SSatish Balay       j = owner[i];
550a2d1c673SSatish Balay       if (bs2 == 1) {
5515bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
552a2d1c673SSatish Balay       } else {
553c1ac3661SBarry Smith         PetscInt    k;
55454f21887SBarry Smith         PetscScalar *buf1,*buf2;
5554c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
556b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5578865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
558a2d1c673SSatish Balay       }
5595bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5605bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
561bc5ccf88SSatish Balay       startv[j]++;
562bc5ccf88SSatish Balay       starti[j]++;
56375cae7c1SHong Zhang       i++;
56475cae7c1SHong Zhang     }
56575cae7c1SHong Zhang     space = space_next;
566bc5ccf88SSatish Balay   }
567bc5ccf88SSatish Balay   startv[0] = 0;
5688865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
569e5d0e772SSatish Balay 
570bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
57176ec1555SBarry Smith     if (sizes[i]) {
572563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
573a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
574bc5ccf88SSatish Balay     }
575b85c94c3SSatish Balay   }
5766cf91177SBarry Smith #if defined(PETSC_USE_INFO)
57793157e10SBarry Smith   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
578e5d0e772SSatish Balay   for (i=0; i<size; i++) {
57976ec1555SBarry Smith     if (sizes[i]) {
58030c47e72SSatish Balay       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
581e5d0e772SSatish Balay     }
582e5d0e772SSatish Balay   }
583e5d0e772SSatish Balay #endif
584c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
585606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
586c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
58776ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
588a2d1c673SSatish Balay 
589563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
590785e854fSJed Brown   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
591563fb871SSatish Balay 
592563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
593563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
594563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
595563fb871SSatish Balay   }
596563fb871SSatish Balay   stash->recv_waits = recv_waits;
5978865f1eaSKarl Rupp 
598563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
599563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
600563fb871SSatish Balay 
601c05d87d6SBarry Smith   stash->svalues         = svalues;
602c05d87d6SBarry Smith   stash->sindices        = sindices;
603c05d87d6SBarry Smith   stash->rvalues         = rvalues;
604c05d87d6SBarry Smith   stash->rindices        = rindices;
605c05d87d6SBarry Smith   stash->send_waits      = send_waits;
606c05d87d6SBarry Smith   stash->nsends          = nsends;
607c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
60867318a8aSJed Brown   stash->reproduce_count = 0;
609bc5ccf88SSatish Balay   PetscFunctionReturn(0);
610bc5ccf88SSatish Balay }
611bc5ccf88SSatish Balay 
612a2d1c673SSatish Balay /*
6138798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6148798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6154c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6164c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6174c1ff481SSatish Balay 
6184c1ff481SSatish Balay    Input Parameters:
6194c1ff481SSatish Balay    stash - the stash
6204c1ff481SSatish Balay 
6214c1ff481SSatish Balay    Output Parameters:
6224c1ff481SSatish Balay    nvals - the number of entries in the current message.
6234c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6244c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6254c1ff481SSatish Balay    vals  - the values
6264c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6274c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6284c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
629a2d1c673SSatish Balay */
63054f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
631bc5ccf88SSatish Balay {
6326849ba73SBarry Smith   PetscErrorCode ierr;
633ac2b2aa0SJed Brown 
634ac2b2aa0SJed Brown   PetscFunctionBegin;
635ac2b2aa0SJed Brown   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
636ac2b2aa0SJed Brown   PetscFunctionReturn(0);
637ac2b2aa0SJed Brown }
638ac2b2aa0SJed Brown 
639ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
640ac2b2aa0SJed Brown {
641ac2b2aa0SJed Brown   PetscErrorCode ierr;
642533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
643fe09c992SBarry Smith   PetscInt       bs2;
644a2d1c673SSatish Balay   MPI_Status     recv_status;
645ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
646bc5ccf88SSatish Balay 
647bc5ccf88SSatish Balay   PetscFunctionBegin;
648a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
649a2d1c673SSatish Balay   /* Return if no more messages to process */
6508865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
651a2d1c673SSatish Balay 
6524c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
65367318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
654a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
655a2d1c673SSatish Balay   while (!match_found) {
65667318a8aSJed Brown     if (stash->reproduce) {
65767318a8aSJed Brown       i    = stash->reproduce_count++;
65867318a8aSJed Brown       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
65967318a8aSJed Brown     } else {
660a2d1c673SSatish Balay       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
66167318a8aSJed Brown     }
662e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
663533163c2SBarry Smith 
66467318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
665a2d1c673SSatish Balay     if (i % 2) {
666a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
6678865f1eaSKarl Rupp 
668c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6698865f1eaSKarl Rupp 
670a2d1c673SSatish Balay       *nvals = *nvals/bs2;
671563fb871SSatish Balay     } else {
672563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
6738865f1eaSKarl Rupp 
674563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
6758865f1eaSKarl Rupp 
676563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
677bc5ccf88SSatish Balay     }
678a2d1c673SSatish Balay 
679cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
680c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
681c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
682a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
683563fb871SSatish Balay       *rows = stash->rindices[i2];
684a2d1c673SSatish Balay       *cols = *rows + *nvals;
685563fb871SSatish Balay       *vals = stash->rvalues[i1];
686a2d1c673SSatish Balay       *flg  = 1;
687a2d1c673SSatish Balay       stash->nprocessed++;
68835d8aa7fSBarry Smith       match_found = PETSC_TRUE;
689bc5ccf88SSatish Balay     }
690bc5ccf88SSatish Balay   }
691bc5ccf88SSatish Balay   PetscFunctionReturn(0);
692bc5ccf88SSatish Balay }
693d7d60843SJed Brown 
694d7d60843SJed Brown typedef struct {
695d7d60843SJed Brown   PetscInt row;
696d7d60843SJed Brown   PetscInt col;
697d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
698d7d60843SJed Brown } MatStashBlock;
699d7d60843SJed Brown 
700d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
701d7d60843SJed Brown {
702d7d60843SJed Brown   PetscErrorCode ierr;
703d7d60843SJed Brown   PetscMatStashSpace space;
704d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
705d7d60843SJed Brown   PetscScalar **valptr;
706d7d60843SJed Brown 
707d7d60843SJed Brown   PetscFunctionBegin;
708d7d60843SJed Brown   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
709d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
710d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
711d7d60843SJed Brown       row[cnt] = space->idx[i];
712d7d60843SJed Brown       col[cnt] = space->idy[i];
713d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
714d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
715d7d60843SJed Brown       cnt++;
716d7d60843SJed Brown     }
717d7d60843SJed Brown   }
718d7d60843SJed Brown   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
719d7d60843SJed Brown   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
720d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
721d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
722d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
723d7d60843SJed Brown       PetscInt colstart;
724d7d60843SJed Brown       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
725d7d60843SJed Brown       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
726d7d60843SJed Brown         PetscInt j,l;
727d7d60843SJed Brown         MatStashBlock *block;
728d7d60843SJed Brown         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
729d7d60843SJed Brown         block->row = row[rowstart];
730d7d60843SJed Brown         block->col = col[colstart];
731d7d60843SJed Brown         ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
732d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
733d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
734d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
735d7d60843SJed Brown           } else {
736d7d60843SJed Brown             ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
737d7d60843SJed Brown           }
738d7d60843SJed Brown         }
739d7d60843SJed Brown         colstart = j;
740d7d60843SJed Brown       }
741d7d60843SJed Brown       rowstart = i;
742d7d60843SJed Brown     }
743d7d60843SJed Brown   }
744d7d60843SJed Brown   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
745d7d60843SJed Brown   PetscFunctionReturn(0);
746d7d60843SJed Brown }
747d7d60843SJed Brown 
748d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
749d7d60843SJed Brown {
750d7d60843SJed Brown   PetscErrorCode ierr;
751d7d60843SJed Brown 
752d7d60843SJed Brown   PetscFunctionBegin;
753d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
754d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
755d7d60843SJed Brown     PetscMPIInt  blocklens[2];
756d7d60843SJed Brown     MPI_Aint     displs[2];
757d7d60843SJed Brown     MPI_Datatype types[2],stype;
7589503c6c6SJed Brown     /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
7599503c6c6SJed Brown      * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
7609503c6c6SJed Brown      * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
7619503c6c6SJed Brown      */
7629503c6c6SJed Brown     struct DummyBlock {PetscInt row,col; PetscReal vals;};
763d7d60843SJed Brown 
7649503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
765d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
766d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
767d7d60843SJed Brown     }
768d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
769d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
770d7d60843SJed Brown     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
771d7d60843SJed Brown     blocklens[0] = 2;
772d7d60843SJed Brown     blocklens[1] = bs2;
7739503c6c6SJed Brown     displs[0] = offsetof(struct DummyBlock,row);
7749503c6c6SJed Brown     displs[1] = offsetof(struct DummyBlock,vals);
775d7d60843SJed Brown     types[0] = MPIU_INT;
776d7d60843SJed Brown     types[1] = MPIU_SCALAR;
777d7d60843SJed Brown     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
778d7d60843SJed Brown     ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
779d7d60843SJed Brown     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
780d7d60843SJed Brown     ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
781d7d60843SJed Brown     ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
782d7d60843SJed Brown   }
783d7d60843SJed Brown   PetscFunctionReturn(0);
784d7d60843SJed Brown }
785d7d60843SJed Brown 
786d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
787d7d60843SJed Brown  * Here we post the main sends.
788d7d60843SJed Brown  */
789d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
790d7d60843SJed Brown {
791d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
792d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
793d7d60843SJed Brown   PetscErrorCode ierr;
794d7d60843SJed Brown 
795d7d60843SJed Brown   PetscFunctionBegin;
796d7d60843SJed Brown   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
797d7d60843SJed Brown   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
798d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
799d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
800d7d60843SJed Brown   PetscFunctionReturn(0);
801d7d60843SJed Brown }
802d7d60843SJed Brown 
803d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
804d7d60843SJed Brown  * Here we post the main recvs.
805d7d60843SJed Brown  */
806d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
807d7d60843SJed Brown {
808d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
809d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
810d7d60843SJed Brown   MatStashFrame *frame;
811d7d60843SJed Brown   PetscErrorCode ierr;
812d7d60843SJed Brown 
813d7d60843SJed Brown   PetscFunctionBegin;
814d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
815d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
816d7d60843SJed Brown   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
817d7d60843SJed Brown   frame->count = hdr->count;
818d7d60843SJed Brown   frame->pending = 1;
819d7d60843SJed Brown   PetscFunctionReturn(0);
820d7d60843SJed Brown }
821d7d60843SJed Brown 
822d7d60843SJed Brown /*
823d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
824d7d60843SJed Brown  */
825d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
826d7d60843SJed Brown {
827d7d60843SJed Brown   PetscErrorCode ierr;
828d7d60843SJed Brown   size_t nblocks;
829d7d60843SJed Brown   char *sendblocks;
830d7d60843SJed Brown 
831d7d60843SJed Brown   PetscFunctionBegin;
8324b4eb8d3SJed Brown #if defined(PETSC_USE_DEBUG)
8334b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
8344b4eb8d3SJed Brown     InsertMode addv;
835b2566f29SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
8364b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
8374b4eb8d3SJed Brown   }
8384b4eb8d3SJed Brown #endif
8394b4eb8d3SJed Brown 
84097da8949SJed Brown   if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
84197da8949SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
84297da8949SJed Brown   }
84397da8949SJed Brown 
844d7d60843SJed Brown   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
845d7d60843SJed Brown   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
846d7d60843SJed Brown   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
847d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
84897da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
84923b7d1baSJed Brown     PetscInt i;
85023b7d1baSJed Brown     size_t b;
85197da8949SJed Brown     for (i=0,b=0; i<stash->nsendranks; i++) {
85297da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
85397da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
85497da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
85597da8949SJed Brown       for ( ; b<nblocks; b++) {
85697da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
85797da8949SJed Brown         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
85897da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
85997da8949SJed Brown         stash->sendhdr[i].count++;
86097da8949SJed Brown       }
86197da8949SJed Brown     }
86297da8949SJed Brown   } else {                      /* Dynamically count and pack (first time) */
86323b7d1baSJed Brown     PetscInt sendno;
86423b7d1baSJed Brown     size_t i,rowstart;
865d7d60843SJed Brown 
866d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
867d7d60843SJed Brown     stash->nsendranks = 0;
868d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
8697e2ea869SJed Brown       PetscInt owner;
870d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
871d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
872d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
873d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
874d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8757e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
876d7d60843SJed Brown       }
877d7d60843SJed Brown       stash->nsendranks++;
878d7d60843SJed Brown       rowstart = i;
879d7d60843SJed Brown     }
880d7d60843SJed Brown     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
881d7d60843SJed Brown 
882d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
883d7d60843SJed Brown     sendno = 0;
884d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
885d7d60843SJed Brown       PetscInt owner;
886d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
887d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
888d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
889d7d60843SJed Brown       stash->sendranks[sendno] = owner;
890d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
891d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8927e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
893d7d60843SJed Brown       }
894d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
895d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
896d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
897d7d60843SJed Brown       sendno++;
898d7d60843SJed Brown       rowstart = i;
899d7d60843SJed Brown     }
900d7d60843SJed Brown     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
901d7d60843SJed Brown   }
902d7d60843SJed Brown 
9034b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
9044b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
9054b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
90623b7d1baSJed Brown     size_t i;
9074b4eb8d3SJed Brown     for (i=0; i<nblocks; i++) {
9084b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9094b4eb8d3SJed Brown       sendblock_i->row = -(sendblock_i->row+1);
9104b4eb8d3SJed Brown     }
9114b4eb8d3SJed Brown   }
9124b4eb8d3SJed Brown 
91397da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) {
91497da8949SJed Brown     PetscMPIInt i,tag;
91597da8949SJed Brown     ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
91697da8949SJed Brown     for (i=0; i<stash->nrecvranks; i++) {
91797da8949SJed Brown       ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
91897da8949SJed Brown     }
91997da8949SJed Brown     for (i=0; i<stash->nsendranks; i++) {
92097da8949SJed Brown       ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
92197da8949SJed Brown     }
92297da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
92397da8949SJed Brown   } else {
924e0ddb6e8SJed Brown     ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
925e0ddb6e8SJed Brown                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
926d7d60843SJed Brown                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
927b5ddc6f1SJed Brown     ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
92897da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
92997da8949SJed Brown   }
930d7d60843SJed Brown 
931d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
932d7d60843SJed Brown   stash->recvframe_active = NULL;
933d7d60843SJed Brown   stash->recvframe_i      = 0;
934d7d60843SJed Brown   stash->some_i           = 0;
935d7d60843SJed Brown   stash->some_count       = 0;
936d7d60843SJed Brown   stash->recvcount        = 0;
93797da8949SJed Brown   stash->subset_off_proc  = mat->subsetoffprocentries;
9384b4eb8d3SJed Brown   stash->insertmode       = &mat->insertmode;
939d7d60843SJed Brown   PetscFunctionReturn(0);
940d7d60843SJed Brown }
941d7d60843SJed Brown 
942d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
943d7d60843SJed Brown {
944d7d60843SJed Brown   PetscErrorCode ierr;
945d7d60843SJed Brown   MatStashBlock *block;
946d7d60843SJed Brown 
947d7d60843SJed Brown   PetscFunctionBegin;
948d7d60843SJed Brown   *flg = 0;
949d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
950d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
951d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
952d7d60843SJed Brown       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
953d7d60843SJed Brown       stash->some_i = 0;
954d7d60843SJed Brown     }
955d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
956d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
957d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
958d7d60843SJed Brown       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
959d7d60843SJed Brown     }
9604b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9614b4eb8d3SJed Brown       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
9624b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
9634b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9644b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9654b4eb8d3SJed Brown     }
966d7d60843SJed Brown     stash->some_i++;
967d7d60843SJed Brown     stash->recvcount++;
968d7d60843SJed Brown     stash->recvframe_i = 0;
969d7d60843SJed Brown   }
970d7d60843SJed Brown   *n = 1;
971d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
9724b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
973d7d60843SJed Brown   *row = &block->row;
974d7d60843SJed Brown   *col = &block->col;
975d7d60843SJed Brown   *val = block->vals;
976d7d60843SJed Brown   stash->recvframe_i++;
977d7d60843SJed Brown   *flg = 1;
978d7d60843SJed Brown   PetscFunctionReturn(0);
979d7d60843SJed Brown }
980d7d60843SJed Brown 
981d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
982d7d60843SJed Brown {
983d7d60843SJed Brown   PetscErrorCode ierr;
984d7d60843SJed Brown 
985d7d60843SJed Brown   PetscFunctionBegin;
986d7d60843SJed Brown   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
9873575f486SJed Brown   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
9883575f486SJed Brown     void *dummy;
9893575f486SJed Brown     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
9903575f486SJed Brown   } else {                      /* No reuse, so collect everything. */
991d7d60843SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
99297da8949SJed Brown   }
993d7d60843SJed Brown 
994d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
995d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
996d7d60843SJed Brown      Also update the oldmax, only if it increases */
997d7d60843SJed Brown   if (stash->n) {
998d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
999d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1000d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1001d7d60843SJed Brown   }
1002d7d60843SJed Brown 
1003d7d60843SJed Brown   stash->nmax       = 0;
1004d7d60843SJed Brown   stash->n          = 0;
1005d7d60843SJed Brown   stash->reallocs   = -1;
1006d7d60843SJed Brown   stash->nprocessed = 0;
1007d7d60843SJed Brown 
1008d7d60843SJed Brown   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1009d7d60843SJed Brown 
1010d7d60843SJed Brown   stash->space = 0;
1011d7d60843SJed Brown 
1012d7d60843SJed Brown   PetscFunctionReturn(0);
1013d7d60843SJed Brown }
1014d7d60843SJed Brown 
1015d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1016d7d60843SJed Brown {
1017d7d60843SJed Brown   PetscErrorCode ierr;
1018d7d60843SJed Brown 
1019d7d60843SJed Brown   PetscFunctionBegin;
1020d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1021d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1022d7d60843SJed Brown   stash->recvframes = NULL;
1023d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1024d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
1025d7d60843SJed Brown     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1026d7d60843SJed Brown   }
1027d7d60843SJed Brown   stash->nsendranks = 0;
1028d7d60843SJed Brown   stash->nrecvranks = 0;
1029d7d60843SJed Brown   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1030d7d60843SJed Brown   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1031d7d60843SJed Brown   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1032d7d60843SJed Brown   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1033d7d60843SJed Brown   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1034d7d60843SJed Brown   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1035d7d60843SJed Brown   PetscFunctionReturn(0);
1036d7d60843SJed Brown }
1037