xref: /petsc/src/mat/utils/matstash.c (revision 4b4eb8d3b5c67aff3d3d02420f547e6671225154)
1 
2 #include <petsc-private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13 
14 /*
15   MatStashCreate_Private - Creates a stash,currently used for all the parallel
16   matrix implementations. The stash is where elements of a matrix destined
17   to be stored on other processors are kept until matrix assembly is done.
18 
19   This is a simple minded stash. Simply adds entries to end of stash.
20 
21   Input Parameters:
22   comm - communicator, required for scatters.
23   bs   - stash block size. used when stashing blocks of values
24 
25   Output Parameters:
26   stash    - the newly created stash
27 */
28 #undef __FUNCT__
29 #define __FUNCT__ "MatStashCreate_Private"
30 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
31 {
32   PetscErrorCode ierr;
33   PetscInt       max,*opt,nopt,i;
34   PetscBool      flg;
35 
36   PetscFunctionBegin;
37   /* Require 2 tags,get the second using PetscCommGetNewTag() */
38   stash->comm = comm;
39 
40   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
41   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
42   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
43   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
44   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
45   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
46 
47 
48   nopt = stash->size;
49   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
50   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
51   if (flg) {
52     if (nopt == 1)                max = opt[0];
53     else if (nopt == stash->size) max = opt[stash->rank];
54     else if (stash->rank < nopt)  max = opt[stash->rank];
55     else                          max = 0; /* Use default */
56     stash->umax = max;
57   } else {
58     stash->umax = 0;
59   }
60   ierr = PetscFree(opt);CHKERRQ(ierr);
61   if (bs <= 0) bs = 1;
62 
63   stash->bs         = bs;
64   stash->nmax       = 0;
65   stash->oldnmax    = 0;
66   stash->n          = 0;
67   stash->reallocs   = -1;
68   stash->space_head = 0;
69   stash->space      = 0;
70 
71   stash->send_waits  = 0;
72   stash->recv_waits  = 0;
73   stash->send_status = 0;
74   stash->nsends      = 0;
75   stash->nrecvs      = 0;
76   stash->svalues     = 0;
77   stash->rvalues     = 0;
78   stash->rindices    = 0;
79   stash->nprocessed  = 0;
80   stash->reproduce   = PETSC_FALSE;
81   stash->blocktype   = MPI_DATATYPE_NULL;
82 
83   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
84   ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
85   if (flg) {
86     stash->ScatterBegin   = MatStashScatterBegin_BTS;
87     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88     stash->ScatterEnd     = MatStashScatterEnd_BTS;
89     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90   } else {
91     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94     stash->ScatterDestroy = NULL;
95   }
96   PetscFunctionReturn(0);
97 }
98 
99 /*
100    MatStashDestroy_Private - Destroy the stash
101 */
102 #undef __FUNCT__
103 #define __FUNCT__ "MatStashDestroy_Private"
104 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
105 {
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
110   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
111 
112   stash->space = 0;
113 
114   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 /*
119    MatStashScatterEnd_Private - This is called as the final stage of
120    scatter. The final stages of message passing is done here, and
121    all the memory used for message passing is cleaned up. This
122    routine also resets the stash, and deallocates the memory used
123    for the stash. It also keeps track of the current memory usage
124    so that the same value can be used the next time through.
125 */
126 #undef __FUNCT__
127 #define __FUNCT__ "MatStashScatterEnd_Private"
128 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129 {
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatStashScatterEnd_Ref"
139 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
140 {
141   PetscErrorCode ierr;
142   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
143   MPI_Status     *send_status;
144 
145   PetscFunctionBegin;
146   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
147   /* wait on sends */
148   if (nsends) {
149     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
150     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
151     ierr = PetscFree(send_status);CHKERRQ(ierr);
152   }
153 
154   /* Now update nmaxold to be app 10% more than max n used, this way the
155      wastage of space is reduced the next time this stash is used.
156      Also update the oldmax, only if it increases */
157   if (stash->n) {
158     bs2     = stash->bs*stash->bs;
159     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
160     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
161   }
162 
163   stash->nmax       = 0;
164   stash->n          = 0;
165   stash->reallocs   = -1;
166   stash->nprocessed = 0;
167 
168   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
169 
170   stash->space = 0;
171 
172   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
173   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
174   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
175   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
176   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
177   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
178   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 /*
183    MatStashGetInfo_Private - Gets the relavant statistics of the stash
184 
185    Input Parameters:
186    stash    - the stash
187    nstash   - the size of the stash. Indicates the number of values stored.
188    reallocs - the number of additional mallocs incurred.
189 
190 */
191 #undef __FUNCT__
192 #define __FUNCT__ "MatStashGetInfo_Private"
193 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
194 {
195   PetscInt bs2 = stash->bs*stash->bs;
196 
197   PetscFunctionBegin;
198   if (nstash) *nstash = stash->n*bs2;
199   if (reallocs) {
200     if (stash->reallocs < 0) *reallocs = 0;
201     else                     *reallocs = stash->reallocs;
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 /*
207    MatStashSetInitialSize_Private - Sets the initial size of the stash
208 
209    Input Parameters:
210    stash  - the stash
211    max    - the value that is used as the max size of the stash.
212             this value is used while allocating memory.
213 */
214 #undef __FUNCT__
215 #define __FUNCT__ "MatStashSetInitialSize_Private"
216 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
217 {
218   PetscFunctionBegin;
219   stash->umax = max;
220   PetscFunctionReturn(0);
221 }
222 
223 /* MatStashExpand_Private - Expand the stash. This function is called
224    when the space in the stash is not sufficient to add the new values
225    being inserted into the stash.
226 
227    Input Parameters:
228    stash - the stash
229    incr  - the minimum increase requested
230 
231    Notes:
232    This routine doubles the currently used memory.
233  */
234 #undef __FUNCT__
235 #define __FUNCT__ "MatStashExpand_Private"
236 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
237 {
238   PetscErrorCode ierr;
239   PetscInt       newnmax,bs2= stash->bs*stash->bs;
240 
241   PetscFunctionBegin;
242   /* allocate a larger stash */
243   if (!stash->oldnmax && !stash->nmax) { /* new stash */
244     if (stash->umax)                  newnmax = stash->umax/bs2;
245     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
246   } else if (!stash->nmax) { /* resuing stash */
247     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
248     else                              newnmax = stash->oldnmax/bs2;
249   } else                              newnmax = stash->nmax*2;
250   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
251 
252   /* Get a MatStashSpace and attach it to stash */
253   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
254   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
255     stash->space_head = stash->space;
256   }
257 
258   stash->reallocs++;
259   stash->nmax = newnmax;
260   PetscFunctionReturn(0);
261 }
262 /*
263   MatStashValuesRow_Private - inserts values into the stash. This function
264   expects the values to be roworiented. Multiple columns belong to the same row
265   can be inserted with a single call to this function.
266 
267   Input Parameters:
268   stash  - the stash
269   row    - the global row correspoiding to the values
270   n      - the number of elements inserted. All elements belong to the above row.
271   idxn   - the global column indices corresponding to each of the values.
272   values - the values inserted
273 */
274 #undef __FUNCT__
275 #define __FUNCT__ "MatStashValuesRow_Private"
276 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
277 {
278   PetscErrorCode     ierr;
279   PetscInt           i,k,cnt = 0;
280   PetscMatStashSpace space=stash->space;
281 
282   PetscFunctionBegin;
283   /* Check and see if we have sufficient memory */
284   if (!space || space->local_remaining < n) {
285     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
286   }
287   space = stash->space;
288   k     = space->local_used;
289   for (i=0; i<n; i++) {
290     if (ignorezeroentries && (values[i] == 0.0)) continue;
291     space->idx[k] = row;
292     space->idy[k] = idxn[i];
293     space->val[k] = values[i];
294     k++;
295     cnt++;
296   }
297   stash->n               += cnt;
298   space->local_used      += cnt;
299   space->local_remaining -= cnt;
300   PetscFunctionReturn(0);
301 }
302 
303 /*
304   MatStashValuesCol_Private - inserts values into the stash. This function
305   expects the values to be columnoriented. Multiple columns belong to the same row
306   can be inserted with a single call to this function.
307 
308   Input Parameters:
309   stash   - the stash
310   row     - the global row correspoiding to the values
311   n       - the number of elements inserted. All elements belong to the above row.
312   idxn    - the global column indices corresponding to each of the values.
313   values  - the values inserted
314   stepval - the consecutive values are sepated by a distance of stepval.
315             this happens because the input is columnoriented.
316 */
317 #undef __FUNCT__
318 #define __FUNCT__ "MatStashValuesCol_Private"
319 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
320 {
321   PetscErrorCode     ierr;
322   PetscInt           i,k,cnt = 0;
323   PetscMatStashSpace space=stash->space;
324 
325   PetscFunctionBegin;
326   /* Check and see if we have sufficient memory */
327   if (!space || space->local_remaining < n) {
328     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
329   }
330   space = stash->space;
331   k     = space->local_used;
332   for (i=0; i<n; i++) {
333     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
334     space->idx[k] = row;
335     space->idy[k] = idxn[i];
336     space->val[k] = values[i*stepval];
337     k++;
338     cnt++;
339   }
340   stash->n               += cnt;
341   space->local_used      += cnt;
342   space->local_remaining -= cnt;
343   PetscFunctionReturn(0);
344 }
345 
/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block
           (NOTE(review): rmax is unused in this row-oriented variant; the
           column-oriented variant uses it for its stride)
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesRowBlocked_Private"
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Grow the stash if the current segment cannot hold n more blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    /* source block i starts at row offset idx within the row-oriented input */
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* writing with stride bs transposes the row-oriented input into
         column-oriented storage */
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;   /* advance to the next source row of this block */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
401 
/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block
           (NOTE(review): cmax is unused in this column-oriented variant; the
           row-oriented variant uses it for its stride)
  idx    - the index of the current block-row in the original block.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesColBlocked_Private"
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Grow the stash if the current segment cannot hold n more blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* input is already column-oriented, so the copy is contiguous per column */
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;   /* advance to the next source column of this block */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
457 /*
458   MatStashScatterBegin_Private - Initiates the transfer of values to the
459   correct owners. This function goes through the stash, and check the
460   owners of each stashed value, and sends the values off to the owner
461   processors.
462 
463   Input Parameters:
464   stash  - the stash
465   owners - an array of size 'no-of-procs' which gives the ownership range
466            for each node.
467 
468   Notes: The 'owners' array in the cased of the blocked-stash has the
469   ranges specified blocked global indices, and for the regular stash in
470   the proper global indices.
471 */
472 #undef __FUNCT__
473 #define __FUNCT__ "MatStashScatterBegin_Private"
474 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
475 {
476   PetscErrorCode ierr;
477 
478   PetscFunctionBegin;
479   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_Ref"
/*
   MatStashScatterBegin_Ref - Reference implementation of the scatter-begin
   stage. Determines the owner of every stashed entry, exchanges message
   counts and lengths with the other ranks, posts the receives, packs the
   send buffers, and posts two sends per destination rank: one carrying the
   (row,col) index pairs (tag1) and one carrying the values (tag2).
*/
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    /* BOR of the two distinct mode bits means the ranks disagreed */
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      /* linear scan over the ownership ranges; resumes from the previous
         owner when the indices are sorted */
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack the send buffers: values into svalues, then row indices followed by
     column indices (offset by nlengths[j]) into sindices */
  i     = 0;
  space = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv was consumed as a cursor above; rebuild the per-rank offsets */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      /* NOTE(review): the size_t product is printed with %d here — harmless
         for small messages but potentially misleading output on LP64; verify */
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  /* interleave: even slots hold the index requests, odd slots the value
     requests — MatStashScatterGetMesg_Ref() relies on this layout */
  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
637 
638 /*
639    MatStashScatterGetMesg_Private - This function waits on the receives posted
640    in the function MatStashScatterBegin_Private() and returns one message at
641    a time to the calling function. If no messages are left, it indicates this
642    by setting flg = 0, else it sets flg = 1.
643 
644    Input Parameters:
645    stash - the stash
646 
647    Output Parameters:
648    nvals - the number of entries in the current message.
649    rows  - an array of row indices (or blocked indices) corresponding to the values
650    cols  - an array of columnindices (or blocked indices) corresponding to the values
651    vals  - the values
652    flg   - 0 indicates no more message left, and the current call has no values associated.
653            1 indicates that the current call successfully received a message, and the
654              other output parameters nvals,rows,cols,vals are set appropriately.
655 */
656 #undef __FUNCT__
657 #define __FUNCT__ "MatStashScatterGetMesg_Private"
658 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
659 {
660   PetscErrorCode ierr;
661 
662   PetscFunctionBegin;
663   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_Ref"
/*
   MatStashScatterGetMesg_Ref - Reference implementation of GetMesg. Each
   source rank sends two messages (indices on even request slots, values on
   odd slots; see MatStashScatterBegin_Ref). This routine keeps completing
   receives until BOTH messages from some rank have arrived, then returns
   that matched pair to the caller. flg_v[2*src] records the arrived value
   message, flg_v[2*src+1] the arrived index message (-1 = not yet seen).
*/
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* deterministic ordering: complete receives in posting order */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* odd request slot: this is the values message */
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      /* even request slot: this is the indices message */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      /* rindices[i2] holds rows followed by cols; rvalues[i1] holds values */
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
723 
/* On-the-wire/storage layout for one stashed block in the BTS path:
   the (row,col) indices followed by the block's scalar values. Instances
   are over-allocated so vals[] actually holds bs2 entries; the matching
   MPI datatype is built in MatStashBlockTypeSetUp(). */
typedef struct {
  PetscInt row;
  PetscInt col;
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;
729 
#undef __FUNCT__
#define __FUNCT__ "MatStashSortCompress_Private"
/*
   MatStashSortCompress_Private - Flattens the stash-space chain into
   (row,col,value) triples, sorts them by row then column, merges duplicate
   (row,col) insertions (summing for ADD_VALUES, last-one-wins for
   INSERT_VALUES), and packs the result into stash->segsendblocks as
   MatStashBlock records.
*/
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  /* Gather every stashed entry into parallel arrays; valptr aliases the
     block values in place (no copy yet) */
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  /* Primary sort by row; perm follows so valptr can be recovered */
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      /* Secondary sort of this row's columns */
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: the most recently stashed block wins */
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
779 
#undef __FUNCT__
#define __FUNCT__ "MatStashBlockTypeSetUp"
/*
   MatStashBlockTypeSetUp - Lazily creates the MPI datatype describing one
   MatStashBlock (row, col, bs2 scalars) and the segmented buffers used by
   the BTS scatter. Idempotent: does nothing once stash->blocktype is set.
*/
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;

    /* Extent of one block record: header plus bs2 scalars... */
    stash->blocktype_size = offsetof(MatStashBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    /* Two-member struct type: {row,col} as MPIU_INT, then bs2 MPIU_SCALARs */
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0] = offsetof(MatStashBlock,row);
    displs[1] = offsetof(MatStashBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    /* Resize so consecutive blocks in a buffer are blocktype_size apart */
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
814 
815 #undef __FUNCT__
816 #define __FUNCT__ "MatStashBTSSend_Private"
817 /* Callback invoked after target rank has initiatied receive of rendezvous message.
818  * Here we post the main sends.
819  */
820 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
821 {
822   MatStash *stash = (MatStash*)ctx;
823   MatStashHeader *hdr = (MatStashHeader*)sdata;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
828   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
829   stash->sendframes[rankid].count = hdr->count;
830   stash->sendframes[rankid].pending = 1;
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatStashBTSRecv_Private"
836 /* Callback invoked by target after receiving rendezvous message.
837  * Here we post the main recvs.
838  */
839 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
840 {
841   MatStash *stash = (MatStash*)ctx;
842   MatStashHeader *hdr = (MatStashHeader*)rdata;
843   MatStashFrame *frame;
844   PetscErrorCode ierr;
845 
846   PetscFunctionBegin;
847   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
848   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
849   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
850   frame->count = hdr->count;
851   frame->pending = 1;
852   PetscFunctionReturn(0);
853 }
854 
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_BTS"
/*
 * Begins the build-two-sided (BTS) scatter of stashed off-process matrix entries.
 *
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 *
 * The stashed blocks are sorted/compressed, partitioned into runs destined for
 * each owning rank, and the sends and receives are posted.  When the matrix
 * promises the same (or a subset of the) communication pattern as the previous
 * assembly (mat->subsetoffprocentries), the rank lists, headers, and request
 * arrays from the previous assembly are reused instead of rebuilt.
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t nblocks;               /* number of packed blocks to send */
  char *sendblocks;             /* packed send buffer; stride is stash->blocktype_size */

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
    PetscInt i,b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; /* first block belonging to the next send rank */
        stash->sendhdr[i].count++;
      }
    }
  } else {                      /* Dynamically count and pack (first time) */
    PetscInt i,rowstart,sendno;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2); /* row was not an exact ownership boundary; decode to the index of the containing range */
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes (second pass over the same runs) */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    PetscInt i;
    for (i=0; i<nblocks; i++) { /* a negative encoded row signals INSERT_VALUES to the receiver */
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Reuse the existing rank lists; just repost recvs and sends */
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {                      /* First time: discover recv ranks with a two-sided rendezvous */
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  /* Reset the receive-side cursor state consumed by MatStashScatterGetMesg_BTS() */
  ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active = NULL;
  stash->recvframe_i      = 0;
  stash->some_i           = 0;
  stash->some_count       = 0;
  stash->recvcount        = 0;
  stash->subset_off_proc  = mat->subsetoffprocentries;
  stash->insertmode       = &mat->insertmode;
  PetscFunctionReturn(0);
}
974 
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_BTS"
/*
 * Hands back one received stashed block at a time.
 *
 * On output, *flg = 1 and (*n = 1, *row, *col, *val) describe the next block;
 * *flg = 0 once every incoming message has been consumed.  Blocks are drained
 * from the currently active receive frame; when it is exhausted, the next batch
 * of completed receives is obtained with MPI_Waitsome().
 */
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { /* need to activate a new frame */
    if (stash->some_i == stash->some_count) { /* exhausted the last MPI_Waitsome() batch */
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      /* A negative encoded row means the sender used INSERT_VALUES (see the encoding in MatStashScatterBegin_BTS) */
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1); /* decode the INSERT_VALUES row encoding */
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatStashScatterEnd_BTS"
1018 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
1019 {
1020   PetscErrorCode ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
1024   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
1025     void *dummy;
1026     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
1027   } else {                      /* No reuse, so collect everything. */
1028     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
1029   }
1030 
1031   /* Now update nmaxold to be app 10% more than max n used, this way the
1032      wastage of space is reduced the next time this stash is used.
1033      Also update the oldmax, only if it increases */
1034   if (stash->n) {
1035     PetscInt bs2     = stash->bs*stash->bs;
1036     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1037     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1038   }
1039 
1040   stash->nmax       = 0;
1041   stash->n          = 0;
1042   stash->reallocs   = -1;
1043   stash->nprocessed = 0;
1044 
1045   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1046 
1047   stash->space = 0;
1048 
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatStashScatterDestroy_BTS"
1054 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1055 {
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1060   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1061   stash->recvframes = NULL;
1062   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1063   if (stash->blocktype != MPI_DATATYPE_NULL) {
1064     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1065   }
1066   stash->nsendranks = 0;
1067   stash->nrecvranks = 0;
1068   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1069   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1070   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1071   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1072   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1073   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076