xref: /petsc/src/mat/utils/matstash.c (revision 1667be421921b5a18c088baf27915dd8c3ba83b4)
1 
2 #include <petsc/private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13 
14 /*
15   MatStashCreate_Private - Creates a stash,currently used for all the parallel
16   matrix implementations. The stash is where elements of a matrix destined
17   to be stored on other processors are kept until matrix assembly is done.
18 
19   This is a simple minded stash. Simply adds entries to end of stash.
20 
21   Input Parameters:
22   comm - communicator, required for scatters.
23   bs   - stash block size. used when stashing blocks of values
24 
25   Output Parameters:
26   stash    - the newly created stash
27 */
28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
29 {
30   PetscErrorCode ierr;
31   PetscInt       max,*opt,nopt,i;
32   PetscBool      flg;
33 
34   PetscFunctionBegin;
35   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36   stash->comm = comm;
37 
38   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
39   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
40   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
41   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
42   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
43   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
44 
45 
46   nopt = stash->size;
47   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
48   ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
49   if (flg) {
50     if (nopt == 1)                max = opt[0];
51     else if (nopt == stash->size) max = opt[stash->rank];
52     else if (stash->rank < nopt)  max = opt[stash->rank];
53     else                          max = 0; /* Use default */
54     stash->umax = max;
55   } else {
56     stash->umax = 0;
57   }
58   ierr = PetscFree(opt);CHKERRQ(ierr);
59   if (bs <= 0) bs = 1;
60 
61   stash->bs         = bs;
62   stash->nmax       = 0;
63   stash->oldnmax    = 0;
64   stash->n          = 0;
65   stash->reallocs   = -1;
66   stash->space_head = 0;
67   stash->space      = 0;
68 
69   stash->send_waits  = 0;
70   stash->recv_waits  = 0;
71   stash->send_status = 0;
72   stash->nsends      = 0;
73   stash->nrecvs      = 0;
74   stash->svalues     = 0;
75   stash->rvalues     = 0;
76   stash->rindices    = 0;
77   stash->nprocessed  = 0;
78   stash->reproduce   = PETSC_FALSE;
79   stash->blocktype   = MPI_DATATYPE_NULL;
80 
81   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
82 #if !defined(PETSC_HAVE_MPIUNI)
83   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
84   if (!flg) {
85     stash->ScatterBegin   = MatStashScatterBegin_BTS;
86     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
87     stash->ScatterEnd     = MatStashScatterEnd_BTS;
88     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
89   } else {
90 #endif
91     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94     stash->ScatterDestroy = NULL;
95 #if !defined(PETSC_HAVE_MPIUNI)
96   }
97 #endif
98   PetscFunctionReturn(0);
99 }
100 
101 /*
102    MatStashDestroy_Private - Destroy the stash
103 */
104 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
105 {
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
110   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
111 
112   stash->space = 0;
113 
114   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 /*
119    MatStashScatterEnd_Private - This is called as the final stage of
120    scatter. The final stages of message passing is done here, and
121    all the memory used for message passing is cleaned up. This
122    routine also resets the stash, and deallocates the memory used
123    for the stash. It also keeps track of the current memory usage
124    so that the same value can be used the next time through.
125 */
126 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
127 {
128   PetscErrorCode ierr;
129 
130   PetscFunctionBegin;
131   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 
135 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
136 {
137   PetscErrorCode ierr;
138   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
139   MPI_Status     *send_status;
140 
141   PetscFunctionBegin;
142   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
143   /* wait on sends */
144   if (nsends) {
145     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
146     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
147     ierr = PetscFree(send_status);CHKERRQ(ierr);
148   }
149 
150   /* Now update nmaxold to be app 10% more than max n used, this way the
151      wastage of space is reduced the next time this stash is used.
152      Also update the oldmax, only if it increases */
153   if (stash->n) {
154     bs2     = stash->bs*stash->bs;
155     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
156     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
157   }
158 
159   stash->nmax       = 0;
160   stash->n          = 0;
161   stash->reallocs   = -1;
162   stash->nprocessed = 0;
163 
164   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
165 
166   stash->space = 0;
167 
168   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
169   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
170   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
171   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
172   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
173   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
174   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
175   PetscFunctionReturn(0);
176 }
177 
178 /*
179    MatStashGetInfo_Private - Gets the relavant statistics of the stash
180 
181    Input Parameters:
182    stash    - the stash
183    nstash   - the size of the stash. Indicates the number of values stored.
184    reallocs - the number of additional mallocs incurred.
185 
186 */
187 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
188 {
189   PetscInt bs2 = stash->bs*stash->bs;
190 
191   PetscFunctionBegin;
192   if (nstash) *nstash = stash->n*bs2;
193   if (reallocs) {
194     if (stash->reallocs < 0) *reallocs = 0;
195     else                     *reallocs = stash->reallocs;
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 /*
201    MatStashSetInitialSize_Private - Sets the initial size of the stash
202 
203    Input Parameters:
204    stash  - the stash
205    max    - the value that is used as the max size of the stash.
206             this value is used while allocating memory.
207 */
208 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
209 {
210   PetscFunctionBegin;
211   stash->umax = max;
212   PetscFunctionReturn(0);
213 }
214 
215 /* MatStashExpand_Private - Expand the stash. This function is called
216    when the space in the stash is not sufficient to add the new values
217    being inserted into the stash.
218 
219    Input Parameters:
220    stash - the stash
221    incr  - the minimum increase requested
222 
223    Notes:
224    This routine doubles the currently used memory.
225  */
226 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
227 {
228   PetscErrorCode ierr;
229   PetscInt       newnmax,bs2= stash->bs*stash->bs;
230 
231   PetscFunctionBegin;
232   /* allocate a larger stash */
233   if (!stash->oldnmax && !stash->nmax) { /* new stash */
234     if (stash->umax)                  newnmax = stash->umax/bs2;
235     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
236   } else if (!stash->nmax) { /* resuing stash */
237     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
238     else                              newnmax = stash->oldnmax/bs2;
239   } else                              newnmax = stash->nmax*2;
240   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
241 
242   /* Get a MatStashSpace and attach it to stash */
243   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
244   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
245     stash->space_head = stash->space;
246   }
247 
248   stash->reallocs++;
249   stash->nmax = newnmax;
250   PetscFunctionReturn(0);
251 }
252 /*
253   MatStashValuesRow_Private - inserts values into the stash. This function
254   expects the values to be roworiented. Multiple columns belong to the same row
255   can be inserted with a single call to this function.
256 
257   Input Parameters:
258   stash  - the stash
259   row    - the global row correspoiding to the values
260   n      - the number of elements inserted. All elements belong to the above row.
261   idxn   - the global column indices corresponding to each of the values.
262   values - the values inserted
263 */
264 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
265 {
266   PetscErrorCode     ierr;
267   PetscInt           i,k,cnt = 0;
268   PetscMatStashSpace space=stash->space;
269 
270   PetscFunctionBegin;
271   /* Check and see if we have sufficient memory */
272   if (!space || space->local_remaining < n) {
273     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
274   }
275   space = stash->space;
276   k     = space->local_used;
277   for (i=0; i<n; i++) {
278     if (ignorezeroentries && (values[i] == 0.0)) continue;
279     space->idx[k] = row;
280     space->idy[k] = idxn[i];
281     space->val[k] = values[i];
282     k++;
283     cnt++;
284   }
285   stash->n               += cnt;
286   space->local_used      += cnt;
287   space->local_remaining -= cnt;
288   PetscFunctionReturn(0);
289 }
290 
291 /*
292   MatStashValuesCol_Private - inserts values into the stash. This function
293   expects the values to be columnoriented. Multiple columns belong to the same row
294   can be inserted with a single call to this function.
295 
296   Input Parameters:
297   stash   - the stash
298   row     - the global row correspoiding to the values
299   n       - the number of elements inserted. All elements belong to the above row.
300   idxn    - the global column indices corresponding to each of the values.
301   values  - the values inserted
302   stepval - the consecutive values are sepated by a distance of stepval.
303             this happens because the input is columnoriented.
304 */
305 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
306 {
307   PetscErrorCode     ierr;
308   PetscInt           i,k,cnt = 0;
309   PetscMatStashSpace space=stash->space;
310 
311   PetscFunctionBegin;
312   /* Check and see if we have sufficient memory */
313   if (!space || space->local_remaining < n) {
314     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
315   }
316   space = stash->space;
317   k     = space->local_used;
318   for (i=0; i<n; i++) {
319     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
320     space->idx[k] = row;
321     space->idy[k] = idxn[i];
322     space->val[k] = values[i*stepval];
323     k++;
324     cnt++;
325   }
326   stash->n               += cnt;
327   space->local_used      += cnt;
328   space->local_remaining -= cnt;
329   PetscFunctionReturn(0);
330 }
331 
332 /*
333   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
334   This function expects the values to be roworiented. Multiple columns belong
335   to the same block-row can be inserted with a single call to this function.
336   This function extracts the sub-block of values based on the dimensions of
337   the original input block, and the row,col values corresponding to the blocks.
338 
339   Input Parameters:
340   stash  - the stash
341   row    - the global block-row correspoiding to the values
342   n      - the number of elements inserted. All elements belong to the above row.
343   idxn   - the global block-column indices corresponding to each of the blocks of
344            values. Each block is of size bs*bs.
345   values - the values inserted
346   rmax   - the number of block-rows in the original block.
347   cmax   - the number of block-columsn on the original block.
348   idx    - the index of the current block-row in the original block.
349 */
350 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
351 {
352   PetscErrorCode     ierr;
353   PetscInt           i,j,k,bs2,bs=stash->bs,l;
354   const PetscScalar  *vals;
355   PetscScalar        *array;
356   PetscMatStashSpace space=stash->space;
357 
358   PetscFunctionBegin;
359   if (!space || space->local_remaining < n) {
360     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
361   }
362   space = stash->space;
363   l     = space->local_used;
364   bs2   = bs*bs;
365   for (i=0; i<n; i++) {
366     space->idx[l] = row;
367     space->idy[l] = idxn[i];
368     /* Now copy over the block of values. Store the values column oriented.
369        This enables inserting multiple blocks belonging to a row with a single
370        funtion call */
371     array = space->val + bs2*l;
372     vals  = values + idx*bs2*n + bs*i;
373     for (j=0; j<bs; j++) {
374       for (k=0; k<bs; k++) array[k*bs] = vals[k];
375       array++;
376       vals += cmax*bs;
377     }
378     l++;
379   }
380   stash->n               += n;
381   space->local_used      += n;
382   space->local_remaining -= n;
383   PetscFunctionReturn(0);
384 }
385 
386 /*
387   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
388   This function expects the values to be roworiented. Multiple columns belong
389   to the same block-row can be inserted with a single call to this function.
390   This function extracts the sub-block of values based on the dimensions of
391   the original input block, and the row,col values corresponding to the blocks.
392 
393   Input Parameters:
394   stash  - the stash
395   row    - the global block-row correspoiding to the values
396   n      - the number of elements inserted. All elements belong to the above row.
397   idxn   - the global block-column indices corresponding to each of the blocks of
398            values. Each block is of size bs*bs.
399   values - the values inserted
400   rmax   - the number of block-rows in the original block.
401   cmax   - the number of block-columsn on the original block.
402   idx    - the index of the current block-row in the original block.
403 */
404 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
405 {
406   PetscErrorCode     ierr;
407   PetscInt           i,j,k,bs2,bs=stash->bs,l;
408   const PetscScalar  *vals;
409   PetscScalar        *array;
410   PetscMatStashSpace space=stash->space;
411 
412   PetscFunctionBegin;
413   if (!space || space->local_remaining < n) {
414     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
415   }
416   space = stash->space;
417   l     = space->local_used;
418   bs2   = bs*bs;
419   for (i=0; i<n; i++) {
420     space->idx[l] = row;
421     space->idy[l] = idxn[i];
422     /* Now copy over the block of values. Store the values column oriented.
423      This enables inserting multiple blocks belonging to a row with a single
424      funtion call */
425     array = space->val + bs2*l;
426     vals  = values + idx*bs2*n + bs*i;
427     for (j=0; j<bs; j++) {
428       for (k=0; k<bs; k++) array[k] = vals[k];
429       array += bs;
430       vals  += rmax*bs;
431     }
432     l++;
433   }
434   stash->n               += n;
435   space->local_used      += n;
436   space->local_remaining -= n;
437   PetscFunctionReturn(0);
438 }
439 /*
440   MatStashScatterBegin_Private - Initiates the transfer of values to the
441   correct owners. This function goes through the stash, and check the
442   owners of each stashed value, and sends the values off to the owner
443   processors.
444 
445   Input Parameters:
446   stash  - the stash
447   owners - an array of size 'no-of-procs' which gives the ownership range
448            for each node.
449 
450   Notes: The 'owners' array in the cased of the blocked-stash has the
451   ranges specified blocked global indices, and for the regular stash in
452   the proper global indices.
453 */
454 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
455 {
456   PetscErrorCode ierr;
457 
458   PetscFunctionBegin;
459   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 
463 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
464 {
465   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
466   PetscInt           size=stash->size,nsends;
467   PetscErrorCode     ierr;
468   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
469   PetscScalar        **rvalues,*svalues;
470   MPI_Comm           comm = stash->comm;
471   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
472   PetscMPIInt        *sizes,*nlengths,nreceives;
473   PetscInt           *sp_idx,*sp_idy;
474   PetscScalar        *sp_val;
475   PetscMatStashSpace space,space_next;
476 
477   PetscFunctionBegin;
478   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
479     InsertMode addv;
480     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
481     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
482     mat->insertmode = addv; /* in case this processor had no cache */
483   }
484 
485   bs2 = stash->bs*stash->bs;
486 
487   /*  first count number of contributors to each processor */
488   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
489   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
490   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
491 
492   i       = j    = 0;
493   lastidx = -1;
494   space   = stash->space_head;
495   while (space) {
496     space_next = space->next;
497     sp_idx     = space->idx;
498     for (l=0; l<space->local_used; l++) {
499       /* if indices are NOT locally sorted, need to start search at the beginning */
500       if (lastidx > (idx = sp_idx[l])) j = 0;
501       lastidx = idx;
502       for (; j<size; j++) {
503         if (idx >= owners[j] && idx < owners[j+1]) {
504           nlengths[j]++; owner[i] = j; break;
505         }
506       }
507       i++;
508     }
509     space = space_next;
510   }
511   /* Now check what procs get messages - and compute nsends. */
512   for (i=0, nsends=0; i<size; i++) {
513     if (nlengths[i]) {
514       sizes[i] = 1; nsends++;
515     }
516   }
517 
518   {PetscMPIInt *onodes,*olengths;
519    /* Determine the number of messages to expect, their lengths, from from-ids */
520    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
521    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
522    /* since clubbing row,col - lengths are multiplied by 2 */
523    for (i=0; i<nreceives; i++) olengths[i] *=2;
524    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
525    /* values are size 'bs2' lengths (and remove earlier factor 2 */
526    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
527    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
528    ierr = PetscFree(onodes);CHKERRQ(ierr);
529    ierr = PetscFree(olengths);CHKERRQ(ierr);}
530 
531   /* do sends:
532       1) starts[i] gives the starting index in svalues for stuff going to
533          the ith processor
534   */
535   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
536   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
537   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
538   /* use 2 sends the first with all_a, the next with all_i and all_j */
539   startv[0] = 0; starti[0] = 0;
540   for (i=1; i<size; i++) {
541     startv[i] = startv[i-1] + nlengths[i-1];
542     starti[i] = starti[i-1] + 2*nlengths[i-1];
543   }
544 
545   i     = 0;
546   space = stash->space_head;
547   while (space) {
548     space_next = space->next;
549     sp_idx     = space->idx;
550     sp_idy     = space->idy;
551     sp_val     = space->val;
552     for (l=0; l<space->local_used; l++) {
553       j = owner[i];
554       if (bs2 == 1) {
555         svalues[startv[j]] = sp_val[l];
556       } else {
557         PetscInt    k;
558         PetscScalar *buf1,*buf2;
559         buf1 = svalues+bs2*startv[j];
560         buf2 = space->val + bs2*l;
561         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
562       }
563       sindices[starti[j]]             = sp_idx[l];
564       sindices[starti[j]+nlengths[j]] = sp_idy[l];
565       startv[j]++;
566       starti[j]++;
567       i++;
568     }
569     space = space_next;
570   }
571   startv[0] = 0;
572   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
573 
574   for (i=0,count=0; i<size; i++) {
575     if (sizes[i]) {
576       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
577       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
578     }
579   }
580 #if defined(PETSC_USE_INFO)
581   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
582   for (i=0; i<size; i++) {
583     if (sizes[i]) {
584       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
585     }
586   }
587 #endif
588   ierr = PetscFree(nlengths);CHKERRQ(ierr);
589   ierr = PetscFree(owner);CHKERRQ(ierr);
590   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
591   ierr = PetscFree(sizes);CHKERRQ(ierr);
592 
593   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
594   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
595 
596   for (i=0; i<nreceives; i++) {
597     recv_waits[2*i]   = recv_waits1[i];
598     recv_waits[2*i+1] = recv_waits2[i];
599   }
600   stash->recv_waits = recv_waits;
601 
602   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
603   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
604 
605   stash->svalues         = svalues;
606   stash->sindices        = sindices;
607   stash->rvalues         = rvalues;
608   stash->rindices        = rindices;
609   stash->send_waits      = send_waits;
610   stash->nsends          = nsends;
611   stash->nrecvs          = nreceives;
612   stash->reproduce_count = 0;
613   PetscFunctionReturn(0);
614 }
615 
616 /*
617    MatStashScatterGetMesg_Private - This function waits on the receives posted
618    in the function MatStashScatterBegin_Private() and returns one message at
619    a time to the calling function. If no messages are left, it indicates this
620    by setting flg = 0, else it sets flg = 1.
621 
622    Input Parameters:
623    stash - the stash
624 
625    Output Parameters:
626    nvals - the number of entries in the current message.
627    rows  - an array of row indices (or blocked indices) corresponding to the values
628    cols  - an array of columnindices (or blocked indices) corresponding to the values
629    vals  - the values
630    flg   - 0 indicates no more message left, and the current call has no values associated.
631            1 indicates that the current call successfully received a message, and the
632              other output parameters nvals,rows,cols,vals are set appropriately.
633 */
634 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
635 {
636   PetscErrorCode ierr;
637 
638   PetscFunctionBegin;
639   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
640   PetscFunctionReturn(0);
641 }
642 
643 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
644 {
645   PetscErrorCode ierr;
646   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
647   PetscInt       bs2;
648   MPI_Status     recv_status;
649   PetscBool      match_found = PETSC_FALSE;
650 
651   PetscFunctionBegin;
652   *flg = 0; /* When a message is discovered this is reset to 1 */
653   /* Return if no more messages to process */
654   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
655 
656   bs2 = stash->bs*stash->bs;
657   /* If a matching pair of receives are found, process them, and return the data to
658      the calling function. Until then keep receiving messages */
659   while (!match_found) {
660     if (stash->reproduce) {
661       i    = stash->reproduce_count++;
662       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
663     } else {
664       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
665     }
666     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
667 
668     /* Now pack the received message into a structure which is usable by others */
669     if (i % 2) {
670       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
671 
672       flg_v[2*recv_status.MPI_SOURCE] = i/2;
673 
674       *nvals = *nvals/bs2;
675     } else {
676       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
677 
678       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
679 
680       *nvals = *nvals/2; /* This message has both row indices and col indices */
681     }
682 
683     /* Check if we have both messages from this proc */
684     i1 = flg_v[2*recv_status.MPI_SOURCE];
685     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
686     if (i1 != -1 && i2 != -1) {
687       *rows = stash->rindices[i2];
688       *cols = *rows + *nvals;
689       *vals = stash->rvalues[i1];
690       *flg  = 1;
691       stash->nprocessed++;
692       match_found = PETSC_TRUE;
693     }
694   }
695   PetscFunctionReturn(0);
696 }
697 
698 typedef struct {
699   PetscInt row;
700   PetscInt col;
701   PetscScalar vals[1];          /* Actually an array of length bs2 */
702 } MatStashBlock;
703 
704 static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
705 {
706   PetscErrorCode ierr;
707   PetscMatStashSpace space;
708   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
709   PetscScalar **valptr;
710 
711   PetscFunctionBegin;
712   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
713   for (space=stash->space_head,cnt=0; space; space=space->next) {
714     for (i=0; i<space->local_used; i++) {
715       row[cnt] = space->idx[i];
716       col[cnt] = space->idy[i];
717       valptr[cnt] = &space->val[i*bs2];
718       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
719       cnt++;
720     }
721   }
722   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
723   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
724   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
725   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
726     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
727       PetscInt colstart;
728       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
729       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
730         PetscInt j,l;
731         MatStashBlock *block;
732         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
733         block->row = row[rowstart];
734         block->col = col[colstart];
735         ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
736         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
737           if (insertmode == ADD_VALUES) {
738             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
739           } else {
740             ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
741           }
742         }
743         colstart = j;
744       }
745       rowstart = i;
746     }
747   }
748   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
752 static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
753 {
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   if (stash->blocktype == MPI_DATATYPE_NULL) {
758     PetscInt     bs2 = PetscSqr(stash->bs);
759     PetscMPIInt  blocklens[2];
760     MPI_Aint     displs[2];
761     MPI_Datatype types[2],stype;
762     /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
763      * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
764      * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
765      */
766     struct DummyBlock {PetscInt row,col; PetscReal vals;};
767 
768     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
769     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
770       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
771     }
772     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
773     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
774     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
775     blocklens[0] = 2;
776     blocklens[1] = bs2;
777     displs[0] = offsetof(struct DummyBlock,row);
778     displs[1] = offsetof(struct DummyBlock,vals);
779     types[0] = MPIU_INT;
780     types[1] = MPIU_SCALAR;
781     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
782     ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
783     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
784     ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
785     ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 /* Callback invoked after target rank has initiatied receive of rendezvous message.
791  * Here we post the main sends.
792  */
793 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
794 {
795   MatStash *stash = (MatStash*)ctx;
796   MatStashHeader *hdr = (MatStashHeader*)sdata;
797   PetscErrorCode ierr;
798 
799   PetscFunctionBegin;
800   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
801   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
802   stash->sendframes[rankid].count = hdr->count;
803   stash->sendframes[rankid].pending = 1;
804   PetscFunctionReturn(0);
805 }
806 
807 /* Callback invoked by target after receiving rendezvous message.
808  * Here we post the main recvs.
809  */
810 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
811 {
812   MatStash *stash = (MatStash*)ctx;
813   MatStashHeader *hdr = (MatStashHeader*)rdata;
814   MatStashFrame *frame;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
819   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
820   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
821   frame->count = hdr->count;
822   frame->pending = 1;
823   PetscFunctionReturn(0);
824 }
825 
826 #if !defined(PETSC_HAVE_MPIUNI)
827 /*
828  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
829  */
830 static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
831 {
832   PetscErrorCode ierr;
833   size_t nblocks;
834   char *sendblocks;
835 
836   PetscFunctionBegin;
837 #if defined(PETSC_USE_DEBUG)
838   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
839     InsertMode addv;
840     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
841     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
842   }
843 #endif
844 
845   if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
846     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
847   }
848 
849   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
850   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
851   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
852   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
853   if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
854     PetscInt i;
855     size_t b;
856     for (i=0,b=0; i<stash->nsendranks; i++) {
857       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
858       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
859       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
860       for ( ; b<nblocks; b++) {
861         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
862         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
863         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
864         stash->sendhdr[i].count++;
865       }
866     }
867   } else {                      /* Dynamically count and pack (first time) */
868     PetscInt sendno;
869     size_t i,rowstart;
870 
871     /* Count number of send ranks and allocate for sends */
872     stash->nsendranks = 0;
873     for (rowstart=0; rowstart<nblocks; ) {
874       PetscInt owner;
875       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
876       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
877       if (owner < 0) owner = -(owner+2);
878       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
879         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
880         if (sendblock_i->row >= owners[owner+1]) break;
881       }
882       stash->nsendranks++;
883       rowstart = i;
884     }
885     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
886 
887     /* Set up sendhdrs and sendframes */
888     sendno = 0;
889     for (rowstart=0; rowstart<nblocks; ) {
890       PetscInt owner;
891       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
892       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
893       if (owner < 0) owner = -(owner+2);
894       stash->sendranks[sendno] = owner;
895       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
896         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
897         if (sendblock_i->row >= owners[owner+1]) break;
898       }
899       stash->sendframes[sendno].buffer = sendblock_rowstart;
900       stash->sendframes[sendno].pending = 0;
901       stash->sendhdr[sendno].count = i - rowstart;
902       sendno++;
903       rowstart = i;
904     }
905     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
906   }
907 
908   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
909    * message or a dummy entry of some sort. */
910   if (mat->insertmode == INSERT_VALUES) {
911     size_t i;
912     for (i=0; i<nblocks; i++) {
913       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
914       sendblock_i->row = -(sendblock_i->row+1);
915     }
916   }
917 
918   if (stash->subset_off_proc && mat->subsetoffprocentries) {
919     PetscMPIInt i,tag;
920     ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
921     for (i=0; i<stash->nrecvranks; i++) {
922       ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
923     }
924     for (i=0; i<stash->nsendranks; i++) {
925       ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
926     }
927     stash->use_status = PETSC_TRUE; /* Use count from message status. */
928   } else {
929     ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
930                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
931                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
932     ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
933     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
934   }
935 
936   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
937   stash->recvframe_active = NULL;
938   stash->recvframe_i      = 0;
939   stash->some_i           = 0;
940   stash->some_count       = 0;
941   stash->recvcount        = 0;
942   stash->subset_off_proc  = mat->subsetoffprocentries;
943   stash->insertmode       = &mat->insertmode;
944   PetscFunctionReturn(0);
945 }
946 
947 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
948 {
949   PetscErrorCode ierr;
950   MatStashBlock *block;
951 
952   PetscFunctionBegin;
953   *flg = 0;
954   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
955     if (stash->some_i == stash->some_count) {
956       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
957       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
958       stash->some_i = 0;
959     }
960     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
961     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
962     if (stash->use_status) { /* Count what was actually sent */
963       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
964     }
965     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
966       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
967       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
968       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
969       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
970     }
971     stash->some_i++;
972     stash->recvcount++;
973     stash->recvframe_i = 0;
974   }
975   *n = 1;
976   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
977   if (block->row < 0) block->row = -(block->row + 1);
978   *row = &block->row;
979   *col = &block->col;
980   *val = block->vals;
981   stash->recvframe_i++;
982   *flg = 1;
983   PetscFunctionReturn(0);
984 }
985 
986 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
987 {
988   PetscErrorCode ierr;
989 
990   PetscFunctionBegin;
991   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
992   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
993     void *dummy;
994     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
995   } else {                      /* No reuse, so collect everything. */
996     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
997   }
998 
999   /* Now update nmaxold to be app 10% more than max n used, this way the
1000      wastage of space is reduced the next time this stash is used.
1001      Also update the oldmax, only if it increases */
1002   if (stash->n) {
1003     PetscInt bs2     = stash->bs*stash->bs;
1004     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1005     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1006   }
1007 
1008   stash->nmax       = 0;
1009   stash->n          = 0;
1010   stash->reallocs   = -1;
1011   stash->nprocessed = 0;
1012 
1013   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1014 
1015   stash->space = 0;
1016 
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1021 {
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1026   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1027   stash->recvframes = NULL;
1028   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1029   if (stash->blocktype != MPI_DATATYPE_NULL) {
1030     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1031   }
1032   stash->nsendranks = 0;
1033   stash->nrecvranks = 0;
1034   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1035   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1036   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1037   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1038   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1039   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 #endif
1043