xref: /petsc/src/mat/utils/matstash.c (revision 0c0e133f2a4dd09ad45d46bb5a1194cba971d72e)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 #include "src/mat/utils/matstashspace.h"
5 
6 /*
7        The input to the stash is ALWAYS in MatScalar precision, and the
8     internal storage and output is also in MatScalar.
9 */
10 #define DEFAULT_STASH_SIZE   10000
11 
12 /*
13   MatStashCreate_Private - Creates a stash,currently used for all the parallel
14   matrix implementations. The stash is where elements of a matrix destined
15   to be stored on other processors are kept until matrix assembly is done.
16 
17   This is a simple minded stash. Simply adds entries to end of stash.
18 
19   Input Parameters:
20   comm - communicator, required for scatters.
21   bs   - stash block size. used when stashing blocks of values
22 
23   Output Parameters:
24   stash    - the newly created stash
25 */
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStashCreate_Private"
28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
29 {
30   PetscErrorCode ierr;
31   PetscInt       max,*opt,nopt;
32   PetscTruth     flg;
33 
34   PetscFunctionBegin;
35   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36   stash->comm = comm;
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41 
42   nopt = stash->size;
43   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45   if (flg) {
46     if (nopt == 1)                max = opt[0];
47     else if (nopt == stash->size) max = opt[stash->rank];
48     else if (stash->rank < nopt)  max = opt[stash->rank];
49     else                          max = 0; /* Use default */
50     stash->umax = max;
51   } else {
52     stash->umax = 0;
53   }
54   ierr = PetscFree(opt);CHKERRQ(ierr);
55   if (bs <= 0) bs = 1;
56 
57   stash->bs       = bs;
58   stash->nmax     = 0;
59   stash->oldnmax  = 0;
60   stash->n        = 0;
61   stash->reallocs = -1;
62   stash->space_head = 0;
63   stash->space      = 0;
64 
65   stash->send_waits  = 0;
66   stash->recv_waits  = 0;
67   stash->send_status = 0;
68   stash->nsends      = 0;
69   stash->nrecvs      = 0;
70   stash->svalues     = 0;
71   stash->rvalues     = 0;
72   stash->rindices    = 0;
73   stash->nprocs      = 0;
74   stash->nprocessed  = 0;
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79    MatStashDestroy_Private - Destroy the stash
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatStashDestroy_Private"
83 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84 {
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   if (stash->space_head){
89     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
90     stash->space_head = 0;
91     stash->space      = 0;
92   }
93   PetscFunctionReturn(0);
94 }
95 
96 /*
97    MatStashScatterEnd_Private - This is called as the fial stage of
98    scatter. The final stages of messagepassing is done here, and
99    all the memory used for messagepassing is cleanedu up. This
100    routine also resets the stash, and deallocates the memory used
101    for the stash. It also keeps track of the current memory usage
102    so that the same value can be used the next time through.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "MatStashScatterEnd_Private"
106 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109   PetscInt       nsends=stash->nsends,bs2,oldnmax;
110   MPI_Status     *send_status;
111 
112   PetscFunctionBegin;
113   /* wait on sends */
114   if (nsends) {
115     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
116     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
117     ierr = PetscFree(send_status);CHKERRQ(ierr);
118   }
119 
120   /* Now update nmaxold to be app 10% more than max n used, this way the
121      wastage of space is reduced the next time this stash is used.
122      Also update the oldmax, only if it increases */
123   if (stash->n) {
124     bs2      = stash->bs*stash->bs;
125     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
126     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127   }
128 
129   stash->nmax       = 0;
130   stash->n          = 0;
131   stash->reallocs   = -1;
132   stash->nprocessed = 0;
133   if (stash->space_head){
134     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
135     stash->space_head = 0;
136     stash->space      = 0;
137   }
138   if (stash->send_waits) {
139     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
140     stash->send_waits = 0;
141   }
142   if (stash->recv_waits) {
143     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
144     stash->recv_waits = 0;
145   }
146   if (stash->svalues) {
147     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
148     stash->svalues = 0;
149   }
150   if (stash->rvalues) {
151     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
152     stash->rvalues = 0;
153   }
154   if (stash->rindices) {
155     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
156     stash->rindices = 0;
157   }
158   if (stash->nprocs) {
159     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
160     stash->nprocs = 0;
161   }
162   PetscFunctionReturn(0);
163 }
164 
165 /*
166    MatStashGetInfo_Private - Gets the relavant statistics of the stash
167 
168    Input Parameters:
169    stash    - the stash
170    nstash   - the size of the stash. Indicates the number of values stored.
171    reallocs - the number of additional mallocs incurred.
172 
173 */
174 #undef __FUNCT__
175 #define __FUNCT__ "MatStashGetInfo_Private"
176 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
177 {
178   PetscInt bs2 = stash->bs*stash->bs;
179 
180   PetscFunctionBegin;
181   if (nstash) *nstash   = stash->n*bs2;
182   if (reallocs) {
183     if (stash->reallocs < 0) *reallocs = 0;
184     else                     *reallocs = stash->reallocs;
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 /*
190    MatStashSetInitialSize_Private - Sets the initial size of the stash
191 
192    Input Parameters:
193    stash  - the stash
194    max    - the value that is used as the max size of the stash.
195             this value is used while allocating memory.
196 */
197 #undef __FUNCT__
198 #define __FUNCT__ "MatStashSetInitialSize_Private"
199 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
200 {
201   PetscFunctionBegin;
202   stash->umax = max;
203   PetscFunctionReturn(0);
204 }
205 
206 /* MatStashExpand_Private - Expand the stash. This function is called
207    when the space in the stash is not sufficient to add the new values
208    being inserted into the stash.
209 
210    Input Parameters:
211    stash - the stash
212    incr  - the minimum increase requested
213 
214    Notes:
215    This routine doubles the currently used memory.
216  */
217 #undef __FUNCT__
218 #define __FUNCT__ "MatStashExpand_Private"
219 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
220 {
221   PetscErrorCode ierr;
222   PetscInt       newnmax,bs2= stash->bs*stash->bs;
223 
224   PetscFunctionBegin;
225   /* allocate a larger stash */
226   if (!stash->oldnmax && !stash->nmax) { /* new stash */
227     if (stash->umax)                  newnmax = stash->umax/bs2;
228     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
229   } else if (!stash->nmax) { /* resuing stash */
230     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
231     else                              newnmax = stash->oldnmax/bs2;
232   } else                              newnmax = stash->nmax*2;
233   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
234 
235   /* Get a MatStashSpace and attach it to stash */
236   if (!stash->nmax) { /* new stash or resuing stash->oldnmax */
237     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr);
238     stash->space = stash->space_head;
239   } else {
240     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
241   }
242   stash->reallocs++;
243   stash->nmax = newnmax;
244   PetscFunctionReturn(0);
245 }
246 /*
247   MatStashValuesRow_Private - inserts values into the stash. This function
248   expects the values to be roworiented. Multiple columns belong to the same row
249   can be inserted with a single call to this function.
250 
251   Input Parameters:
252   stash  - the stash
253   row    - the global row correspoiding to the values
254   n      - the number of elements inserted. All elements belong to the above row.
255   idxn   - the global column indices corresponding to each of the values.
256   values - the values inserted
257 */
258 #undef __FUNCT__
259 #define __FUNCT__ "MatStashValuesRow_Private"
260 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
261 {
262   PetscErrorCode     ierr;
263   PetscInt           i,k;
264   PetscMatStashSpace space=stash->space;
265 
266   PetscFunctionBegin;
267   /* Check and see if we have sufficient memory */
268   if (!space || space->local_remaining < n){
269     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
270   }
271   space = stash->space;
272   k     = space->local_used;
273   for (i=0; i<n; i++) {
274     space->idx[k] = row;
275     space->idy[k] = idxn[i];
276     space->val[k] = values[i];
277     k++;
278   }
279   stash->n               += n;
280   space->local_used      += n;
281   space->local_remaining -= n;
282   PetscFunctionReturn(0);
283 }
284 
285 /*
286   MatStashValuesCol_Private - inserts values into the stash. This function
287   expects the values to be columnoriented. Multiple columns belong to the same row
288   can be inserted with a single call to this function.
289 
290   Input Parameters:
291   stash   - the stash
292   row     - the global row correspoiding to the values
293   n       - the number of elements inserted. All elements belong to the above row.
294   idxn    - the global column indices corresponding to each of the values.
295   values  - the values inserted
296   stepval - the consecutive values are sepated by a distance of stepval.
297             this happens because the input is columnoriented.
298 */
299 #undef __FUNCT__
300 #define __FUNCT__ "MatStashValuesCol_Private"
301 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
302 {
303   PetscErrorCode     ierr;
304   PetscInt           i,k;
305   PetscMatStashSpace space=stash->space;
306 
307   PetscFunctionBegin;
308   /* Check and see if we have sufficient memory */
309   if (!space || space->local_remaining < n){
310     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
311   }
312   space = stash->space;
313   k = space->local_used;
314   for (i=0; i<n; i++) {
315     space->idx[k] = row;
316     space->idy[k] = idxn[i];
317     space->val[k] = values[i*stepval];
318     k++;
319   }
320   stash->n               += n;
321   space->local_used      += n;
322   space->local_remaining -= n;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
328   This function expects the values to be roworiented. Multiple columns belong
329   to the same block-row can be inserted with a single call to this function.
330   This function extracts the sub-block of values based on the dimensions of
331   the original input block, and the row,col values corresponding to the blocks.
332 
333   Input Parameters:
334   stash  - the stash
335   row    - the global block-row correspoiding to the values
336   n      - the number of elements inserted. All elements belong to the above row.
337   idxn   - the global block-column indices corresponding to each of the blocks of
338            values. Each block is of size bs*bs.
339   values - the values inserted
340   rmax   - the number of block-rows in the original block.
341   cmax   - the number of block-columsn on the original block.
342   idx    - the index of the current block-row in the original block.
343 */
344 #undef __FUNCT__
345 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
346 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
347 {
348   PetscErrorCode     ierr;
349   PetscInt           i,j,k,bs2,bs=stash->bs,l;
350   const MatScalar    *vals;
351   MatScalar          *array;
352   PetscMatStashSpace space=stash->space;
353 
354   PetscFunctionBegin;
355   if (!space || space->local_remaining < n){
356     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
357   }
358   space = stash->space;
359   l     = space->local_used;
360   bs2   = bs*bs;
361   for (i=0; i<n; i++) {
362     space->idx[l] = row;
363     space->idy[l] = idxn[i];
364     /* Now copy over the block of values. Store the values column oriented.
365        This enables inserting multiple blocks belonging to a row with a single
366        funtion call */
367     array = space->val + bs2*l;
368     vals  = values + idx*bs2*n + bs*i;
369     for (j=0; j<bs; j++) {
370       for (k=0; k<bs; k++) array[k*bs] = vals[k];
371       array++;
372       vals  += cmax*bs;
373     }
374     l++;
375   }
376   stash->n               += n;
377   space->local_used      += n;
378   space->local_remaining -= n;
379   PetscFunctionReturn(0);
380 }
381 
382 /*
383   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
384   This function expects the values to be roworiented. Multiple columns belong
385   to the same block-row can be inserted with a single call to this function.
386   This function extracts the sub-block of values based on the dimensions of
387   the original input block, and the row,col values corresponding to the blocks.
388 
389   Input Parameters:
390   stash  - the stash
391   row    - the global block-row correspoiding to the values
392   n      - the number of elements inserted. All elements belong to the above row.
393   idxn   - the global block-column indices corresponding to each of the blocks of
394            values. Each block is of size bs*bs.
395   values - the values inserted
396   rmax   - the number of block-rows in the original block.
397   cmax   - the number of block-columsn on the original block.
398   idx    - the index of the current block-row in the original block.
399 */
400 #undef __FUNCT__
401 #define __FUNCT__ "MatStashValuesColBlocked_Private"
402 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
403 {
404   PetscErrorCode  ierr;
405   PetscInt        i,j,k,bs2,bs=stash->bs,l;
406   const MatScalar *vals;
407   MatScalar       *array;
408   PetscMatStashSpace space=stash->space;
409 
410   PetscFunctionBegin;
411   if (!space || space->local_remaining < n){
412     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
413   }
414   space = stash->space;
415   l     = space->local_used;
416   bs2   = bs*bs;
417   for (i=0; i<n; i++) {
418     space->idx[l] = row;
419     space->idy[l] = idxn[i];
420     /* Now copy over the block of values. Store the values column oriented.
421      This enables inserting multiple blocks belonging to a row with a single
422      funtion call */
423     array = space->val + bs2*l;
424     vals  = values + idx*bs2*n + bs*i;
425     for (j=0; j<bs; j++) {
426       for (k=0; k<bs; k++) {array[k] = vals[k];}
427       array += bs;
428       vals  += rmax*bs;
429     }
430     l++;
431   }
432   stash->n               += n;
433   space->local_used      += n;
434   space->local_remaining -= n;
435   PetscFunctionReturn(0);
436 }
437 /*
438   MatStashScatterBegin_Private - Initiates the transfer of values to the
439   correct owners. This function goes through the stash, and check the
440   owners of each stashed value, and sends the values off to the owner
441   processors.
442 
443   Input Parameters:
444   stash  - the stash
445   owners - an array of size 'no-of-procs' which gives the ownership range
446            for each node.
447 
448   Notes: The 'owners' array in the cased of the blocked-stash has the
449   ranges specified blocked global indices, and for the regular stash in
450   the proper global indices.
451 */
452 #undef __FUNCT__
453 #define __FUNCT__ "MatStashScatterBegin_Private"
454 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
455 {
456   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
457   PetscInt       size=stash->size,nsends;
458   PetscErrorCode ierr;
459   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
460   MatScalar      **rvalues,*svalues;
461   MPI_Comm       comm = stash->comm;
462   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
463   PetscMPIInt    *nprocs,*nlengths,nreceives;
464   PetscInt       *sp_idx,*sp_idy;
465   MatScalar      *sp_val;
466   PetscMatStashSpace space,space_next;
467 
468   PetscFunctionBegin;
469   bs2 = stash->bs*stash->bs;
470 
471   /*  first count number of contributors to each processor */
472   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
473   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
474   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
475 
476   nlengths = nprocs+size;
477   i = j    = 0;
478   lastidx  = -1;
479   space    = stash->space_head;
480   while (space != PETSC_NULL){
481     space_next = space->next;
482     sp_idx     = space->idx;
483     for (l=0; l<space->local_used; l++){
484       /* if indices are NOT locally sorted, need to start search at the beginning */
485       if (lastidx > (idx = sp_idx[l])) j = 0;
486       lastidx = idx;
487       for (; j<size; j++) {
488         if (idx >= owners[j] && idx < owners[j+1]) {
489           nlengths[j]++; owner[i] = j; break;
490         }
491       }
492       i++;
493     }
494     space = space_next;
495   }
496   /* Now check what procs get messages - and compute nsends. */
497   for (i=0, nsends=0 ; i<size; i++) {
498     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
499   }
500 
501   { int  *onodes,*olengths;
502   /* Determine the number of messages to expect, their lengths, from from-ids */
503   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
504   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
505   /* since clubbing row,col - lengths are multiplied by 2 */
506   for (i=0; i<nreceives; i++) olengths[i] *=2;
507   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
508   /* values are size 'bs2' lengths (and remove earlier factor 2 */
509   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
510   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
511   ierr = PetscFree(onodes);CHKERRQ(ierr);
512   ierr = PetscFree(olengths);CHKERRQ(ierr);
513   }
514 
515   /* do sends:
516       1) starts[i] gives the starting index in svalues for stuff going to
517          the ith processor
518   */
519   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
520   sindices = (PetscInt*)(svalues + bs2*stash->n);
521   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
522   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
523   starti   = startv + size;
524   /* use 2 sends the first with all_a, the next with all_i and all_j */
525   startv[0]  = 0; starti[0] = 0;
526   for (i=1; i<size; i++) {
527     startv[i] = startv[i-1] + nlengths[i-1];
528     starti[i] = starti[i-1] + nlengths[i-1]*2;
529   }
530 
531   i     = 0;
532   space = stash->space_head;
533   while (space != PETSC_NULL){
534     space_next = space->next;
535     sp_idx = space->idx;
536     sp_idy = space->idy;
537     sp_val = space->val;
538     for (l=0; l<space->local_used; l++){
539       j = owner[i];
540       if (bs2 == 1) {
541         svalues[startv[j]] = sp_val[l];
542       } else {
543         PetscInt  k;
544         MatScalar *buf1,*buf2;
545         buf1 = svalues+bs2*startv[j];
546         buf2 = space->val + bs2*i;
547         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
548       }
549       sindices[starti[j]]             = sp_idx[l];
550       sindices[starti[j]+nlengths[j]] = sp_idy[l];
551       startv[j]++;
552       starti[j]++;
553       i++;
554     }
555     space = space_next;
556   }
557   startv[0] = 0;
558   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
559 
560   for (i=0,count=0; i<size; i++) {
561     if (nprocs[i]) {
562       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
563       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
564     }
565   }
566 #if defined(PETSC_USE_INFO)
567   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
568   for (i=0; i<size; i++) {
569     if (nprocs[i]) {
570       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
571     }
572   }
573 #endif
574   ierr = PetscFree(owner);CHKERRQ(ierr);
575   ierr = PetscFree(startv);CHKERRQ(ierr);
576   /* This memory is reused in scatter end  for a different purpose*/
577   for (i=0; i<2*size; i++) nprocs[i] = -1;
578   stash->nprocs = nprocs;
579 
580   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
581   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
582 
583   for (i=0; i<nreceives; i++) {
584     recv_waits[2*i]   = recv_waits1[i];
585     recv_waits[2*i+1] = recv_waits2[i];
586   }
587   stash->recv_waits = recv_waits;
588   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
589   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
590 
591   stash->svalues    = svalues;    stash->rvalues     = rvalues;
592   stash->rindices   = rindices;   stash->send_waits  = send_waits;
593   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
594   PetscFunctionReturn(0);
595 }
596 
597 /*
598    MatStashScatterGetMesg_Private - This function waits on the receives posted
599    in the function MatStashScatterBegin_Private() and returns one message at
600    a time to the calling function. If no messages are left, it indicates this
601    by setting flg = 0, else it sets flg = 1.
602 
603    Input Parameters:
604    stash - the stash
605 
606    Output Parameters:
607    nvals - the number of entries in the current message.
608    rows  - an array of row indices (or blocked indices) corresponding to the values
609    cols  - an array of columnindices (or blocked indices) corresponding to the values
610    vals  - the values
611    flg   - 0 indicates no more message left, and the current call has no values associated.
612            1 indicates that the current call successfully received a message, and the
613              other output parameters nvals,rows,cols,vals are set appropriately.
614 */
615 #undef __FUNCT__
616 #define __FUNCT__ "MatStashScatterGetMesg_Private"
617 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
618 {
619   PetscErrorCode ierr;
620   PetscMPIInt    i,*flg_v,i1,i2;
621   PetscInt       bs2;
622   MPI_Status     recv_status;
623   PetscTruth     match_found = PETSC_FALSE;
624 
625   PetscFunctionBegin;
626 
627   *flg = 0; /* When a message is discovered this is reset to 1 */
628   /* Return if no more messages to process */
629   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
630 
631   flg_v = stash->nprocs;
632   bs2   = stash->bs*stash->bs;
633   /* If a matching pair of receieves are found, process them, and return the data to
634      the calling function. Until then keep receiving messages */
635   while (!match_found) {
636     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
637     /* Now pack the received message into a structure which is useable by others */
638     if (i % 2) {
639       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
640       flg_v[2*recv_status.MPI_SOURCE] = i/2;
641       *nvals = *nvals/bs2;
642     } else {
643       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
644       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
645       *nvals = *nvals/2; /* This message has both row indices and col indices */
646     }
647 
648     /* Check if we have both the messages from this proc */
649     i1 = flg_v[2*recv_status.MPI_SOURCE];
650     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
651     if (i1 != -1 && i2 != -1) {
652       *rows       = stash->rindices[i2];
653       *cols       = *rows + *nvals;
654       *vals       = stash->rvalues[i1];
655       *flg        = 1;
656       stash->nprocessed ++;
657       match_found = PETSC_TRUE;
658     }
659   }
660   PetscFunctionReturn(0);
661 }
662