xref: /petsc/src/mat/utils/matstash.c (revision 563fb8714a5d78400bbe9510b73a96321c613bbf)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 
5 /*
6        The input to the stash is ALWAYS in MatScalar precision, and the
7     internal storage and output is also in MatScalar.
8 */
9 #define DEFAULT_STASH_SIZE   10000
10 
11 /*
12   MatStashCreate_Private - Creates a stash,currently used for all the parallel
13   matrix implementations. The stash is where elements of a matrix destined
14   to be stored on other processors are kept until matrix assembly is done.
15 
16   This is a simple minded stash. Simply adds entries to end of stash.
17 
18   Input Parameters:
19   comm - communicator, required for scatters.
20   bs   - stash block size. used when stashing blocks of values
21 
22   Output Parameters:
23   stash    - the newly created stash
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStashCreate_Private"
27 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
28 {
29   PetscErrorCode ierr;
30   PetscInt       max,*opt,nopt;
31   PetscTruth     flg;
32 
33   PetscFunctionBegin;
34   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35   stash->comm = comm;
36   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40 
41   nopt = stash->size;
42   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44   if (flg) {
45     if (nopt == 1)                max = opt[0];
46     else if (nopt == stash->size) max = opt[stash->rank];
47     else if (stash->rank < nopt)  max = opt[stash->rank];
48     else                          max = 0; /* Use default */
49     stash->umax = max;
50   } else {
51     stash->umax = 0;
52   }
53   ierr = PetscFree(opt);CHKERRQ(ierr);
54   if (bs <= 0) bs = 1;
55 
56   stash->bs       = bs;
57   stash->nmax     = 0;
58   stash->oldnmax  = 0;
59   stash->n        = 0;
60   stash->reallocs = -1;
61   stash->idx      = 0;
62   stash->idy      = 0;
63   stash->array    = 0;
64 
65   stash->send_waits  = 0;
66   stash->recv_waits  = 0;
67   stash->send_status = 0;
68   stash->nsends      = 0;
69   stash->nrecvs      = 0;
70   stash->svalues     = 0;
71   stash->rvalues     = 0;
72   stash->rindices    = 0;
73   stash->rmax        = 0;
74   stash->nprocs      = 0;
75   stash->nprocessed  = 0;
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80    MatStashDestroy_Private - Destroy the stash
81 */
82 #undef __FUNCT__
83 #define __FUNCT__ "MatStashDestroy_Private"
84 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
85 {
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   if (stash->array) {
90     ierr = PetscFree(stash->array);CHKERRQ(ierr);
91     stash->array = 0;
92   }
93   PetscFunctionReturn(0);
94 }
95 
96 /*
97    MatStashScatterEnd_Private - This is called as the fial stage of
98    scatter. The final stages of messagepassing is done here, and
99    all the memory used for messagepassing is cleanedu up. This
100    routine also resets the stash, and deallocates the memory used
101    for the stash. It also keeps track of the current memory usage
102    so that the same value can be used the next time through.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "MatStashScatterEnd_Private"
106 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107 {
108   PetscErrorCode ierr;
109   int         nsends=stash->nsends,bs2,oldnmax;
110   MPI_Status  *send_status;
111 
112   PetscFunctionBegin;
113   /* wait on sends */
114   if (nsends) {
115     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
116     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
117     ierr = PetscFree(send_status);CHKERRQ(ierr);
118   }
119 
120   /* Now update nmaxold to be app 10% more than max n used, this way the
121      wastage of space is reduced the next time this stash is used.
122      Also update the oldmax, only if it increases */
123   if (stash->n) {
124     bs2      = stash->bs*stash->bs;
125     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
126     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127   }
128 
129   stash->nmax       = 0;
130   stash->n          = 0;
131   stash->reallocs   = -1;
132   stash->rmax       = 0;
133   stash->nprocessed = 0;
134 
135   if (stash->array) {
136     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
137     stash->array = 0;
138     stash->idx   = 0;
139     stash->idy   = 0;
140   }
141   if (stash->send_waits) {
142     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
143     stash->send_waits = 0;
144   }
145   if (stash->recv_waits) {
146     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
147     stash->recv_waits = 0;
148   }
149   if (stash->svalues) {
150     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
151     stash->svalues = 0;
152   }
153   if (stash->rvalues) {
154     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
155     stash->rvalues = 0;
156   }
157   if (stash->rindices) {
158     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
159     stash->rindices = 0;
160   }
161   if (stash->nprocs) {
162     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
163     stash->nprocs = 0;
164   }
165 
166   PetscFunctionReturn(0);
167 }
168 
169 /*
170    MatStashGetInfo_Private - Gets the relavant statistics of the stash
171 
172    Input Parameters:
173    stash    - the stash
174    nstash   - the size of the stash. Indicates the number of values stored.
175    reallocs - the number of additional mallocs incurred.
176 
177 */
178 #undef __FUNCT__
179 #define __FUNCT__ "MatStashGetInfo_Private"
180 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
181 {
182   PetscInt bs2 = stash->bs*stash->bs;
183 
184   PetscFunctionBegin;
185   if (nstash) *nstash   = stash->n*bs2;
186   if (reallocs) {
187     if (stash->reallocs < 0) *reallocs = 0;
188     else                     *reallocs = stash->reallocs;
189   }
190   PetscFunctionReturn(0);
191 }
192 
193 
194 /*
195    MatStashSetInitialSize_Private - Sets the initial size of the stash
196 
197    Input Parameters:
198    stash  - the stash
199    max    - the value that is used as the max size of the stash.
200             this value is used while allocating memory.
201 */
202 #undef __FUNCT__
203 #define __FUNCT__ "MatStashSetInitialSize_Private"
204 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
205 {
206   PetscFunctionBegin;
207   stash->umax = max;
208   PetscFunctionReturn(0);
209 }
210 
211 /* MatStashExpand_Private - Expand the stash. This function is called
212    when the space in the stash is not sufficient to add the new values
213    being inserted into the stash.
214 
215    Input Parameters:
216    stash - the stash
217    incr  - the minimum increase requested
218 
219    Notes:
220    This routine doubles the currently used memory.
221  */
222 #undef __FUNCT__
223 #define __FUNCT__ "MatStashExpand_Private"
224 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
225 {
226   PetscErrorCode ierr;
227   PetscInt       *n_idx,*n_idy,newnmax,bs2;
228   MatScalar *n_array;
229 
230   PetscFunctionBegin;
231   /* allocate a larger stash */
232   bs2     = stash->bs*stash->bs;
233   if (!stash->oldnmax && !stash->nmax) { /* new stash */
234     if (stash->umax)                  newnmax = stash->umax/bs2;
235     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
236   } else if (!stash->nmax) { /* resuing stash */
237     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
238     else                              newnmax = stash->oldnmax/bs2;
239   } else                              newnmax = stash->nmax*2;
240   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
241 
242   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
243   n_idx = (PetscInt*)(n_array + bs2*newnmax);
244   n_idy = (PetscInt*)(n_idx + newnmax);
245   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
246   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
247   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
248   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
249   stash->array   = n_array;
250   stash->idx     = n_idx;
251   stash->idy     = n_idy;
252   stash->nmax    = newnmax;
253   stash->reallocs++;
254   PetscFunctionReturn(0);
255 }
256 /*
257   MatStashValuesRow_Private - inserts values into the stash. This function
258   expects the values to be roworiented. Multiple columns belong to the same row
259   can be inserted with a single call to this function.
260 
261   Input Parameters:
262   stash  - the stash
263   row    - the global row correspoiding to the values
264   n      - the number of elements inserted. All elements belong to the above row.
265   idxn   - the global column indices corresponding to each of the values.
266   values - the values inserted
267 */
268 #undef __FUNCT__
269 #define __FUNCT__ "MatStashValuesRow_Private"
270 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
271 {
272   PetscErrorCode ierr;
273   PetscInt i;
274 
275   PetscFunctionBegin;
276   /* Check and see if we have sufficient memory */
277   if ((stash->n + n) > stash->nmax) {
278     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
279   }
280   for (i=0; i<n; i++) {
281     stash->idx[stash->n]   = row;
282     stash->idy[stash->n]   = idxn[i];
283     stash->array[stash->n] = values[i];
284     stash->n++;
285   }
286   PetscFunctionReturn(0);
287 }
288 /*
289   MatStashValuesCol_Private - inserts values into the stash. This function
290   expects the values to be columnoriented. Multiple columns belong to the same row
291   can be inserted with a single call to this function.
292 
293   Input Parameters:
294   stash   - the stash
295   row     - the global row correspoiding to the values
296   n       - the number of elements inserted. All elements belong to the above row.
297   idxn    - the global column indices corresponding to each of the values.
298   values  - the values inserted
299   stepval - the consecutive values are sepated by a distance of stepval.
300             this happens because the input is columnoriented.
301 */
302 #undef __FUNCT__
303 #define __FUNCT__ "MatStashValuesCol_Private"
304 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
305 {
306   PetscErrorCode ierr;
307   PetscInt i;
308 
309   PetscFunctionBegin;
310   /* Check and see if we have sufficient memory */
311   if ((stash->n + n) > stash->nmax) {
312     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
313   }
314   for (i=0; i<n; i++) {
315     stash->idx[stash->n]   = row;
316     stash->idy[stash->n]   = idxn[i];
317     stash->array[stash->n] = values[i*stepval];
318     stash->n++;
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 /*
324   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
325   This function expects the values to be roworiented. Multiple columns belong
326   to the same block-row can be inserted with a single call to this function.
327   This function extracts the sub-block of values based on the dimensions of
328   the original input block, and the row,col values corresponding to the blocks.
329 
330   Input Parameters:
331   stash  - the stash
332   row    - the global block-row correspoiding to the values
333   n      - the number of elements inserted. All elements belong to the above row.
334   idxn   - the global block-column indices corresponding to each of the blocks of
335            values. Each block is of size bs*bs.
336   values - the values inserted
337   rmax   - the number of block-rows in the original block.
338   cmax   - the number of block-columsn on the original block.
339   idx    - the index of the current block-row in the original block.
340 */
341 #undef __FUNCT__
342 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
343 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
344 {
345   PetscErrorCode ierr;
346   PetscInt i,j,k,bs2,bs=stash->bs;
347   const MatScalar *vals;
348   MatScalar       *array;
349 
350   PetscFunctionBegin;
351   bs2 = bs*bs;
352   if ((stash->n+n) > stash->nmax) {
353     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
354   }
355   for (i=0; i<n; i++) {
356     stash->idx[stash->n]   = row;
357     stash->idy[stash->n] = idxn[i];
358     /* Now copy over the block of values. Store the values column oriented.
359        This enables inserting multiple blocks belonging to a row with a single
360        funtion call */
361     array = stash->array + bs2*stash->n;
362     vals  = values + idx*bs2*n + bs*i;
363     for (j=0; j<bs; j++) {
364       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
365       array += 1;
366       vals  += cmax*bs;
367     }
368     stash->n++;
369   }
370   PetscFunctionReturn(0);
371 }
372 
373 /*
374   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
375   This function expects the values to be roworiented. Multiple columns belong
376   to the same block-row can be inserted with a single call to this function.
377   This function extracts the sub-block of values based on the dimensions of
378   the original input block, and the row,col values corresponding to the blocks.
379 
380   Input Parameters:
381   stash  - the stash
382   row    - the global block-row correspoiding to the values
383   n      - the number of elements inserted. All elements belong to the above row.
384   idxn   - the global block-column indices corresponding to each of the blocks of
385            values. Each block is of size bs*bs.
386   values - the values inserted
387   rmax   - the number of block-rows in the original block.
388   cmax   - the number of block-columsn on the original block.
389   idx    - the index of the current block-row in the original block.
390 */
391 #undef __FUNCT__
392 #define __FUNCT__ "MatStashValuesColBlocked_Private"
393 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
394 {
395   PetscErrorCode ierr;
396   PetscInt i,j,k,bs2,bs=stash->bs;
397   const MatScalar *vals;
398   MatScalar       *array;
399 
400   PetscFunctionBegin;
401   bs2 = bs*bs;
402   if ((stash->n+n) > stash->nmax) {
403     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
404   }
405   for (i=0; i<n; i++) {
406     stash->idx[stash->n]   = row;
407     stash->idy[stash->n] = idxn[i];
408     /* Now copy over the block of values. Store the values column oriented.
409      This enables inserting multiple blocks belonging to a row with a single
410      funtion call */
411     array = stash->array + bs2*stash->n;
412     vals  = values + idx*bs + bs2*rmax*i;
413     for (j=0; j<bs; j++) {
414       for (k=0; k<bs; k++) {array[k] = vals[k];}
415       array += bs;
416       vals  += rmax*bs;
417     }
418     stash->n++;
419   }
420   PetscFunctionReturn(0);
421 }
422 /*
423   MatStashScatterBegin_Private - Initiates the transfer of values to the
424   correct owners. This function goes through the stash, and check the
425   owners of each stashed value, and sends the values off to the owner
426   processors.
427 
428   Input Parameters:
429   stash  - the stash
430   owners - an array of size 'no-of-procs' which gives the ownership range
431            for each node.
432 
433   Notes: The 'owners' array in the cased of the blocked-stash has the
434   ranges specified blocked global indices, and for the regular stash in
435   the proper global indices.
436 */
437 #undef __FUNCT__
438 #define __FUNCT__ "MatStashScatterBegin_Private"
439 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
440 {
441   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
442   PetscInt       size=stash->size,*nprocs,*nlengths,nsends,nreceives;
443   PetscErrorCode ierr;
444   PetscInt       nmax,count,*sindices,**rindices,i,j,idx,lastidx;
445   MatScalar      **rvalues,*svalues;
446   MPI_Comm       comm = stash->comm;
447   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
448 
449   PetscFunctionBegin;
450 
451   bs2   = stash->bs*stash->bs;
452   /*  first count number of contributors to each processor */
453   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
454   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
455   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
456 
457   nlengths = nprocs+size;
458   j        = 0;
459   lastidx  = -1;
460   for (i=0; i<stash->n; i++) {
461     /* if indices are NOT locally sorted, need to start search at the beginning */
462     if (lastidx > (idx = stash->idx[i])) j = 0;
463     lastidx = idx;
464     for (; j<size; j++) {
465       if (idx >= owners[j] && idx < owners[j+1]) {
466         nlengths[j]++; owner[i] = j; break;
467       }
468     }
469   }
470   /* Now check what procs get messages - and compute nsends. */
471   for (i=0, nsends=0 ; i<size; i++) {
472     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
473   }
474 
475   { int  *onodes,*olengths;
476   /* Determine the number of messages to expect, their lengths, from from-ids */
477   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
478   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
479   /* since clubbing row,col - lengths are multiplied by 2 */
480   for (i=0; i<nreceives; i++) olengths[i] *=2;
481   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
482   /* values are size 'bs2' lengths (and remove earlier factor 2 */
483   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
484   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
485   ierr = PetscFree(onodes);CHKERRQ(ierr);
486   ierr = PetscFree(olengths);CHKERRQ(ierr);
487   }
488 
489   /* do sends:
490       1) starts[i] gives the starting index in svalues for stuff going to
491          the ith processor
492   */
493   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
494   sindices = (PetscInt*)(svalues + bs2*stash->n);
495   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
496   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
497   starti   = startv + size;
498   /* use 2 sends the first with all_a, the next with all_i and all_j */
499   startv[0]  = 0; starti[0] = 0;
500   for (i=1; i<size; i++) {
501     startv[i] = startv[i-1] + nlengths[i-1];
502     starti[i] = starti[i-1] + nlengths[i-1]*2;
503   }
504   for (i=0; i<stash->n; i++) {
505     j = owner[i];
506     if (bs2 == 1) {
507       svalues[startv[j]]              = stash->array[i];
508     } else {
509       PetscInt       k;
510       MatScalar *buf1,*buf2;
511       buf1 = svalues+bs2*startv[j];
512       buf2 = stash->array+bs2*i;
513       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
514     }
515     sindices[starti[j]]               = stash->idx[i];
516     sindices[starti[j]+nlengths[j]]   = stash->idy[i];
517     startv[j]++;
518     starti[j]++;
519   }
520   startv[0] = 0;
521   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
522   for (i=0,count=0; i<size; i++) {
523     if (nprocs[i]) {
524       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
525       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
526     }
527   }
528   ierr = PetscFree(owner);CHKERRQ(ierr);
529   ierr = PetscFree(startv);CHKERRQ(ierr);
530   /* This memory is reused in scatter end  for a different purpose*/
531   for (i=0; i<2*size; i++) nprocs[i] = -1;
532   stash->nprocs      = nprocs;
533 
534   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
535   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
536 
537   for (i=0; i<nreceives; i++) {
538     recv_waits[2*i]   = recv_waits1[i];
539     recv_waits[2*i+1] = recv_waits2[i];
540   }
541   stash->recv_waits = recv_waits;
542   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
543   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
544 
545   stash->svalues    = svalues;    stash->rvalues     = rvalues;
546   stash->rindices   = rindices;   stash->send_waits  = send_waits;
547   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
548   stash->rmax       = nmax;
549   PetscFunctionReturn(0);
550 }
551 
552 /*
553    MatStashScatterGetMesg_Private - This function waits on the receives posted
554    in the function MatStashScatterBegin_Private() and returns one message at
555    a time to the calling function. If no messages are left, it indicates this
556    by setting flg = 0, else it sets flg = 1.
557 
558    Input Parameters:
559    stash - the stash
560 
561    Output Parameters:
562    nvals - the number of entries in the current message.
563    rows  - an array of row indices (or blocked indices) corresponding to the values
564    cols  - an array of columnindices (or blocked indices) corresponding to the values
565    vals  - the values
566    flg   - 0 indicates no more message left, and the current call has no values associated.
567            1 indicates that the current call successfully received a message, and the
568              other output parameters nvals,rows,cols,vals are set appropriately.
569 */
570 #undef __FUNCT__
571 #define __FUNCT__ "MatStashScatterGetMesg_Private"
572 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
573 {
574   PetscErrorCode ierr;
575   PetscMPIInt    i;
576   PetscInt       *flg_v,i1,i2,bs2;
577   MPI_Status     recv_status;
578   PetscTruth     match_found = PETSC_FALSE;
579 
580   PetscFunctionBegin;
581 
582   *flg = 0; /* When a message is discovered this is reset to 1 */
583   /* Return if no more messages to process */
584   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
585 
586   flg_v = stash->nprocs;
587   bs2   = stash->bs*stash->bs;
588   /* If a matching pair of receieves are found, process them, and return the data to
589      the calling function. Until then keep receiving messages */
590   while (!match_found) {
591     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
592     /* Now pack the received message into a structure which is useable by others */
593     if (i % 2) {
594       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
595       flg_v[2*recv_status.MPI_SOURCE] = i/2;
596       *nvals = *nvals/bs2;
597     } else {
598       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
599       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
600       *nvals = *nvals/2; /* This message has both row indices and col indices */
601     }
602 
603     /* Check if we have both the messages from this proc */
604     i1 = flg_v[2*recv_status.MPI_SOURCE];
605     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
606     if (i1 != -1 && i2 != -1) {
607       *rows       = stash->rindices[i2];
608       *cols       = *rows + *nvals;
609       *vals       = stash->rvalues[i1];
610       *flg        = 1;
611       stash->nprocessed ++;
612       match_found = PETSC_TRUE;
613     }
614   }
615   PetscFunctionReturn(0);
616 }
617