xref: /petsc/src/mat/utils/matstash.c (revision ac2b2aa072733048646740f7267415a7b45ae633)
1 
2 #include <petsc-private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 /*
10   MatStashCreate_Private - Creates a stash,currently used for all the parallel
11   matrix implementations. The stash is where elements of a matrix destined
12   to be stored on other processors are kept until matrix assembly is done.
13 
14   This is a simple minded stash. Simply adds entries to end of stash.
15 
16   Input Parameters:
17   comm - communicator, required for scatters.
18   bs   - stash block size. used when stashing blocks of values
19 
20   Output Parameters:
21   stash    - the newly created stash
22 */
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStashCreate_Private"
25 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
26 {
27   PetscErrorCode ierr;
28   PetscInt       max,*opt,nopt,i;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   /* Require 2 tags,get the second using PetscCommGetNewTag() */
33   stash->comm = comm;
34 
35   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
36   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
37   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
38   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
39   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
40   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
41 
42 
43   nopt = stash->size;
44   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
45   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
46   if (flg) {
47     if (nopt == 1)                max = opt[0];
48     else if (nopt == stash->size) max = opt[stash->rank];
49     else if (stash->rank < nopt)  max = opt[stash->rank];
50     else                          max = 0; /* Use default */
51     stash->umax = max;
52   } else {
53     stash->umax = 0;
54   }
55   ierr = PetscFree(opt);CHKERRQ(ierr);
56   if (bs <= 0) bs = 1;
57 
58   stash->bs         = bs;
59   stash->nmax       = 0;
60   stash->oldnmax    = 0;
61   stash->n          = 0;
62   stash->reallocs   = -1;
63   stash->space_head = 0;
64   stash->space      = 0;
65 
66   stash->send_waits  = 0;
67   stash->recv_waits  = 0;
68   stash->send_status = 0;
69   stash->nsends      = 0;
70   stash->nrecvs      = 0;
71   stash->svalues     = 0;
72   stash->rvalues     = 0;
73   stash->rindices    = 0;
74   stash->nprocessed  = 0;
75   stash->reproduce   = PETSC_FALSE;
76 
77   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
78   ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
79   if (flg) {
80     SETERRQ(comm,PETSC_ERR_SUP,"Not implemented yet");
81   } else {
82     stash->ScatterBegin   = MatStashScatterBegin_Ref;
83     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
84     stash->ScatterEnd     = MatStashScatterEnd_Ref;
85     stash->ScatterDestroy = NULL;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 /*
91    MatStashDestroy_Private - Destroy the stash
92 */
93 #undef __FUNCT__
94 #define __FUNCT__ "MatStashDestroy_Private"
95 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
96 {
97   PetscErrorCode ierr;
98 
99   PetscFunctionBegin;
100   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
101   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
102 
103   stash->space = 0;
104 
105   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 /*
110    MatStashScatterEnd_Private - This is called as the final stage of
111    scatter. The final stages of message passing is done here, and
112    all the memory used for message passing is cleaned up. This
113    routine also resets the stash, and deallocates the memory used
114    for the stash. It also keeps track of the current memory usage
115    so that the same value can be used the next time through.
116 */
117 #undef __FUNCT__
118 #define __FUNCT__ "MatStashScatterEnd_Private"
119 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
120 {
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
125   PetscFunctionReturn(0);
126 }
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "MatStashScatterEnd_Ref"
130 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
131 {
132   PetscErrorCode ierr;
133   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
134   MPI_Status     *send_status;
135 
136   PetscFunctionBegin;
137   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
138   /* wait on sends */
139   if (nsends) {
140     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
141     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
142     ierr = PetscFree(send_status);CHKERRQ(ierr);
143   }
144 
145   /* Now update nmaxold to be app 10% more than max n used, this way the
146      wastage of space is reduced the next time this stash is used.
147      Also update the oldmax, only if it increases */
148   if (stash->n) {
149     bs2     = stash->bs*stash->bs;
150     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
151     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
152   }
153 
154   stash->nmax       = 0;
155   stash->n          = 0;
156   stash->reallocs   = -1;
157   stash->nprocessed = 0;
158 
159   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
160 
161   stash->space = 0;
162 
163   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
164   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
165   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
166   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
167   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
168   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
169   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
170   PetscFunctionReturn(0);
171 }
172 
173 /*
174    MatStashGetInfo_Private - Gets the relavant statistics of the stash
175 
176    Input Parameters:
177    stash    - the stash
178    nstash   - the size of the stash. Indicates the number of values stored.
179    reallocs - the number of additional mallocs incurred.
180 
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "MatStashGetInfo_Private"
184 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
185 {
186   PetscInt bs2 = stash->bs*stash->bs;
187 
188   PetscFunctionBegin;
189   if (nstash) *nstash = stash->n*bs2;
190   if (reallocs) {
191     if (stash->reallocs < 0) *reallocs = 0;
192     else                     *reallocs = stash->reallocs;
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 /*
198    MatStashSetInitialSize_Private - Sets the initial size of the stash
199 
200    Input Parameters:
201    stash  - the stash
202    max    - the value that is used as the max size of the stash.
203             this value is used while allocating memory.
204 */
205 #undef __FUNCT__
206 #define __FUNCT__ "MatStashSetInitialSize_Private"
207 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
208 {
209   PetscFunctionBegin;
210   stash->umax = max;
211   PetscFunctionReturn(0);
212 }
213 
214 /* MatStashExpand_Private - Expand the stash. This function is called
215    when the space in the stash is not sufficient to add the new values
216    being inserted into the stash.
217 
218    Input Parameters:
219    stash - the stash
220    incr  - the minimum increase requested
221 
222    Notes:
223    This routine doubles the currently used memory.
224  */
225 #undef __FUNCT__
226 #define __FUNCT__ "MatStashExpand_Private"
227 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
228 {
229   PetscErrorCode ierr;
230   PetscInt       newnmax,bs2= stash->bs*stash->bs;
231 
232   PetscFunctionBegin;
233   /* allocate a larger stash */
234   if (!stash->oldnmax && !stash->nmax) { /* new stash */
235     if (stash->umax)                  newnmax = stash->umax/bs2;
236     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
237   } else if (!stash->nmax) { /* resuing stash */
238     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
239     else                              newnmax = stash->oldnmax/bs2;
240   } else                              newnmax = stash->nmax*2;
241   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
242 
243   /* Get a MatStashSpace and attach it to stash */
244   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
245   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
246     stash->space_head = stash->space;
247   }
248 
249   stash->reallocs++;
250   stash->nmax = newnmax;
251   PetscFunctionReturn(0);
252 }
253 /*
254   MatStashValuesRow_Private - inserts values into the stash. This function
255   expects the values to be roworiented. Multiple columns belong to the same row
256   can be inserted with a single call to this function.
257 
258   Input Parameters:
259   stash  - the stash
260   row    - the global row correspoiding to the values
261   n      - the number of elements inserted. All elements belong to the above row.
262   idxn   - the global column indices corresponding to each of the values.
263   values - the values inserted
264 */
265 #undef __FUNCT__
266 #define __FUNCT__ "MatStashValuesRow_Private"
267 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
268 {
269   PetscErrorCode     ierr;
270   PetscInt           i,k,cnt = 0;
271   PetscMatStashSpace space=stash->space;
272 
273   PetscFunctionBegin;
274   /* Check and see if we have sufficient memory */
275   if (!space || space->local_remaining < n) {
276     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
277   }
278   space = stash->space;
279   k     = space->local_used;
280   for (i=0; i<n; i++) {
281     if (ignorezeroentries && (values[i] == 0.0)) continue;
282     space->idx[k] = row;
283     space->idy[k] = idxn[i];
284     space->val[k] = values[i];
285     k++;
286     cnt++;
287   }
288   stash->n               += cnt;
289   space->local_used      += cnt;
290   space->local_remaining -= cnt;
291   PetscFunctionReturn(0);
292 }
293 
294 /*
295   MatStashValuesCol_Private - inserts values into the stash. This function
296   expects the values to be columnoriented. Multiple columns belong to the same row
297   can be inserted with a single call to this function.
298 
299   Input Parameters:
300   stash   - the stash
301   row     - the global row correspoiding to the values
302   n       - the number of elements inserted. All elements belong to the above row.
303   idxn    - the global column indices corresponding to each of the values.
304   values  - the values inserted
305   stepval - the consecutive values are sepated by a distance of stepval.
306             this happens because the input is columnoriented.
307 */
308 #undef __FUNCT__
309 #define __FUNCT__ "MatStashValuesCol_Private"
310 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
311 {
312   PetscErrorCode     ierr;
313   PetscInt           i,k,cnt = 0;
314   PetscMatStashSpace space=stash->space;
315 
316   PetscFunctionBegin;
317   /* Check and see if we have sufficient memory */
318   if (!space || space->local_remaining < n) {
319     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
320   }
321   space = stash->space;
322   k     = space->local_used;
323   for (i=0; i<n; i++) {
324     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
325     space->idx[k] = row;
326     space->idy[k] = idxn[i];
327     space->val[k] = values[i*stepval];
328     k++;
329     cnt++;
330   }
331   stash->n               += cnt;
332   space->local_used      += cnt;
333   space->local_remaining -= cnt;
334   PetscFunctionReturn(0);
335 }
336 
337 /*
338   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
339   This function expects the values to be roworiented. Multiple columns belong
340   to the same block-row can be inserted with a single call to this function.
341   This function extracts the sub-block of values based on the dimensions of
342   the original input block, and the row,col values corresponding to the blocks.
343 
344   Input Parameters:
345   stash  - the stash
346   row    - the global block-row correspoiding to the values
347   n      - the number of elements inserted. All elements belong to the above row.
348   idxn   - the global block-column indices corresponding to each of the blocks of
349            values. Each block is of size bs*bs.
350   values - the values inserted
351   rmax   - the number of block-rows in the original block.
352   cmax   - the number of block-columsn on the original block.
353   idx    - the index of the current block-row in the original block.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
357 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
358 {
359   PetscErrorCode     ierr;
360   PetscInt           i,j,k,bs2,bs=stash->bs,l;
361   const PetscScalar  *vals;
362   PetscScalar        *array;
363   PetscMatStashSpace space=stash->space;
364 
365   PetscFunctionBegin;
366   if (!space || space->local_remaining < n) {
367     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
368   }
369   space = stash->space;
370   l     = space->local_used;
371   bs2   = bs*bs;
372   for (i=0; i<n; i++) {
373     space->idx[l] = row;
374     space->idy[l] = idxn[i];
375     /* Now copy over the block of values. Store the values column oriented.
376        This enables inserting multiple blocks belonging to a row with a single
377        funtion call */
378     array = space->val + bs2*l;
379     vals  = values + idx*bs2*n + bs*i;
380     for (j=0; j<bs; j++) {
381       for (k=0; k<bs; k++) array[k*bs] = vals[k];
382       array++;
383       vals += cmax*bs;
384     }
385     l++;
386   }
387   stash->n               += n;
388   space->local_used      += n;
389   space->local_remaining -= n;
390   PetscFunctionReturn(0);
391 }
392 
393 /*
394   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
395   This function expects the values to be roworiented. Multiple columns belong
396   to the same block-row can be inserted with a single call to this function.
397   This function extracts the sub-block of values based on the dimensions of
398   the original input block, and the row,col values corresponding to the blocks.
399 
400   Input Parameters:
401   stash  - the stash
402   row    - the global block-row correspoiding to the values
403   n      - the number of elements inserted. All elements belong to the above row.
404   idxn   - the global block-column indices corresponding to each of the blocks of
405            values. Each block is of size bs*bs.
406   values - the values inserted
407   rmax   - the number of block-rows in the original block.
408   cmax   - the number of block-columsn on the original block.
409   idx    - the index of the current block-row in the original block.
410 */
411 #undef __FUNCT__
412 #define __FUNCT__ "MatStashValuesColBlocked_Private"
413 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
414 {
415   PetscErrorCode     ierr;
416   PetscInt           i,j,k,bs2,bs=stash->bs,l;
417   const PetscScalar  *vals;
418   PetscScalar        *array;
419   PetscMatStashSpace space=stash->space;
420 
421   PetscFunctionBegin;
422   if (!space || space->local_remaining < n) {
423     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
424   }
425   space = stash->space;
426   l     = space->local_used;
427   bs2   = bs*bs;
428   for (i=0; i<n; i++) {
429     space->idx[l] = row;
430     space->idy[l] = idxn[i];
431     /* Now copy over the block of values. Store the values column oriented.
432      This enables inserting multiple blocks belonging to a row with a single
433      funtion call */
434     array = space->val + bs2*l;
435     vals  = values + idx*bs2*n + bs*i;
436     for (j=0; j<bs; j++) {
437       for (k=0; k<bs; k++) array[k] = vals[k];
438       array += bs;
439       vals  += rmax*bs;
440     }
441     l++;
442   }
443   stash->n               += n;
444   space->local_used      += n;
445   space->local_remaining -= n;
446   PetscFunctionReturn(0);
447 }
448 /*
449   MatStashScatterBegin_Private - Initiates the transfer of values to the
450   correct owners. This function goes through the stash, and check the
451   owners of each stashed value, and sends the values off to the owner
452   processors.
453 
454   Input Parameters:
455   stash  - the stash
456   owners - an array of size 'no-of-procs' which gives the ownership range
457            for each node.
458 
459   Notes: The 'owners' array in the cased of the blocked-stash has the
460   ranges specified blocked global indices, and for the regular stash in
461   the proper global indices.
462 */
463 #undef __FUNCT__
464 #define __FUNCT__ "MatStashScatterBegin_Private"
465 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
466 {
467   PetscErrorCode ierr;
468 
469   PetscFunctionBegin;
470   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatStashScatterBegin_Ref"
476 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
477 {
478   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
479   PetscInt           size=stash->size,nsends;
480   PetscErrorCode     ierr;
481   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
482   PetscScalar        **rvalues,*svalues;
483   MPI_Comm           comm = stash->comm;
484   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
485   PetscMPIInt        *sizes,*nlengths,nreceives;
486   PetscInt           *sp_idx,*sp_idy;
487   PetscScalar        *sp_val;
488   PetscMatStashSpace space,space_next;
489 
490   PetscFunctionBegin;
491   bs2 = stash->bs*stash->bs;
492 
493   /*  first count number of contributors to each processor */
494   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
495   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
496   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
497 
498   i       = j    = 0;
499   lastidx = -1;
500   space   = stash->space_head;
501   while (space != NULL) {
502     space_next = space->next;
503     sp_idx     = space->idx;
504     for (l=0; l<space->local_used; l++) {
505       /* if indices are NOT locally sorted, need to start search at the beginning */
506       if (lastidx > (idx = sp_idx[l])) j = 0;
507       lastidx = idx;
508       for (; j<size; j++) {
509         if (idx >= owners[j] && idx < owners[j+1]) {
510           nlengths[j]++; owner[i] = j; break;
511         }
512       }
513       i++;
514     }
515     space = space_next;
516   }
517   /* Now check what procs get messages - and compute nsends. */
518   for (i=0, nsends=0; i<size; i++) {
519     if (nlengths[i]) {
520       sizes[i] = 1; nsends++;
521     }
522   }
523 
524   {PetscMPIInt *onodes,*olengths;
525    /* Determine the number of messages to expect, their lengths, from from-ids */
526    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
527    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
528    /* since clubbing row,col - lengths are multiplied by 2 */
529    for (i=0; i<nreceives; i++) olengths[i] *=2;
530    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
531    /* values are size 'bs2' lengths (and remove earlier factor 2 */
532    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
533    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
534    ierr = PetscFree(onodes);CHKERRQ(ierr);
535    ierr = PetscFree(olengths);CHKERRQ(ierr);}
536 
537   /* do sends:
538       1) starts[i] gives the starting index in svalues for stuff going to
539          the ith processor
540   */
541   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
542   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
543   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
544   /* use 2 sends the first with all_a, the next with all_i and all_j */
545   startv[0] = 0; starti[0] = 0;
546   for (i=1; i<size; i++) {
547     startv[i] = startv[i-1] + nlengths[i-1];
548     starti[i] = starti[i-1] + 2*nlengths[i-1];
549   }
550 
551   i     = 0;
552   space = stash->space_head;
553   while (space != NULL) {
554     space_next = space->next;
555     sp_idx     = space->idx;
556     sp_idy     = space->idy;
557     sp_val     = space->val;
558     for (l=0; l<space->local_used; l++) {
559       j = owner[i];
560       if (bs2 == 1) {
561         svalues[startv[j]] = sp_val[l];
562       } else {
563         PetscInt    k;
564         PetscScalar *buf1,*buf2;
565         buf1 = svalues+bs2*startv[j];
566         buf2 = space->val + bs2*l;
567         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
568       }
569       sindices[starti[j]]             = sp_idx[l];
570       sindices[starti[j]+nlengths[j]] = sp_idy[l];
571       startv[j]++;
572       starti[j]++;
573       i++;
574     }
575     space = space_next;
576   }
577   startv[0] = 0;
578   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
579 
580   for (i=0,count=0; i<size; i++) {
581     if (sizes[i]) {
582       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
583       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
584     }
585   }
586 #if defined(PETSC_USE_INFO)
587   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
588   for (i=0; i<size; i++) {
589     if (sizes[i]) {
590       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
591     }
592   }
593 #endif
594   ierr = PetscFree(nlengths);CHKERRQ(ierr);
595   ierr = PetscFree(owner);CHKERRQ(ierr);
596   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
597   ierr = PetscFree(sizes);CHKERRQ(ierr);
598 
599   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
600   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
601 
602   for (i=0; i<nreceives; i++) {
603     recv_waits[2*i]   = recv_waits1[i];
604     recv_waits[2*i+1] = recv_waits2[i];
605   }
606   stash->recv_waits = recv_waits;
607 
608   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
609   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
610 
611   stash->svalues         = svalues;
612   stash->sindices        = sindices;
613   stash->rvalues         = rvalues;
614   stash->rindices        = rindices;
615   stash->send_waits      = send_waits;
616   stash->nsends          = nsends;
617   stash->nrecvs          = nreceives;
618   stash->reproduce_count = 0;
619   PetscFunctionReturn(0);
620 }
621 
622 /*
623    MatStashScatterGetMesg_Private - This function waits on the receives posted
624    in the function MatStashScatterBegin_Private() and returns one message at
625    a time to the calling function. If no messages are left, it indicates this
626    by setting flg = 0, else it sets flg = 1.
627 
628    Input Parameters:
629    stash - the stash
630 
631    Output Parameters:
632    nvals - the number of entries in the current message.
633    rows  - an array of row indices (or blocked indices) corresponding to the values
634    cols  - an array of columnindices (or blocked indices) corresponding to the values
635    vals  - the values
636    flg   - 0 indicates no more message left, and the current call has no values associated.
637            1 indicates that the current call successfully received a message, and the
638              other output parameters nvals,rows,cols,vals are set appropriately.
639 */
640 #undef __FUNCT__
641 #define __FUNCT__ "MatStashScatterGetMesg_Private"
642 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
643 {
644   PetscErrorCode ierr;
645 
646   PetscFunctionBegin;
647   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
648   PetscFunctionReturn(0);
649 }
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatStashScatterGetMesg_Ref"
653 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
654 {
655   PetscErrorCode ierr;
656   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
657   PetscInt       bs2;
658   MPI_Status     recv_status;
659   PetscBool      match_found = PETSC_FALSE;
660 
661   PetscFunctionBegin;
662   *flg = 0; /* When a message is discovered this is reset to 1 */
663   /* Return if no more messages to process */
664   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
665 
666   bs2 = stash->bs*stash->bs;
667   /* If a matching pair of receives are found, process them, and return the data to
668      the calling function. Until then keep receiving messages */
669   while (!match_found) {
670     if (stash->reproduce) {
671       i    = stash->reproduce_count++;
672       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
673     } else {
674       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
675     }
676     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
677 
678     /* Now pack the received message into a structure which is usable by others */
679     if (i % 2) {
680       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
681 
682       flg_v[2*recv_status.MPI_SOURCE] = i/2;
683 
684       *nvals = *nvals/bs2;
685     } else {
686       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
687 
688       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
689 
690       *nvals = *nvals/2; /* This message has both row indices and col indices */
691     }
692 
693     /* Check if we have both messages from this proc */
694     i1 = flg_v[2*recv_status.MPI_SOURCE];
695     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
696     if (i1 != -1 && i2 != -1) {
697       *rows = stash->rindices[i2];
698       *cols = *rows + *nvals;
699       *vals = stash->rvalues[i1];
700       *flg  = 1;
701       stash->nprocessed++;
702       match_found = PETSC_TRUE;
703     }
704   }
705   PetscFunctionReturn(0);
706 }
707