xref: /petsc/src/mat/utils/matstash.c (revision 97604f25f5dfe92e119abbbf1807aed7ef73282d)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/matimpl.h"
4 #include "src/mat/utils/matstashspace.h"
5 #undef MV
6 /*
7        The input to the stash is ALWAYS in MatScalar precision, and the
8     internal storage and output is also in MatScalar.
9 */
10 #define DEFAULT_STASH_SIZE   10000
11 
12 /*
13   MatStashCreate_Private - Creates a stash,currently used for all the parallel
14   matrix implementations. The stash is where elements of a matrix destined
15   to be stored on other processors are kept until matrix assembly is done.
16 
17   This is a simple minded stash. Simply adds entries to end of stash.
18 
19   Input Parameters:
20   comm - communicator, required for scatters.
21   bs   - stash block size. used when stashing blocks of values
22 
23   Output Parameters:
24   stash    - the newly created stash
25 */
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStashCreate_Private"
28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
29 {
30   PetscErrorCode ierr;
31   PetscInt       max,*opt,nopt;
32   PetscTruth     flg;
33 
34   PetscFunctionBegin;
35   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36   stash->comm = comm;
37   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41 
42   nopt = stash->size;
43   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45   if (flg) {
46     if (nopt == 1)                max = opt[0];
47     else if (nopt == stash->size) max = opt[stash->rank];
48     else if (stash->rank < nopt)  max = opt[stash->rank];
49     else                          max = 0; /* Use default */
50     stash->umax = max;
51   } else {
52     stash->umax = 0;
53   }
54   ierr = PetscFree(opt);CHKERRQ(ierr);
55   if (bs <= 0) bs = 1;
56 
57   stash->bs       = bs;
58   stash->nmax     = 0;
59   stash->oldnmax  = 0;
60   stash->n        = 0;
61   stash->reallocs = -1;
62 #ifdef MV
63   stash->idx      = 0;
64   stash->idy      = 0;
65   stash->array    = 0;
66 #endif
67   stash->space_head = 0;
68   stash->space      = 0;
69 
70   stash->send_waits  = 0;
71   stash->recv_waits  = 0;
72   stash->send_status = 0;
73   stash->nsends      = 0;
74   stash->nrecvs      = 0;
75   stash->svalues     = 0;
76   stash->rvalues     = 0;
77   stash->rindices    = 0;
78   stash->nprocs      = 0;
79   stash->nprocessed  = 0;
80   PetscFunctionReturn(0);
81 }
82 
83 /*
84    MatStashDestroy_Private - Destroy the stash
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "MatStashDestroy_Private"
88 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93 #ifdef MV
94   if (stash->array) {
95     ierr = PetscFree(stash->array);CHKERRQ(ierr);
96     stash->array = 0;
97   }
98 #endif
99   if (stash->space_head){
100     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
101     stash->space_head = 0;
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 /*
107    MatStashScatterEnd_Private - This is called as the fial stage of
108    scatter. The final stages of messagepassing is done here, and
109    all the memory used for messagepassing is cleanedu up. This
110    routine also resets the stash, and deallocates the memory used
111    for the stash. It also keeps track of the current memory usage
112    so that the same value can be used the next time through.
113 */
114 #undef __FUNCT__
115 #define __FUNCT__ "MatStashScatterEnd_Private"
116 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
117 {
118   PetscErrorCode ierr;
119   int         nsends=stash->nsends,bs2,oldnmax;
120   MPI_Status  *send_status;
121 
122   PetscFunctionBegin;
123   /* wait on sends */
124   if (nsends) {
125     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
126     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
127     ierr = PetscFree(send_status);CHKERRQ(ierr);
128   }
129 
130   /* Now update nmaxold to be app 10% more than max n used, this way the
131      wastage of space is reduced the next time this stash is used.
132      Also update the oldmax, only if it increases */
133   if (stash->n) {
134     bs2      = stash->bs*stash->bs;
135     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
136     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
137   }
138 
139   stash->nmax       = 0;
140   stash->n          = 0;
141   stash->reallocs   = -1;
142   stash->nprocessed = 0;
143 #ifdef MV
144   if (stash->array) {
145     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
146     stash->array = 0;
147     stash->idx   = 0;
148     stash->idy   = 0;
149   }
150 #endif
151   if (stash->space_head){
152     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
153     stash->space_head = 0;
154   }
155   if (stash->send_waits) {
156     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
157     stash->send_waits = 0;
158   }
159   if (stash->recv_waits) {
160     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
161     stash->recv_waits = 0;
162   }
163   if (stash->svalues) {
164     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
165     stash->svalues = 0;
166   }
167   if (stash->rvalues) {
168     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
169     stash->rvalues = 0;
170   }
171   if (stash->rindices) {
172     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
173     stash->rindices = 0;
174   }
175   if (stash->nprocs) {
176     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
177     stash->nprocs = 0;
178   }
179 
180   PetscFunctionReturn(0);
181 }
182 
183 /*
184    MatStashGetInfo_Private - Gets the relavant statistics of the stash
185 
186    Input Parameters:
187    stash    - the stash
188    nstash   - the size of the stash. Indicates the number of values stored.
189    reallocs - the number of additional mallocs incurred.
190 
191 */
192 #undef __FUNCT__
193 #define __FUNCT__ "MatStashGetInfo_Private"
194 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
195 {
196   PetscInt bs2 = stash->bs*stash->bs;
197 
198   PetscFunctionBegin;
199   if (nstash) *nstash   = stash->n*bs2;
200   if (reallocs) {
201     if (stash->reallocs < 0) *reallocs = 0;
202     else                     *reallocs = stash->reallocs;
203   }
204   PetscFunctionReturn(0);
205 }
206 
207 
208 /*
209    MatStashSetInitialSize_Private - Sets the initial size of the stash
210 
211    Input Parameters:
212    stash  - the stash
213    max    - the value that is used as the max size of the stash.
214             this value is used while allocating memory.
215 */
216 #undef __FUNCT__
217 #define __FUNCT__ "MatStashSetInitialSize_Private"
218 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
219 {
220   PetscFunctionBegin;
221   stash->umax = max;
222   PetscFunctionReturn(0);
223 }
224 
225 /* MatStashExpand_Private - Expand the stash. This function is called
226    when the space in the stash is not sufficient to add the new values
227    being inserted into the stash.
228 
229    Input Parameters:
230    stash - the stash
231    incr  - the minimum increase requested
232 
233    Notes:
234    This routine doubles the currently used memory.
235  */
236 #undef __FUNCT__
237 #define __FUNCT__ "MatStashExpand_Private"
238 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
239 {
240   PetscErrorCode ierr;
241   PetscInt       *n_idx,*n_idy,newnmax,bs2= stash->bs*stash->bs;
242   MatScalar      *n_array;
243 
244   PetscFunctionBegin;
245   /* allocate a larger stash */
246   if (!stash->oldnmax && !stash->nmax) { /* new stash */
247     if (stash->umax)                  newnmax = stash->umax/bs2;
248     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
249   } else if (!stash->nmax) { /* resuing stash */
250     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
251     else                              newnmax = stash->oldnmax/bs2;
252   } else                              newnmax = stash->nmax*2;
253   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
254 
255   /* Get a MatStashSpace and attach it to stash */
256   if (!stash->nmax) { /* new stash or resuing stash->oldnmax */
257     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr);
258     stash->space = stash->space_head;
259   } else {
260     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
261   }
262 #ifdef MV
263   PetscMPIInt rank;
264   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
265   printf("[%d] MatStashExpand ends, incr %d, space %p, space->val %p\n",rank,incr,stash->space,(stash->space)->val);
266 #endif
267 #ifdef MV
268   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
269   n_idx = (PetscInt*)(n_array + bs2*newnmax);
270   n_idy = (PetscInt*)(n_idx + newnmax);
271   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
272   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
273   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
274   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
275   stash->array   = n_array;
276   stash->idx     = n_idx;
277   stash->idy     = n_idy;
278 #endif /* MV */
279   stash->reallocs++;
280   stash->nmax    = newnmax;
281   PetscFunctionReturn(0);
282 }
283 /*
284   MatStashValuesRow_Private - inserts values into the stash. This function
285   expects the values to be roworiented. Multiple columns belong to the same row
286   can be inserted with a single call to this function.
287 
288   Input Parameters:
289   stash  - the stash
290   row    - the global row correspoiding to the values
291   n      - the number of elements inserted. All elements belong to the above row.
292   idxn   - the global column indices corresponding to each of the values.
293   values - the values inserted
294 */
295 #undef __FUNCT__
296 #define __FUNCT__ "MatStashValuesRow_Private"
297 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
298 {
299   PetscErrorCode     ierr;
300   PetscInt           i,k;
301   PetscMatStashSpace space=stash->space;
302 
303   PetscFunctionBegin;
304 #ifdef MV
305   PetscMPIInt rank;
306   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
307 #endif
308   /* Check and see if we have sufficient memory */
309   if (!space || space->local_remaining < n){
310     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
311   }
312   space = stash->space;
313 #ifdef MV
314   if (rank == 1){
315     printf(" [%d] MatStashValuesRow, local_remaining %d, n %d\n",rank,space->local_remaining,n);
316   }
317   /* printf("space %p, stash %d values, local_used %d\n",space,n,space->local_used); */
318 #endif
319   k = space->local_used;
320   for (i=0; i<n; i++) {
321     space->idx[k] = row;
322     space->idy[k] = idxn[i];
323     space->val[k] = values[i];
324     k++;
325 #ifdef MV
326     stash->idx[stash->n]   = row;
327     stash->idy[stash->n]   = idxn[i];
328     stash->array[stash->n] = values[i];
329 #endif
330     stash->n++;
331   }
332   /* stash->n               += n; */
333   space->local_used      += n;
334   space->local_remaining -= n;
335   PetscFunctionReturn(0);
336 }
337 
338 /*
339   MatStashValuesCol_Private - inserts values into the stash. This function
340   expects the values to be columnoriented. Multiple columns belong to the same row
341   can be inserted with a single call to this function.
342 
343   Input Parameters:
344   stash   - the stash
345   row     - the global row correspoiding to the values
346   n       - the number of elements inserted. All elements belong to the above row.
347   idxn    - the global column indices corresponding to each of the values.
348   values  - the values inserted
349   stepval - the consecutive values are sepated by a distance of stepval.
350             this happens because the input is columnoriented.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatStashValuesCol_Private"
354 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
355 {
356   PetscErrorCode     ierr;
357   PetscInt           i,k;
358   PetscMatStashSpace space=stash->space;
359 
360   PetscFunctionBegin;
361   /* Check and see if we have sufficient memory */
362   if (!space || space->local_remaining < n){
363     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
364   }
365   space = stash->space;
366   k = space->local_used;
367   for (i=0; i<n; i++) {
368     space->idx[k] = row;
369     space->idy[k] = idxn[i];
370     space->val[k] = values[i*stepval];
371     k++;
372 #ifdef MV
373     stash->idx[stash->n]   = row;
374     stash->idy[stash->n]   = idxn[i];
375     stash->array[stash->n] = values[i*stepval];
376 #endif
377     stash->n++;
378   }
379   /* stash->n               += n; */
380   space->local_used      += n;
381   space->local_remaining -= n;
382   PetscFunctionReturn(0);
383 }
384 
385 /*
386   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
387   This function expects the values to be roworiented. Multiple columns belong
388   to the same block-row can be inserted with a single call to this function.
389   This function extracts the sub-block of values based on the dimensions of
390   the original input block, and the row,col values corresponding to the blocks.
391 
392   Input Parameters:
393   stash  - the stash
394   row    - the global block-row correspoiding to the values
395   n      - the number of elements inserted. All elements belong to the above row.
396   idxn   - the global block-column indices corresponding to each of the blocks of
397            values. Each block is of size bs*bs.
398   values - the values inserted
399   rmax   - the number of block-rows in the original block.
400   cmax   - the number of block-columsn on the original block.
401   idx    - the index of the current block-row in the original block.
402 */
403 #undef __FUNCT__
404 #define __FUNCT__ "MatStashValuesRowBlocked_Private"
405 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
406 {
407   PetscErrorCode  ierr;
408   PetscInt        i,j,k,bs2,bs=stash->bs,l;
409   const MatScalar *vals;
410   MatScalar       *array;
411   PetscMatStashSpace space=stash->space;
412 
413   PetscFunctionBegin;
414   if (!space || space->local_remaining < n){
415     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
416   }
417   space = stash->space;
418   l     = space->local_used;
419   bs2   = bs*bs;
420   for (i=0; i<n; i++) {
421     space->idx[l] = row;
422     space->idy[l] = idxn[i];
423     /* Now copy over the block of values. Store the values column oriented.
424        This enables inserting multiple blocks belonging to a row with a single
425        funtion call */
426     array = space->val + bs2*l;
427     vals  = values + idx*bs2*n + bs*i;
428     for (j=0; j<bs; j++) {
429       for (k=0; k<bs; k++) array[k*bs] = vals[k];
430       array++;
431       vals  += cmax*bs;
432     }
433     l++;
434 #ifdef MV
435     stash->idx[stash->n]   = row;
436     stash->idy[stash->n] = idxn[i];
437     /* Now copy over the block of values. Store the values column oriented.
438        This enables inserting multiple blocks belonging to a row with a single
439        funtion call */
440     array = stash->array + bs2*stash->n;
441     vals  = values + idx*bs2*n + bs*i;
442     for (j=0; j<bs; j++) {
443       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
444       array += 1;
445       vals  += cmax*bs;
446     }
447 #endif
448     stash->n++;
449   }
450   space->local_used      += n;
451   space->local_remaining -= n;
452   PetscFunctionReturn(0);
453 }
454 
455 /*
456   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
457   This function expects the values to be roworiented. Multiple columns belong
458   to the same block-row can be inserted with a single call to this function.
459   This function extracts the sub-block of values based on the dimensions of
460   the original input block, and the row,col values corresponding to the blocks.
461 
462   Input Parameters:
463   stash  - the stash
464   row    - the global block-row correspoiding to the values
465   n      - the number of elements inserted. All elements belong to the above row.
466   idxn   - the global block-column indices corresponding to each of the blocks of
467            values. Each block is of size bs*bs.
468   values - the values inserted
469   rmax   - the number of block-rows in the original block.
470   cmax   - the number of block-columsn on the original block.
471   idx    - the index of the current block-row in the original block.
472 */
473 #undef __FUNCT__
474 #define __FUNCT__ "MatStashValuesColBlocked_Private"
475 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
476 {
477   PetscErrorCode  ierr;
478   PetscInt        i,j,k,bs2,bs=stash->bs,l;
479   const MatScalar *vals;
480   MatScalar       *array;
481   PetscMatStashSpace space=stash->space;
482 
483   PetscFunctionBegin;
484   if (!space || space->local_remaining < n){
485     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
486   }
487   space = stash->space;
488   l     = space->local_used;
489   bs2   = bs*bs;
490   for (i=0; i<n; i++) {
491     space->idx[l] = row;
492     space->idy[l] = idxn[i];
493     /* Now copy over the block of values. Store the values column oriented.
494      This enables inserting multiple blocks belonging to a row with a single
495      funtion call */
496     array = space->val + bs2*l;
497     vals  = values + idx*bs2*n + bs*i;
498     for (j=0; j<bs; j++) {
499       for (k=0; k<bs; k++) {array[k] = vals[k];}
500       array += bs;
501       vals  += rmax*bs;
502     }
503 #ifdef MV
504     stash->idx[stash->n]   = row;
505     stash->idy[stash->n] = idxn[i];
506     /* Now copy over the block of values. Store the values column oriented.
507      This enables inserting multiple blocks belonging to a row with a single
508      funtion call */
509     array = stash->array + bs2*stash->n;
510     vals  = values + idx*bs + bs2*rmax*i;
511     for (j=0; j<bs; j++) {
512       for (k=0; k<bs; k++) {array[k] = vals[k];}
513       array += bs;
514       vals  += rmax*bs;
515     }
516 #endif
517     stash->n++;
518   }
519   space->local_used      += n;
520   space->local_remaining -= n;
521   PetscFunctionReturn(0);
522 }
523 /*
524   MatStashScatterBegin_Private - Initiates the transfer of values to the
525   correct owners. This function goes through the stash, and check the
526   owners of each stashed value, and sends the values off to the owner
527   processors.
528 
529   Input Parameters:
530   stash  - the stash
531   owners - an array of size 'no-of-procs' which gives the ownership range
532            for each node.
533 
534   Notes: The 'owners' array in the cased of the blocked-stash has the
535   ranges specified blocked global indices, and for the regular stash in
536   the proper global indices.
537 */
538 #undef __FUNCT__
539 #define __FUNCT__ "MatStashScatterBegin_Private"
540 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
541 {
542   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
543   PetscInt       size=stash->size,nsends;
544   PetscErrorCode ierr;
545   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
546   MatScalar      **rvalues,*svalues;
547   MPI_Comm       comm = stash->comm;
548   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
549   PetscMPIInt    *nprocs,*nlengths,nreceives;
550   PetscInt       *idx_ptr,*idy,n=stash->n;
551   MatScalar      *val;
552   PetscMatStashSpace space,space_next,space_head=stash->space_head;
553 
554   PetscFunctionBegin;
555   bs2 = stash->bs*stash->bs;
556 #ifdef MV
557   PetscMPIInt rank;
558   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
559 #endif
560 
561 #ifdef MV
562   /* Copy values of space into val, idx, idy, and destroy space */
563   ierr = PetscMalloc((n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&val);CHKERRQ(ierr);
564   idx_ptr = (PetscInt*)(val + bs2*n);
565   idy     = (PetscInt*)(idx_ptr + n);
566   ierr = PetscMatStashSpaceContiguous(bs2,&stash->space_head,val,idx_ptr,idy);CHKERRQ(ierr);
567   /* printf("[%d] Before and after SpaceContiguous, space->head %p/%p\n",rank,space_head,stash->space_head); */
568   ierr = PetscFree(val);CHKERRQ(ierr);
569 #endif
570 #ifdef MV
571   printf("[%d] compare array with val ...\n",rank);
572   PetscScalar *array,*valtmp;
573   for (i=0; i<stash->n; i++){
574     array = stash->array +i*bs2;
575     valtmp = val+i*bs2;
576     for (j=0; j<bs2; j++){
577       if (*array != *valtmp) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %g != val %g",i,*array,*valtmp);
578       array++; valtmp++;
579     }
580     if (stash->idx[i] != idx_ptr[i]) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %d != idx %d",i,stash->idx[i],idx_ptr[i]);
581     if (stash->idy[i] != idy[i]) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %d != idy %d",i,stash->idy[i],idy[i]);
582   }
583 #endif
584 
585   /*  first count number of contributors to each processor */
586   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
587   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
588   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
589 
590   nlengths = nprocs+size;
591   i = j    = 0;
592   lastidx  = -1;
593   space    = space_head;
594   while (space != PETSC_NULL){
595     space_next = space->next;
596     for (l=0; l<space->local_used; l++){
597       idx=space->idx[l];
598       /* if indices are NOT locally sorted, need to start search at the beginning */
599       if (lastidx > idx) j = 0; /* idx = stash->idx[i] */
600       lastidx = idx;
601       for (; j<size; j++) {
602         if (idx >= owners[j] && idx < owners[j+1]) {
603           nlengths[j]++; owner[i] = j; break;
604         }
605       }
606       i++;
607     }
608     space = space_next;
609   }
610   /* Now check what procs get messages - and compute nsends. */
611   for (i=0, nsends=0 ; i<size; i++) {
612     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
613   }
614 
615   { int  *onodes,*olengths;
616   /* Determine the number of messages to expect, their lengths, from from-ids */
617   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
618   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
619   /* since clubbing row,col - lengths are multiplied by 2 */
620   for (i=0; i<nreceives; i++) olengths[i] *=2;
621   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
622   /* values are size 'bs2' lengths (and remove earlier factor 2 */
623   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
624   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
625   ierr = PetscFree(onodes);CHKERRQ(ierr);
626   ierr = PetscFree(olengths);CHKERRQ(ierr);
627   }
628 
629   /* do sends:
630       1) starts[i] gives the starting index in svalues for stuff going to
631          the ith processor
632   */
633   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
634   sindices = (PetscInt*)(svalues + bs2*stash->n);
635   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
636   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
637   starti   = startv + size;
638   /* use 2 sends the first with all_a, the next with all_i and all_j */
639   startv[0]  = 0; starti[0] = 0;
640   for (i=1; i<size; i++) {
641     startv[i] = startv[i-1] + nlengths[i-1];
642     starti[i] = starti[i-1] + nlengths[i-1]*2;
643   }
644 
645   i     = 0;
646   space = space_head;
647   while (space != PETSC_NULL){
648     space_next = space->next;
649     for (l=0; l<space->local_used; l++){
650       j = owner[i];
651       if (bs2 == 1) {
652         svalues[startv[j]] = space->val[l];       /* = stash->array[i]; */
653       } else {
654         PetscInt       k;
655         MatScalar *buf1,*buf2;
656         buf1 = svalues+bs2*startv[j];
657         buf2 = space->val + bs2*i;                      /* stash->array+bs2*i; */
658         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
659       }
660       sindices[starti[j]]             = space->idx[l]; /* stash->idx[i]; */
661       sindices[starti[j]+nlengths[j]] = space->idy[l]; /* stash->idy[i]; */
662       startv[j]++;
663       starti[j]++;
664       i++;
665     }
666     space = space_next;
667   }
668   startv[0] = 0;
669   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
670 
671   for (i=0,count=0; i<size; i++) {
672     if (nprocs[i]) {
673       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
674       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
675     }
676   }
677 #if defined(PETSC_USE_INFO)
678   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
679   for (i=0; i<size; i++) {
680     if (nprocs[i]) {
681       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
682     }
683   }
684 #endif
685   ierr = PetscFree(owner);CHKERRQ(ierr);
686   ierr = PetscFree(startv);CHKERRQ(ierr);
687   /* This memory is reused in scatter end  for a different purpose*/
688   for (i=0; i<2*size; i++) nprocs[i] = -1;
689   stash->nprocs      = nprocs;
690 
691   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
692   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
693 
694   for (i=0; i<nreceives; i++) {
695     recv_waits[2*i]   = recv_waits1[i];
696     recv_waits[2*i+1] = recv_waits2[i];
697   }
698   stash->recv_waits = recv_waits;
699   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
700   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
701 
702   stash->svalues    = svalues;    stash->rvalues     = rvalues;
703   stash->rindices   = rindices;   stash->send_waits  = send_waits;
704   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
705   PetscFunctionReturn(0);
706 }
707 
708 /*
709    MatStashScatterGetMesg_Private - This function waits on the receives posted
710    in the function MatStashScatterBegin_Private() and returns one message at
711    a time to the calling function. If no messages are left, it indicates this
712    by setting flg = 0, else it sets flg = 1.
713 
714    Input Parameters:
715    stash - the stash
716 
717    Output Parameters:
718    nvals - the number of entries in the current message.
719    rows  - an array of row indices (or blocked indices) corresponding to the values
720    cols  - an array of columnindices (or blocked indices) corresponding to the values
721    vals  - the values
722    flg   - 0 indicates no more message left, and the current call has no values associated.
723            1 indicates that the current call successfully received a message, and the
724              other output parameters nvals,rows,cols,vals are set appropriately.
725 */
726 #undef __FUNCT__
727 #define __FUNCT__ "MatStashScatterGetMesg_Private"
728 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
729 {
730   PetscErrorCode ierr;
731   PetscMPIInt    i,*flg_v,i1,i2;
732   PetscInt       bs2;
733   MPI_Status     recv_status;
734   PetscTruth     match_found = PETSC_FALSE;
735 
736   PetscFunctionBegin;
737 
738   *flg = 0; /* When a message is discovered this is reset to 1 */
739   /* Return if no more messages to process */
740   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
741 
742   flg_v = stash->nprocs;
743   bs2   = stash->bs*stash->bs;
744   /* If a matching pair of receieves are found, process them, and return the data to
745      the calling function. Until then keep receiving messages */
746   while (!match_found) {
747     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
748     /* Now pack the received message into a structure which is useable by others */
749     if (i % 2) {
750       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
751       flg_v[2*recv_status.MPI_SOURCE] = i/2;
752       *nvals = *nvals/bs2;
753     } else {
754       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
755       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
756       *nvals = *nvals/2; /* This message has both row indices and col indices */
757     }
758 
759     /* Check if we have both the messages from this proc */
760     i1 = flg_v[2*recv_status.MPI_SOURCE];
761     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
762     if (i1 != -1 && i2 != -1) {
763       *rows       = stash->rindices[i2];
764       *cols       = *rows + *nvals;
765       *vals       = stash->rvalues[i1];
766       *flg        = 1;
767       stash->nprocessed ++;
768       match_found = PETSC_TRUE;
769     }
770   }
771   PetscFunctionReturn(0);
772 }
773