xref: /petsc/src/mat/utils/matstash.c (revision d7d60843ef74bf0220ee2ef6d73674229d676f5f)
1 
2 #include <petsc-private/matimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   10000
5 
6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13 
14 /*
15   MatStashCreate_Private - Creates a stash,currently used for all the parallel
16   matrix implementations. The stash is where elements of a matrix destined
17   to be stored on other processors are kept until matrix assembly is done.
18 
19   This is a simple minded stash. Simply adds entries to end of stash.
20 
21   Input Parameters:
22   comm - communicator, required for scatters.
23   bs   - stash block size. used when stashing blocks of values
24 
25   Output Parameters:
26   stash    - the newly created stash
27 */
28 #undef __FUNCT__
29 #define __FUNCT__ "MatStashCreate_Private"
30 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
31 {
32   PetscErrorCode ierr;
33   PetscInt       max,*opt,nopt,i;
34   PetscBool      flg;
35 
36   PetscFunctionBegin;
37   /* Require 2 tags,get the second using PetscCommGetNewTag() */
38   stash->comm = comm;
39 
40   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
41   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
42   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
43   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
44   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
45   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
46 
47 
48   nopt = stash->size;
49   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
50   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
51   if (flg) {
52     if (nopt == 1)                max = opt[0];
53     else if (nopt == stash->size) max = opt[stash->rank];
54     else if (stash->rank < nopt)  max = opt[stash->rank];
55     else                          max = 0; /* Use default */
56     stash->umax = max;
57   } else {
58     stash->umax = 0;
59   }
60   ierr = PetscFree(opt);CHKERRQ(ierr);
61   if (bs <= 0) bs = 1;
62 
63   stash->bs         = bs;
64   stash->nmax       = 0;
65   stash->oldnmax    = 0;
66   stash->n          = 0;
67   stash->reallocs   = -1;
68   stash->space_head = 0;
69   stash->space      = 0;
70 
71   stash->send_waits  = 0;
72   stash->recv_waits  = 0;
73   stash->send_status = 0;
74   stash->nsends      = 0;
75   stash->nrecvs      = 0;
76   stash->svalues     = 0;
77   stash->rvalues     = 0;
78   stash->rindices    = 0;
79   stash->nprocessed  = 0;
80   stash->reproduce   = PETSC_FALSE;
81   stash->blocktype   = MPI_DATATYPE_NULL;
82 
83   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
84   ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
85   if (flg) {
86     stash->ScatterBegin   = MatStashScatterBegin_BTS;
87     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88     stash->ScatterEnd     = MatStashScatterEnd_BTS;
89     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90   } else {
91     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94     stash->ScatterDestroy = NULL;
95   }
96   PetscFunctionReturn(0);
97 }
98 
99 /*
100    MatStashDestroy_Private - Destroy the stash
101 */
102 #undef __FUNCT__
103 #define __FUNCT__ "MatStashDestroy_Private"
104 PetscErrorCode MatStashDestroy_Private(MatStash *stash)
105 {
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
110   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
111 
112   stash->space = 0;
113 
114   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 /*
119    MatStashScatterEnd_Private - This is called as the final stage of
120    scatter. The final stages of message passing is done here, and
121    all the memory used for message passing is cleaned up. This
122    routine also resets the stash, and deallocates the memory used
123    for the stash. It also keeps track of the current memory usage
124    so that the same value can be used the next time through.
125 */
126 #undef __FUNCT__
127 #define __FUNCT__ "MatStashScatterEnd_Private"
128 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129 {
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatStashScatterEnd_Ref"
139 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
140 {
141   PetscErrorCode ierr;
142   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
143   MPI_Status     *send_status;
144 
145   PetscFunctionBegin;
146   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
147   /* wait on sends */
148   if (nsends) {
149     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
150     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
151     ierr = PetscFree(send_status);CHKERRQ(ierr);
152   }
153 
154   /* Now update nmaxold to be app 10% more than max n used, this way the
155      wastage of space is reduced the next time this stash is used.
156      Also update the oldmax, only if it increases */
157   if (stash->n) {
158     bs2     = stash->bs*stash->bs;
159     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
160     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
161   }
162 
163   stash->nmax       = 0;
164   stash->n          = 0;
165   stash->reallocs   = -1;
166   stash->nprocessed = 0;
167 
168   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
169 
170   stash->space = 0;
171 
172   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
173   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
174   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
175   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
176   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
177   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
178   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 /*
183    MatStashGetInfo_Private - Gets the relavant statistics of the stash
184 
185    Input Parameters:
186    stash    - the stash
187    nstash   - the size of the stash. Indicates the number of values stored.
188    reallocs - the number of additional mallocs incurred.
189 
190 */
191 #undef __FUNCT__
192 #define __FUNCT__ "MatStashGetInfo_Private"
193 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
194 {
195   PetscInt bs2 = stash->bs*stash->bs;
196 
197   PetscFunctionBegin;
198   if (nstash) *nstash = stash->n*bs2;
199   if (reallocs) {
200     if (stash->reallocs < 0) *reallocs = 0;
201     else                     *reallocs = stash->reallocs;
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 /*
207    MatStashSetInitialSize_Private - Sets the initial size of the stash
208 
209    Input Parameters:
210    stash  - the stash
211    max    - the value that is used as the max size of the stash.
212             this value is used while allocating memory.
213 */
214 #undef __FUNCT__
215 #define __FUNCT__ "MatStashSetInitialSize_Private"
216 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
217 {
218   PetscFunctionBegin;
219   stash->umax = max;
220   PetscFunctionReturn(0);
221 }
222 
223 /* MatStashExpand_Private - Expand the stash. This function is called
224    when the space in the stash is not sufficient to add the new values
225    being inserted into the stash.
226 
227    Input Parameters:
228    stash - the stash
229    incr  - the minimum increase requested
230 
231    Notes:
232    This routine doubles the currently used memory.
233  */
234 #undef __FUNCT__
235 #define __FUNCT__ "MatStashExpand_Private"
236 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
237 {
238   PetscErrorCode ierr;
239   PetscInt       newnmax,bs2= stash->bs*stash->bs;
240 
241   PetscFunctionBegin;
242   /* allocate a larger stash */
243   if (!stash->oldnmax && !stash->nmax) { /* new stash */
244     if (stash->umax)                  newnmax = stash->umax/bs2;
245     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
246   } else if (!stash->nmax) { /* resuing stash */
247     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
248     else                              newnmax = stash->oldnmax/bs2;
249   } else                              newnmax = stash->nmax*2;
250   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
251 
252   /* Get a MatStashSpace and attach it to stash */
253   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
254   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
255     stash->space_head = stash->space;
256   }
257 
258   stash->reallocs++;
259   stash->nmax = newnmax;
260   PetscFunctionReturn(0);
261 }
262 /*
263   MatStashValuesRow_Private - inserts values into the stash. This function
264   expects the values to be roworiented. Multiple columns belong to the same row
265   can be inserted with a single call to this function.
266 
267   Input Parameters:
268   stash  - the stash
269   row    - the global row correspoiding to the values
270   n      - the number of elements inserted. All elements belong to the above row.
271   idxn   - the global column indices corresponding to each of the values.
272   values - the values inserted
273 */
274 #undef __FUNCT__
275 #define __FUNCT__ "MatStashValuesRow_Private"
276 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
277 {
278   PetscErrorCode     ierr;
279   PetscInt           i,k,cnt = 0;
280   PetscMatStashSpace space=stash->space;
281 
282   PetscFunctionBegin;
283   /* Check and see if we have sufficient memory */
284   if (!space || space->local_remaining < n) {
285     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
286   }
287   space = stash->space;
288   k     = space->local_used;
289   for (i=0; i<n; i++) {
290     if (ignorezeroentries && (values[i] == 0.0)) continue;
291     space->idx[k] = row;
292     space->idy[k] = idxn[i];
293     space->val[k] = values[i];
294     k++;
295     cnt++;
296   }
297   stash->n               += cnt;
298   space->local_used      += cnt;
299   space->local_remaining -= cnt;
300   PetscFunctionReturn(0);
301 }
302 
303 /*
304   MatStashValuesCol_Private - inserts values into the stash. This function
305   expects the values to be columnoriented. Multiple columns belong to the same row
306   can be inserted with a single call to this function.
307 
308   Input Parameters:
309   stash   - the stash
310   row     - the global row correspoiding to the values
311   n       - the number of elements inserted. All elements belong to the above row.
312   idxn    - the global column indices corresponding to each of the values.
313   values  - the values inserted
314   stepval - the consecutive values are sepated by a distance of stepval.
315             this happens because the input is columnoriented.
316 */
317 #undef __FUNCT__
318 #define __FUNCT__ "MatStashValuesCol_Private"
319 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
320 {
321   PetscErrorCode     ierr;
322   PetscInt           i,k,cnt = 0;
323   PetscMatStashSpace space=stash->space;
324 
325   PetscFunctionBegin;
326   /* Check and see if we have sufficient memory */
327   if (!space || space->local_remaining < n) {
328     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
329   }
330   space = stash->space;
331   k     = space->local_used;
332   for (i=0; i<n; i++) {
333     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
334     space->idx[k] = row;
335     space->idy[k] = idxn[i];
336     space->val[k] = values[i*stepval];
337     k++;
338     cnt++;
339   }
340   stash->n               += cnt;
341   space->local_used      += cnt;
342   space->local_remaining -= cnt;
343   PetscFunctionReturn(0);
344 }
345 
/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be roworiented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesRowBlocked_Private"
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Ensure the active stash segment has room for n more bs*bs blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;          /* destination: the l-th stashed block */
    vals  = values + idx*bs2*n + bs*i;   /* source: block i of block-row idx in the input */
    for (j=0; j<bs; j++) {
      /* writing with stride bs transposes the row-oriented input into
         column-oriented storage; array++ advances to the next destination column entry */
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;                   /* next row of the input block */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
401 
/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be roworiented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  NOTE(review): unlike the Row variant this one strides by rmax*bs and copies
  contiguously, which looks like column-oriented input — confirm whether the
  "roworiented" wording above is a copy-paste leftover.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesColBlocked_Private"
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Ensure the active stash segment has room for n more bs*bs blocks */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    array = space->val + bs2*l;          /* destination: the l-th stashed block */
    vals  = values + idx*bs2*n + bs*i;   /* source: block i of block-row idx in the input */
    for (j=0; j<bs; j++) {
      /* contiguous copy of bs entries, then jump to the next input column */
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
457 /*
458   MatStashScatterBegin_Private - Initiates the transfer of values to the
459   correct owners. This function goes through the stash, and check the
460   owners of each stashed value, and sends the values off to the owner
461   processors.
462 
463   Input Parameters:
464   stash  - the stash
465   owners - an array of size 'no-of-procs' which gives the ownership range
466            for each node.
467 
468   Notes: The 'owners' array in the cased of the blocked-stash has the
469   ranges specified blocked global indices, and for the regular stash in
470   the proper global indices.
471 */
472 #undef __FUNCT__
473 #define __FUNCT__ "MatStashScatterBegin_Private"
474 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
475 {
476   PetscErrorCode ierr;
477 
478   PetscFunctionBegin;
479   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_Ref"
/* Reference scatter-begin: determines the owner of every stashed entry,
   posts nonblocking receives for incoming index/value messages, packs the
   send buffers, and posts two nonblocking sends (indices on tag1, values on
   tag2) to each destination rank. Completion is handled by
   MatStashScatterGetMesg_Ref()/MatStashScatterEnd_Ref(). */
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j    = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      /* linear scan over ownership ranges, resuming from the last hit */
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since row and col indices travel together, lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are 'bs2' scalars per entry (and remove the earlier factor of 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends per destination: one with the values, one with row+col indices */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack each stashed entry at its destination's running offset; note that
     startv/starti are advanced in place and rebuilt afterwards */
  i     = 0;
  space = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices first, then col indices, within this destination's span */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* rebuild the start offsets that were consumed during packing */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  /* interleave: even slots = index recvs (tag1), odd slots = value recvs (tag2) */
  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
630 
631 /*
632    MatStashScatterGetMesg_Private - This function waits on the receives posted
633    in the function MatStashScatterBegin_Private() and returns one message at
634    a time to the calling function. If no messages are left, it indicates this
635    by setting flg = 0, else it sets flg = 1.
636 
637    Input Parameters:
638    stash - the stash
639 
640    Output Parameters:
641    nvals - the number of entries in the current message.
642    rows  - an array of row indices (or blocked indices) corresponding to the values
643    cols  - an array of columnindices (or blocked indices) corresponding to the values
644    vals  - the values
645    flg   - 0 indicates no more message left, and the current call has no values associated.
646            1 indicates that the current call successfully received a message, and the
647              other output parameters nvals,rows,cols,vals are set appropriately.
648 */
649 #undef __FUNCT__
650 #define __FUNCT__ "MatStashScatterGetMesg_Private"
651 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
652 {
653   PetscErrorCode ierr;
654 
655   PetscFunctionBegin;
656   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_Ref"
/* Reference implementation: each source rank sends two messages (even request
   slot = indices on tag1, odd slot = values on tag2).  This routine waits on
   receives until BOTH messages from some rank have arrived, then returns that
   pair; flg_v[2*src] / flg_v[2*src+1] record which halves have been seen. */
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* complete receives in posting order so the assembly is reproducible */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* odd request slot: values message; record its buffer index (i/2) */
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      /* even request slot: indices message */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals; /* col indices are stored right after the rows */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
716 
/* One stashed entry as exchanged by the BTS scheme: a (row,col) coordinate
   followed by the bs2 values of the corresponding block.  The wire layout is
   described by stash->blocktype (see MatStashBlockTypeSetUp()). */
typedef struct {
  PetscInt row;                 /* global (block) row index */
  PetscInt col;                 /* global (block) column index */
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;
722 
#undef __FUNCT__
#define __FUNCT__ "MatStashSortCompress_Private"
/* Sorts all stashed entries by (row,col), merges duplicate locations (adding
   or overwriting according to insertmode), and appends the resulting blocks
   to stash->segsendblocks for the BTS send phase. */
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  /* Flatten the chained stash segments into parallel row/col/value arrays */
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  /* Primary sort by row (col and perm are carried along) */
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      /* Secondary sort of this row's columns */
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: the last insertion at this location wins */
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
772 
#undef __FUNCT__
#define __FUNCT__ "MatStashBlockTypeSetUp"
/* Lazily builds the MPI datatype describing one MatStashBlock (two PetscInts
   followed by bs2 PetscScalars) and creates the segmented buffers used by the
   BTS scheme.  Idempotent: does nothing once stash->blocktype is set. */
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;

    stash->blocktype_size = offsetof(MatStashBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    /* Segmented buffers sized in units of one padded block / one frame */
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;                          /* row and col */
    blocklens[1] = bs2;                        /* the block's values */
    displs[0] = offsetof(MatStashBlock,row);
    displs[1] = offsetof(MatStashBlock,vals);
    types[0] = MPIU_INT;
    types[1] = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    /* Resize so consecutive blocks in a buffer are spaced by the padded size */
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatStashBTSSend_Private"
810 /* Callback invoked after target rank has initiatied receive of rendezvous message.
811  * Here we post the main sends.
812  */
813 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
814 {
815   MatStash *stash = (MatStash*)ctx;
816   MatStashHeader *hdr = (MatStashHeader*)sdata;
817   PetscErrorCode ierr;
818 
819   PetscFunctionBegin;
820   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
821   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
822   stash->sendframes[rankid].count = hdr->count;
823   stash->sendframes[rankid].pending = 1;
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatStashBTSRecv_Private"
829 /* Callback invoked by target after receiving rendezvous message.
830  * Here we post the main recvs.
831  */
832 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
833 {
834   MatStash *stash = (MatStash*)ctx;
835   MatStashHeader *hdr = (MatStashHeader*)rdata;
836   MatStashFrame *frame;
837   PetscErrorCode ierr;
838 
839   PetscFunctionBegin;
840   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
841   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
842   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
843   frame->count = hdr->count;
844   frame->pending = 1;
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatStashScatterBegin_BTS"
850 /*
851  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
852  */
853 static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
854 {
855   PetscErrorCode ierr;
856   size_t nblocks;
857   char *sendblocks;
858 
859   PetscFunctionBegin;
860   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
861   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
862   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
863   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
864   {
865     PetscInt i,rowstart,sendno;
866 
867     /* Count number of send ranks and allocate for sends */
868     stash->nsendranks = 0;
869     for (rowstart=0; rowstart<nblocks; ) {
870       PetscInt lastowner,owner;
871       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
872       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
873       if (owner < 0) owner = -(owner+2);
874       lastowner = owner;
875       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
876         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
877         if (sendblock_i->row == sendblock_rowstart->row) continue;
878         ierr = PetscFindInt(sendblock_i->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
879         if (owner < 0) owner = -(owner+2);
880         if (owner != lastowner) break;
881       }
882       stash->nsendranks++;
883       rowstart = i;
884     }
885     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
886 
887     /* Set up sendhdrs and sendframes */
888     sendno = 0;
889     for (rowstart=0; rowstart<nblocks; ) {
890       PetscInt owner;
891       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
892       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
893       if (owner < 0) owner = -(owner+2);
894       stash->sendranks[sendno] = owner;
895       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
896         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
897         if (sendblock_i->row == sendblock_rowstart->row) continue;
898         ierr = PetscFindInt(sendblock_i->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
899         if (owner < 0) owner = -(owner+2);
900         if (owner != stash->sendranks[sendno]) break;
901       }
902       stash->sendframes[sendno].buffer = sendblock_rowstart;
903       stash->sendframes[sendno].pending = 0;
904       stash->sendhdr[sendno].count = i - rowstart;
905       stash->sendhdr[sendno].insertmode = mat->insertmode;
906       sendno++;
907       rowstart = i;
908     }
909     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
910   }
911 
912   ierr = PetscCommBuildTwoSidedFReq(stash->comm,2,MPIU_INT,stash->nsendranks,stash->sendranks,stash->sendhdr,
913                                     &stash->nrecvranks,&stash->recvranks,&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
914                                     MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
915 
916   ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
917   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
918   stash->recvframe_active = NULL;
919   stash->recvframe_i      = 0;
920   stash->some_i           = 0;
921   stash->some_count       = 0;
922   stash->recvcount        = 0;
923   stash->use_status       = PETSC_FALSE; /* Use count from header instead of from message. */
924 
925   PetscFunctionReturn(0);
926 }
927 
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_BTS"
/* Hand back one received stashed block per call.
 *
 * Sets *flg = 1 and points *row/*col/*val (with *n = 1) directly into the
 * receive buffer of the currently active frame; sets *flg = 0 once every
 * message from every rank has been fully consumed.  Receives are drained
 * lazily: MPI_Waitsome is called only when all blocks of the previously
 * completed messages have been handed out. */
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  /* Advance to a frame that still has unread blocks. */
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) { /* all completed messages consumed; wait for more */
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  /* Point the caller directly into the receive buffer; no copy is made. */
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatStashScatterEnd_BTS"
964 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
965 {
966   PetscErrorCode ierr;
967 
968   PetscFunctionBegin;
969   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
970   ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
971 
972   /* Now update nmaxold to be app 10% more than max n used, this way the
973      wastage of space is reduced the next time this stash is used.
974      Also update the oldmax, only if it increases */
975   if (stash->n) {
976     PetscInt bs2     = stash->bs*stash->bs;
977     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
978     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
979   }
980 
981   stash->nmax       = 0;
982   stash->n          = 0;
983   stash->reallocs   = -1;
984   stash->nprocessed = 0;
985 
986   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
987 
988   stash->space = 0;
989 
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatStashScatterDestroy_BTS"
995 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
996 {
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1001   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1002   stash->recvframes = NULL;
1003   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1004   if (stash->blocktype != MPI_DATATYPE_NULL) {
1005     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1006   }
1007   stash->nsendranks = 0;
1008   stash->nrecvranks = 0;
1009   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1010   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1011   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1012   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1013   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1014   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017