xref: /petsc/src/mat/utils/matstash.c (revision 649db694b3889ba5f6b2652ccd96912bbc3c5e3f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: matstash.c,v 1.27 1999/03/18 00:33:52 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/matimpl.h"
6 
7 #define DEFAULT_STASH_SIZE   10000
8 
9 /*
10   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
11   matrix implementations. The stash is where elements of a matrix destined
12   to be stored on other processors are kept until matrix assembly is done.
13 
14   This is a simple minded stash. Simply adds entries to end of stash.
15 
16   Input Parameters:
17   comm - communicator, required for scatters.
18   bs   - stash block size. used when stashing blocks of values
19 
20   Output Parameters:
21   stash    - the newly created stash
22 */
23 #undef __FUNC__
24 #define __FUNC__ "MatStashCreate_Private"
25 int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
26 {
27   int ierr,flg,max=DEFAULT_STASH_SIZE;
28 
29   PetscFunctionBegin;
30   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
33   ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr);
34   ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
35   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
36   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
37 
38   if (bs <= 0) bs = 1;
39 
40   stash->bs       = bs;
41   stash->nmax     = 0;
42   stash->n        = 0;
43   stash->reallocs = -1;
44   stash->idx      = 0;
45   stash->idy      = 0;
46   stash->array    = 0;
47 
48   stash->send_waits  = 0;
49   stash->recv_waits  = 0;
50   stash->send_status = 0;
51   stash->nsends      = 0;
52   stash->nrecvs      = 0;
53   stash->svalues     = 0;
54   stash->rvalues     = 0;
55   stash->rmax        = 0;
56   stash->nprocs      = 0;
57   stash->nprocessed  = 0;
58   PetscFunctionReturn(0);
59 }
60 
61 /*
62    MatStashDestroy_Private - Destroy the stash
63 */
64 #undef __FUNC__
65 #define __FUNC__ "MatStashDestroy_Private"
66 int MatStashDestroy_Private(MatStash *stash)
67 {
68   int ierr;
69 
70   PetscFunctionBegin;
71   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
72   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
73   PetscFunctionReturn(0);
74 }
75 
76 /*
77    MatStashScatterEnd_Private - This is called as the fial stage of
78    scatter. The final stages of messagepassing is done here, and
79    all the memory used for messagepassing is cleanedu up. This
80    routine also resets the stash, and deallocates the memory used
81    for the stash. It also keeps track of the current memory usage
82    so that the same value can be used the next time through.
83 */
84 #undef __FUNC__
85 #define __FUNC__ "MatStashScatterEnd_Private"
86 int MatStashScatterEnd_Private(MatStash *stash)
87 {
88   int         nsends=stash->nsends,ierr,bs2;
89   MPI_Status  *send_status;
90 
91   PetscFunctionBegin;
92   /* wait on sends */
93   if (nsends) {
94     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
95     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
96     PetscFree(send_status);
97   }
98 
99   /* Now update nmaxold to be app 10% more than max n used, this way the
100      wastage of space is reduced the next time this stash is used */
101   bs2               = stash->bs*stash->bs;
102   stash->oldnmax    = ((int)(stash->n * 1.1) + 5)*bs2;
103   stash->nmax       = 0;
104   stash->n          = 0;
105   stash->reallocs   = -1;
106   stash->rmax       = 0;
107   stash->nprocessed = 0;
108 
109   if (stash->array) {
110     PetscFree(stash->array);
111     stash->array = 0;
112     stash->idx   = 0;
113     stash->idy   = 0;
114   }
115   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
116   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
117   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
118   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
119   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
120 
121   PetscFunctionReturn(0);
122 }
123 
124 /*
125    MatStashGetInfo_Private - Gets the relavant statistics of the stash
126 
127    Input Parameters:
128    stash    - the stash
129    nstash   - the size of the stash. Indicates the number of values stored.
130    reallocs - the number of additional mallocs incurred.
131 
132 */
133 #undef __FUNC__
134 #define __FUNC__ "MatStashGetInfo_Private"
135 int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
136 {
137   int bs2 = stash->bs*stash->bs;
138 
139   PetscFunctionBegin;
140   *nstash   = stash->n*bs2;
141   *reallocs = stash->reallocs;
142   PetscFunctionReturn(0);
143 }
144 
145 
146 /*
147    MatStashSetInitialSize_Private - Sets the initial size of the stash
148 
149    Input Parameters:
150    stash  - the stash
151    max    - the value that is used as the max size of the stash.
152             this value is used while allocating memory.
153 */
154 #undef __FUNC__
155 #define __FUNC__ "MatStashSetInitialSize_Private"
156 int MatStashSetInitialSize_Private(MatStash *stash,int max)
157 {
158   PetscFunctionBegin;
159   stash->oldnmax = max;
160   stash->nmax    = 0;
161   PetscFunctionReturn(0);
162 }
163 
164 /* MatStashExpand_Private - Expand the stash. This function is called
165    when the space in the stash is not sufficient to add the new values
166    being inserted into the stash.
167 
168    Input Parameters:
169    stash - the stash
170    incr  - the minimum increase requested
171 
172    Notes:
173    This routine doubles the currently used memory.
174  */
175 #undef __FUNC__
176 #define __FUNC__ "MatStashExpand_Private"
177 static int MatStashExpand_Private(MatStash *stash,int incr)
178 {
179   int    *n_idx,*n_idy,newnmax,bs2;
180   Scalar *n_array;
181 
182   PetscFunctionBegin;
183   /* allocate a larger stash */
184   bs2     = stash->bs*stash->bs;
185   if (stash->nmax == 0) newnmax = stash->oldnmax/bs2;
186   else                  newnmax = stash->nmax*2;
187   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
188 
189   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
190   n_idx   = (int *) (n_array + bs2*newnmax);
191   n_idy   = (int *) (n_idx + newnmax);
192   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
193   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
194   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
195   if (stash->array) PetscFree(stash->array);
196   stash->array   = n_array;
197   stash->idx     = n_idx;
198   stash->idy     = n_idy;
199   stash->nmax    = newnmax;
200   stash->oldnmax = newnmax*bs2;
201   stash->reallocs++;
202   PetscFunctionReturn(0);
203 }
204 /*
205   MatStashValuesRow_Private - inserts values into the stash. This function
206   expects the values to be roworiented. Multiple columns belong to the same row
207   can be inserted with a single call to this function.
208 
209   Input Parameters:
210   stash  - the stash
211   row    - the global row correspoiding to the values
212   n      - the number of elements inserted. All elements belong to the above row.
213   idxn   - the global column indices corresponding to each of the values.
214   values - the values inserted
215 */
216 #undef __FUNC__
217 #define __FUNC__ "MatStashValuesRow_Private"
218 int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
219 {
220   int    ierr,i;
221 
222   PetscFunctionBegin;
223   /* Check and see if we have sufficient memory */
224   if ((stash->n + n) > stash->nmax) {
225     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
226   }
227   for ( i=0; i<n; i++ ) {
228     stash->idx[stash->n]   = row;
229     stash->idy[stash->n]   = idxn[i];
230     stash->array[stash->n] = values[i];
231     stash->n++;
232   }
233   PetscFunctionReturn(0);
234 }
235 /*
236   MatStashValuesCol_Private - inserts values into the stash. This function
237   expects the values to be columnoriented. Multiple columns belong to the same row
238   can be inserted with a single call to this function.
239 
240   Input Parameters:
241   stash   - the stash
242   row     - the global row correspoiding to the values
243   n       - the number of elements inserted. All elements belong to the above row.
244   idxn    - the global column indices corresponding to each of the values.
245   values  - the values inserted
246   stepval - the consecutive values are sepated by a distance of stepval.
247             this happens because the input is columnoriented.
248 */
249 #undef __FUNC__
250 #define __FUNC__ "MatStashValuesCol_Private"
251 int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
252                                       Scalar *values,int stepval)
253 {
254   int    ierr,i;
255 
256   PetscFunctionBegin;
257   /* Check and see if we have sufficient memory */
258   if ((stash->n + n) > stash->nmax) {
259     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
260   }
261   for ( i=0; i<n; i++ ) {
262     stash->idx[stash->n]   = row;
263     stash->idy[stash->n]   = idxn[i];
264     stash->array[stash->n] = values[i*stepval];
265     stash->n++;
266   }
267   PetscFunctionReturn(0);
268 }
269 
270 /*
271   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
272   This function expects the values to be roworiented. Multiple columns belong
273   to the same block-row can be inserted with a single call to this function.
274   This function extracts the sub-block of values based on the dimensions of
275   the original input block, and the row,col values corresponding to the blocks.
276 
277   Input Parameters:
278   stash  - the stash
279   row    - the global block-row correspoiding to the values
280   n      - the number of elements inserted. All elements belong to the above row.
281   idxn   - the global block-column indices corresponding to each of the blocks of
282            values. Each block is of size bs*bs.
283   values - the values inserted
284   rmax   - the number of block-rows in the original block.
285   cmax   - the number of block-columsn on the original block.
286   idx    - the index of the current block-row in the original block.
287 */
288 #undef __FUNC__
289 #define __FUNC__ "MatStashValuesRowBlocked_Private"
290 int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
291                                int rmax,int cmax,int idx)
292 {
293   int    ierr,i,j,k,bs2,bs=stash->bs;
294   Scalar *vals,*array;
295 
296   PetscFunctionBegin;
297   bs2 = bs*bs;
298   if ((stash->n+n) > stash->nmax) {
299     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
300   }
301   for ( i=0; i<n; i++ ) {
302     stash->idx[stash->n]   = row;
303     stash->idy[stash->n] = idxn[i];
304     /* Now copy over the block of values. Store the values column oriented.
305        This enables inserting multiple blocks belonging to a row with a single
306        funtion call */
307     array = stash->array + bs2*stash->n;
308     vals  = values + idx*bs2*n + bs*i;
309     for ( j=0; j<bs; j++ ) {
310       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
311       array += 1;
312       vals  += cmax*bs;
313     }
314     stash->n++;
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 /*
320   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
321   This function expects the values to be roworiented. Multiple columns belong
322   to the same block-row can be inserted with a single call to this function.
323   This function extracts the sub-block of values based on the dimensions of
324   the original input block, and the row,col values corresponding to the blocks.
325 
326   Input Parameters:
327   stash  - the stash
328   row    - the global block-row correspoiding to the values
329   n      - the number of elements inserted. All elements belong to the above row.
330   idxn   - the global block-column indices corresponding to each of the blocks of
331            values. Each block is of size bs*bs.
332   values - the values inserted
333   rmax   - the number of block-rows in the original block.
334   cmax   - the number of block-columsn on the original block.
335   idx    - the index of the current block-row in the original block.
336 */
337 #undef __FUNC__
338 #define __FUNC__ "MatStashValuesColBlocked_Private"
339 int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
340                                              Scalar *values,int rmax,int cmax,int idx)
341 {
342   int    ierr,i,j,k,bs2,bs=stash->bs;
343   Scalar *vals,*array;
344 
345   PetscFunctionBegin;
346   bs2 = bs*bs;
347   if ((stash->n+n) > stash->nmax) {
348     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
349   }
350   for ( i=0; i<n; i++ ) {
351     stash->idx[stash->n]   = row;
352     stash->idy[stash->n] = idxn[i];
353     /* Now copy over the block of values. Store the values column oriented.
354      This enables inserting multiple blocks belonging to a row with a single
355      funtion call */
356     array = stash->array + bs2*stash->n;
357     vals  = values + idx*bs + bs2*rmax*i;
358     for ( j=0; j<bs; j++ ) {
359       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
360       array += bs;
361       vals  += rmax*bs;
362     }
363     stash->n++;
364   }
365   PetscFunctionReturn(0);
366 }
367 /*
368   MatStashScatterBegin_Private - Initiates the transfer of values to the
369   correct owners. This function goes through the stash, and check the
370   owners of each stashed value, and sends the values off to the owner
371   processors.
372 
373   Input Parameters:
374   stash  - the stash
375   owners - an array of size 'no-of-procs' which gives the ownership range
376            for each node.
377 
378   Notes: The 'owners' array in the cased of the blocked-stash has the
379   ranges specified blocked global indices, and for the regular stash in
380   the proper global indices.
381 */
382 #undef __FUNC__
383 #define __FUNC__ "MatStashScatterBegin_Private"
384 int MatStashScatterBegin_Private(MatStash *stash,int *owners)
385 {
386   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
387   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
388   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
389   Scalar      *rvalues,*svalues;
390   MPI_Comm    comm = stash->comm;
391   MPI_Request *send_waits,*recv_waits;
392 
393   PetscFunctionBegin;
394 
395   bs2 = stash->bs*stash->bs;
396   /*  first count number of contributors to each processor */
397   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
398   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
399   owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
400 
401   for ( i=0; i<stash->n; i++ ) {
402     idx = stash->idx[i];
403     for ( j=0; j<size; j++ ) {
404       if (idx >= owners[j] && idx < owners[j+1]) {
405         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
406       }
407     }
408   }
409   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
410 
411   /* inform other processors of number of messages and max length*/
412   work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
413   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
414   nreceives = work[rank];
415   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
416   nmax = work[rank];
417   PetscFree(work);
418   /* post receives:
419      since we don't know how long each individual message is we
420      allocate the largest needed buffer for each receive. Potentially
421      this is a lot of wasted space.
422   */
423   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
424   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
425   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
426   for ( i=0,count=0; i<nreceives; i++ ) {
427     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
428                      recv_waits+count++); CHKERRQ(ierr);
429     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
430                      recv_waits+count++); CHKERRQ(ierr);
431   }
432 
433   /* do sends:
434       1) starts[i] gives the starting index in svalues for stuff going to
435          the ith processor
436   */
437   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
438   sindices   = (int *) (svalues + bs2*stash->n);
439   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
440   CHKPTRQ(send_waits);
441   startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
442   starti     = startv + size;
443   /* use 2 sends the first with all_a, the next with all_i and all_j */
444   startv[0]  = 0; starti[0] = 0;
445   for ( i=1; i<size; i++ ) {
446     startv[i] = startv[i-1] + nprocs[i-1];
447     starti[i] = starti[i-1] + nprocs[i-1]*2;
448   }
449   for ( i=0; i<stash->n; i++ ) {
450     j = owner[i];
451     if (bs2 == 1) {
452       svalues[startv[j]]              = stash->array[i];
453     } else {
454       int    k;
455       Scalar *buf1,*buf2;
456       buf1 = svalues+bs2*startv[j];
457       buf2 = stash->array+bs2*i;
458       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
459     }
460     sindices[starti[j]]             = stash->idx[i];
461     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
462     startv[j]++;
463     starti[j]++;
464   }
465   startv[0] = 0;
466   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
467   for ( i=0,count=0; i<size; i++ ) {
468     if (procs[i]) {
469       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
470                        send_waits+count++);CHKERRQ(ierr);
471       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
472                        send_waits+count++);CHKERRQ(ierr);
473     }
474   }
475   PetscFree(owner);
476   PetscFree(startv);
477   /* This memory is reused in scatter end  for a different purpose*/
478   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
479   stash->nprocs      = nprocs;
480 
481   stash->svalues    = svalues;    stash->rvalues    = rvalues;
482   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
483   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
484   stash->rmax       = nmax;
485   PetscFunctionReturn(0);
486 }
487 
488 /*
489    MatStashScatterGetMesg_Private - This function waits on the receives posted
490    in the function MatStashScatterBegin_Private() and returns one message at
491    a time to the calling function. If no messages are left, it indicates this
492    by setting flg = 0, else it sets flg = 1.
493 
494    Input Parameters:
495    stash - the stash
496 
497    Output Parameters:
498    nvals - the number of entries in the current message.
499    rows  - an array of row indices (or blocked indices) corresponding to the values
500    cols  - an array of columnindices (or blocked indices) corresponding to the values
501    vals  - the values
502    flg   - 0 indicates no more message left, and the current call has no values associated.
503            1 indicates that the current call successfully received a message, and the
504              other output parameters nvals,rows,cols,vals are set appropriately.
505 */
506 #undef __FUNC__
507 #define __FUNC__ "MatStashScatterGetMesg_Private"
508 int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
509 {
510   int         i,ierr,size=stash->size,*flg_v,*flg_i;
511   int         i1,i2,*rindices,match_found=0,bs2;
512   MPI_Status  recv_status;
513 
514   PetscFunctionBegin;
515 
516   *flg = 0; /* When a message is discovered this is reset to 1 */
517   /* Return if no more messages to process */
518   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
519 
520   flg_v = stash->nprocs;
521   flg_i = flg_v + size;
522   bs2   = stash->bs*stash->bs;
523   /* If a matching pair of receieves are found, process them, and return the data to
524      the calling function. Until then keep receiving messages */
525   while (!match_found) {
526     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
527     /* Now pack the received message into a structure which is useable by others */
528     if (i % 2) {
529       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
530       flg_i[recv_status.MPI_SOURCE] = i/2;
531       *nvals = *nvals/2; /* This message has both row indices and col indices */
532     } else {
533       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
534       flg_v[recv_status.MPI_SOURCE] = i/2;
535       *nvals = *nvals/bs2;
536     }
537 
538     /* Check if we have both the messages from this proc */
539     i1 = flg_v[recv_status.MPI_SOURCE];
540     i2 = flg_i[recv_status.MPI_SOURCE];
541     if (i1 != -1 && i2 != -1) {
542       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
543       *rows       = rindices + 2*i2*stash->rmax;
544       *cols       = *rows + *nvals;
545       *vals       = stash->rvalues + i1*bs2*stash->rmax;
546       *flg        = 1;
547       stash->nprocessed ++;
548       match_found = 1;
549     }
550   }
551   PetscFunctionReturn(0);
552 }
553