xref: /petsc/src/mat/utils/matstash.c (revision 4c1ff4818e913ed5875827e4cab935750ee70c71)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*4c1ff481SSatish Balay static char vcid[] = "$Id: stash.c,v 1.24 1999/03/11 23:21:51 balay Exp balay $";
32d5177cdSBarry Smith #endif
42d5177cdSBarry Smith 
570f55243SBarry Smith #include "src/mat/matimpl.h"
69417f4adSLois Curfman McInnes 
7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
8*4c1ff481SSatish Balay 
99417f4adSLois Curfman McInnes /*
10*4c1ff481SSatish Balay   StashCreate_Private - Creates a stash ,currently used for all the parallel
11*4c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
12*4c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
139417f4adSLois Curfman McInnes 
14*4c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
15*4c1ff481SSatish Balay 
16*4c1ff481SSatish Balay   Input Parameters:
17*4c1ff481SSatish Balay   comm - communicator, required for scatters.
18*4c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
19*4c1ff481SSatish Balay 
20*4c1ff481SSatish Balay   Output Parameters:
21*4c1ff481SSatish Balay   stash    - the newly created stash
229417f4adSLois Curfman McInnes */
235615d1e5SSatish Balay #undef __FUNC__
24bc5ccf88SSatish Balay #define __FUNC__ "StashCreate_Private"
25*4c1ff481SSatish Balay int StashCreate_Private(MPI_Comm comm,int bs, Stash *stash)
269417f4adSLois Curfman McInnes {
27*4c1ff481SSatish Balay   int ierr,flg,max=DEFAULT_STASH_SIZE/(bs*bs);
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
33bc5ccf88SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr);
34bc5ccf88SSatish Balay   ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
35a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
36a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
37bc5ccf88SSatish Balay 
38*4c1ff481SSatish Balay   if (bs <= 0) bs = 1;
39a2d1c673SSatish Balay 
40*4c1ff481SSatish Balay   stash->bs       = bs;
419417f4adSLois Curfman McInnes   stash->nmax     = 0;
429417f4adSLois Curfman McInnes   stash->n        = 0;
43*4c1ff481SSatish Balay   stash->reallocs = -1;
449417f4adSLois Curfman McInnes   stash->idx      = 0;
459417f4adSLois Curfman McInnes   stash->idy      = 0;
46bc5ccf88SSatish Balay   stash->array    = 0;
479417f4adSLois Curfman McInnes 
48bc5ccf88SSatish Balay   stash->send_waits  = 0;
49bc5ccf88SSatish Balay   stash->recv_waits  = 0;
50a2d1c673SSatish Balay   stash->send_status = 0;
51bc5ccf88SSatish Balay   stash->nsends      = 0;
52bc5ccf88SSatish Balay   stash->nrecvs      = 0;
53bc5ccf88SSatish Balay   stash->svalues     = 0;
54bc5ccf88SSatish Balay   stash->rvalues     = 0;
55bc5ccf88SSatish Balay   stash->rmax        = 0;
56a2d1c673SSatish Balay   stash->nprocs      = 0;
57a2d1c673SSatish Balay   stash->nprocessed  = 0;
583a40ed3dSBarry Smith   PetscFunctionReturn(0);
599417f4adSLois Curfman McInnes }
609417f4adSLois Curfman McInnes 
61*4c1ff481SSatish Balay /*
62*4c1ff481SSatish Balay    StashDestroy_Private - Destroy the stash
63*4c1ff481SSatish Balay */
645615d1e5SSatish Balay #undef __FUNC__
65d4bb536fSBarry Smith #define __FUNC__ "StashDestroy_Private"
669417f4adSLois Curfman McInnes int StashDestroy_Private(Stash *stash)
679417f4adSLois Curfman McInnes {
68bc5ccf88SSatish Balay   int ierr;
69a2d1c673SSatish Balay 
70bc5ccf88SSatish Balay   PetscFunctionBegin;
71bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
72bc5ccf88SSatish Balay   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
73bc5ccf88SSatish Balay   PetscFunctionReturn(0);
74bc5ccf88SSatish Balay }
75bc5ccf88SSatish Balay 
76*4c1ff481SSatish Balay /*
77*4c1ff481SSatish Balay    StashScatterEnd_Private - This is called as the fial stage of
78*4c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
79*4c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
80*4c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
81*4c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
82*4c1ff481SSatish Balay    so that the same value can be used the next time through.
83*4c1ff481SSatish Balay */
84bc5ccf88SSatish Balay #undef __FUNC__
85a2d1c673SSatish Balay #define __FUNC__ "StashScatterEnd_Private"
86a2d1c673SSatish Balay int StashScatterEnd_Private(Stash *stash)
87bc5ccf88SSatish Balay {
88a2d1c673SSatish Balay   int         nsends=stash->nsends,ierr;
89a2d1c673SSatish Balay   MPI_Status  *send_status;
90a2d1c673SSatish Balay 
913a40ed3dSBarry Smith   PetscFunctionBegin;
92a2d1c673SSatish Balay   /* wait on sends */
93a2d1c673SSatish Balay   if (nsends) {
94a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
95a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
96a2d1c673SSatish Balay     PetscFree(send_status);
97a2d1c673SSatish Balay   }
98a2d1c673SSatish Balay 
99d07ff455SSatish Balay   /* Now update nmaxold to be app 10% more than nmax, this way the
100d07ff455SSatish Balay      wastage of space is reduced the next time this stash is used */
101bc5ccf88SSatish Balay   stash->oldnmax    = (int)(stash->nmax * 1.1) + 5;
102d07ff455SSatish Balay   stash->nmax       = 0;
103d07ff455SSatish Balay   stash->n          = 0;
104*4c1ff481SSatish Balay   stash->reallocs   = -1;
105bc5ccf88SSatish Balay   stash->rmax       = 0;
106a2d1c673SSatish Balay   stash->nprocessed = 0;
107bc5ccf88SSatish Balay 
108bc5ccf88SSatish Balay   if (stash->array) {
109bc5ccf88SSatish Balay     PetscFree(stash->array);
110bc5ccf88SSatish Balay     stash->array = 0;
111bc5ccf88SSatish Balay     stash->idx   = 0;
112bc5ccf88SSatish Balay     stash->idy   = 0;
113bc5ccf88SSatish Balay   }
114bc5ccf88SSatish Balay   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
115bc5ccf88SSatish Balay   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
116bc5ccf88SSatish Balay   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
117bc5ccf88SSatish Balay   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
118a2d1c673SSatish Balay   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
119bc5ccf88SSatish Balay 
1203a40ed3dSBarry Smith   PetscFunctionReturn(0);
1219417f4adSLois Curfman McInnes }
1229417f4adSLois Curfman McInnes 
123*4c1ff481SSatish Balay /*
124*4c1ff481SSatish Balay    StashGetInfo_Private - Gets the relavant statistics of the stash
125*4c1ff481SSatish Balay 
126*4c1ff481SSatish Balay    Input Parameters:
127*4c1ff481SSatish Balay    stash    - the stash
128*4c1ff481SSatish Balay    nstash   - the size of the stash
129*4c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
130*4c1ff481SSatish Balay 
131*4c1ff481SSatish Balay */
1325615d1e5SSatish Balay #undef __FUNC__
133*4c1ff481SSatish Balay #define __FUNC__ "StashGetInfo_Private"
134*4c1ff481SSatish Balay int StashGetInfo_Private(Stash *stash,int *nstash, int *reallocs)
13597530c3fSBarry Smith {
1363a40ed3dSBarry Smith   PetscFunctionBegin;
137*4c1ff481SSatish Balay   *nstash   = stash->n;
138*4c1ff481SSatish Balay   *reallocs = stash->reallocs;
139bc5ccf88SSatish Balay   PetscFunctionReturn(0);
140bc5ccf88SSatish Balay }
141*4c1ff481SSatish Balay 
142*4c1ff481SSatish Balay 
143*4c1ff481SSatish Balay /*
144*4c1ff481SSatish Balay    StashSetInitialSize_Private - Sets the initial size of the stash
145*4c1ff481SSatish Balay 
146*4c1ff481SSatish Balay    Input Parameters:
147*4c1ff481SSatish Balay    stash  - the stash
148*4c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
149*4c1ff481SSatish Balay             this value is used while allocating memory.
150*4c1ff481SSatish Balay */
151bc5ccf88SSatish Balay #undef __FUNC__
152bc5ccf88SSatish Balay #define __FUNC__ "StashSetInitialSize_Private"
153bc5ccf88SSatish Balay int StashSetInitialSize_Private(Stash *stash,int max)
154bc5ccf88SSatish Balay {
155bc5ccf88SSatish Balay   PetscFunctionBegin;
156bc5ccf88SSatish Balay   stash->oldnmax = max;
157bc5ccf88SSatish Balay   stash->nmax    = 0;
1583a40ed3dSBarry Smith   PetscFunctionReturn(0);
15997530c3fSBarry Smith }
16097530c3fSBarry Smith 
161*4c1ff481SSatish Balay /* StashExpand_Private - Expand the stash. This function is called
162*4c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
163*4c1ff481SSatish Balay    being inserted into the stash.
164*4c1ff481SSatish Balay 
165*4c1ff481SSatish Balay    Input Parameters:
166*4c1ff481SSatish Balay    stash - the stash
167*4c1ff481SSatish Balay    incr  - the minimum increase requested
168*4c1ff481SSatish Balay 
169*4c1ff481SSatish Balay    Notes:
170*4c1ff481SSatish Balay    This routine doubles the currently used memory.
171*4c1ff481SSatish Balay  */
1725615d1e5SSatish Balay #undef __FUNC__
173bc5ccf88SSatish Balay #define __FUNC__ "StashExpand_Private"
174*4c1ff481SSatish Balay static int StashExpand_Private(Stash *stash,int incr)
1759417f4adSLois Curfman McInnes {
176a2d1c673SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2;
177bc5ccf88SSatish Balay   Scalar *n_array;
1789417f4adSLois Curfman McInnes 
1793a40ed3dSBarry Smith   PetscFunctionBegin;
1809417f4adSLois Curfman McInnes   /* allocate a larger stash */
181d07ff455SSatish Balay   if (stash->nmax == 0) newnmax = stash->oldnmax;
182d07ff455SSatish Balay   else                  newnmax = stash->nmax*2;
183*4c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
184d07ff455SSatish Balay 
185*4c1ff481SSatish Balay   bs2     = stash->bs*stash->bs;
186a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
187a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
188d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
189a2d1c673SSatish Balay   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
190416022c9SBarry Smith   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
191416022c9SBarry Smith   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
1920452661fSBarry Smith   if (stash->array) PetscFree(stash->array);
193d07ff455SSatish Balay   stash->array   = n_array;
194d07ff455SSatish Balay   stash->idx     = n_idx;
195d07ff455SSatish Balay   stash->idy     = n_idy;
196d07ff455SSatish Balay   stash->nmax    = newnmax;
197d07ff455SSatish Balay   stash->oldnmax = newnmax;
198bc5ccf88SSatish Balay   stash->reallocs++;
199bc5ccf88SSatish Balay   PetscFunctionReturn(0);
200bc5ccf88SSatish Balay }
201bc5ccf88SSatish Balay /*
202*4c1ff481SSatish Balay   StashValuesRoworiented_Private - inserts values into the stash. This function
203*4c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
204*4c1ff481SSatish Balay   can be inserted with a single call to this function.
205*4c1ff481SSatish Balay 
206*4c1ff481SSatish Balay   Input Parameters:
207*4c1ff481SSatish Balay   stash  - the stash
208*4c1ff481SSatish Balay   row    - the global row correspoiding to the values
209*4c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
210*4c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
211*4c1ff481SSatish Balay   values - the values inserted
212bc5ccf88SSatish Balay */
213bc5ccf88SSatish Balay #undef __FUNC__
214*4c1ff481SSatish Balay #define __FUNC__ "StashValuesRoworiented_Private"
215*4c1ff481SSatish Balay int StashValuesRoworiented_Private(Stash *stash,int row,int n, int *idxn,Scalar *values)
216bc5ccf88SSatish Balay {
217a2d1c673SSatish Balay   int    ierr,i;
218bc5ccf88SSatish Balay 
219bc5ccf88SSatish Balay   PetscFunctionBegin;
220*4c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
221*4c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
222*4c1ff481SSatish Balay     ierr = StashExpand_Private(stash,n); CHKERRQ(ierr);
2239417f4adSLois Curfman McInnes   }
224*4c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2259417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
226a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
227a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
228a2d1c673SSatish Balay     stash->n++;
2299417f4adSLois Curfman McInnes   }
230a2d1c673SSatish Balay   PetscFunctionReturn(0);
231a2d1c673SSatish Balay }
232*4c1ff481SSatish Balay /*
233*4c1ff481SSatish Balay   StashValuesColumnoriented_Private - inserts values into the stash. This function
234*4c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
235*4c1ff481SSatish Balay   can be inserted with a single call to this function.
236a2d1c673SSatish Balay 
237*4c1ff481SSatish Balay   Input Parameters:
238*4c1ff481SSatish Balay   stash   - the stash
239*4c1ff481SSatish Balay   row     - the global row correspoiding to the values
240*4c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
241*4c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
242*4c1ff481SSatish Balay   values  - the values inserted
243*4c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
244*4c1ff481SSatish Balay             this happens because the input is columnoriented.
245*4c1ff481SSatish Balay */
246a2d1c673SSatish Balay #undef __FUNC__
247*4c1ff481SSatish Balay #define __FUNC__ "StashValuesColumnoriented_Private"
248*4c1ff481SSatish Balay int StashValuesColumnoriented_Private(Stash *stash,int row,int n, int *idxn,
249*4c1ff481SSatish Balay                                       Scalar *values,int stepval)
250a2d1c673SSatish Balay {
251*4c1ff481SSatish Balay   int    ierr,i;
252a2d1c673SSatish Balay 
253*4c1ff481SSatish Balay   PetscFunctionBegin;
254*4c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
255*4c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
256*4c1ff481SSatish Balay     ierr = StashExpand_Private(stash,n); CHKERRQ(ierr);
257*4c1ff481SSatish Balay   }
258*4c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
259*4c1ff481SSatish Balay     stash->idx[stash->n]   = row;
260*4c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
261*4c1ff481SSatish Balay     stash->array[stash->n] = values[i*stepval];
262*4c1ff481SSatish Balay     stash->n++;
263*4c1ff481SSatish Balay   }
264*4c1ff481SSatish Balay   PetscFunctionReturn(0);
265*4c1ff481SSatish Balay }
266*4c1ff481SSatish Balay 
267*4c1ff481SSatish Balay /*
268*4c1ff481SSatish Balay   StashValuesRoworientedBlocked_Private - inserts blocks of values into the stash.
269*4c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
270*4c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
271*4c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
272*4c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
273*4c1ff481SSatish Balay 
274*4c1ff481SSatish Balay   Input Parameters:
275*4c1ff481SSatish Balay   stash  - the stash
276*4c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
277*4c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
278*4c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
279*4c1ff481SSatish Balay            values. Each block is of size bs*bs.
280*4c1ff481SSatish Balay   values - the values inserted
281*4c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
282*4c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
283*4c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
284*4c1ff481SSatish Balay */
285*4c1ff481SSatish Balay #undef __FUNC__
286*4c1ff481SSatish Balay #define __FUNC__ "StashValuesRoworientedBlocked_Private"
287*4c1ff481SSatish Balay int StashValuesRoworientedBlocked_Private(Stash *stash,int row,int n,int *idxn,Scalar *values,
288*4c1ff481SSatish Balay                                int rmax,int cmax,int idx)
289*4c1ff481SSatish Balay {
290*4c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
291*4c1ff481SSatish Balay   Scalar *vals,*array;
292a2d1c673SSatish Balay 
293a2d1c673SSatish Balay   PetscFunctionBegin;
294a2d1c673SSatish Balay   bs2 = bs*bs;
295*4c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
296*4c1ff481SSatish Balay     ierr = StashExpand_Private(stash,n); CHKERRQ(ierr);
297a2d1c673SSatish Balay   }
298*4c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
299a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
300a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
301a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
302a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
303a2d1c673SSatish Balay        funtion call */
304a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
305a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
306a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
307a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
308a2d1c673SSatish Balay       array += 1;
309a2d1c673SSatish Balay       vals  += cmax*bs;
310a2d1c673SSatish Balay     }
311*4c1ff481SSatish Balay     stash->n++;
312*4c1ff481SSatish Balay   }
313*4c1ff481SSatish Balay   PetscFunctionReturn(0);
314*4c1ff481SSatish Balay }
315*4c1ff481SSatish Balay 
316*4c1ff481SSatish Balay /*
317*4c1ff481SSatish Balay   StashValuesColumnorientedBlocked_Private - inserts blocks of values into the stash.
318*4c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
319*4c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
320*4c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
321*4c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
322*4c1ff481SSatish Balay 
323*4c1ff481SSatish Balay   Input Parameters:
324*4c1ff481SSatish Balay   stash  - the stash
325*4c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
326*4c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
327*4c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
328*4c1ff481SSatish Balay            values. Each block is of size bs*bs.
329*4c1ff481SSatish Balay   values - the values inserted
330*4c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
331*4c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
332*4c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
333*4c1ff481SSatish Balay */
334*4c1ff481SSatish Balay #undef __FUNC__
335*4c1ff481SSatish Balay #define __FUNC__ "StashValuesColumnorientedBlocked_Private"
336*4c1ff481SSatish Balay int StashValuesColumnorientedBlocked_Private(Stash *stash,int row,int n,int *idxn,
337*4c1ff481SSatish Balay                                              Scalar *values,int rmax,int cmax,int idx)
338*4c1ff481SSatish Balay {
339*4c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
340*4c1ff481SSatish Balay   Scalar *vals,*array;
341*4c1ff481SSatish Balay 
342*4c1ff481SSatish Balay   PetscFunctionBegin;
343*4c1ff481SSatish Balay   bs2 = bs*bs;
344*4c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
345*4c1ff481SSatish Balay     ierr = StashExpand_Private(stash,n); CHKERRQ(ierr);
346*4c1ff481SSatish Balay   }
347*4c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
348*4c1ff481SSatish Balay     stash->idx[stash->n]   = row;
349*4c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
350*4c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
351*4c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
352*4c1ff481SSatish Balay      funtion call */
353a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
354a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
355a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
356a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
357a2d1c673SSatish Balay       array += bs;
358a2d1c673SSatish Balay       vals  += rmax*bs;
359a2d1c673SSatish Balay     }
360a2d1c673SSatish Balay     stash->n++;
3619417f4adSLois Curfman McInnes   }
3623a40ed3dSBarry Smith   PetscFunctionReturn(0);
3639417f4adSLois Curfman McInnes }
364*4c1ff481SSatish Balay /*
365*4c1ff481SSatish Balay   StashScatterBegin_Private - Initiates the transfer of values to the
366*4c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
367*4c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
368*4c1ff481SSatish Balay   processors.
369bc5ccf88SSatish Balay 
370*4c1ff481SSatish Balay   Input Parameters:
371*4c1ff481SSatish Balay   stash  - the stash
372*4c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
373*4c1ff481SSatish Balay            for each node.
374*4c1ff481SSatish Balay 
375*4c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
376*4c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
377*4c1ff481SSatish Balay   the proper global indices.
378*4c1ff481SSatish Balay */
379bc5ccf88SSatish Balay #undef __FUNC__
380bc5ccf88SSatish Balay #define __FUNC__ "StashScatterBegin_Private"
381bc5ccf88SSatish Balay int StashScatterBegin_Private(Stash *stash,int *owners)
382bc5ccf88SSatish Balay {
383a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
384a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
385*4c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
386a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
387bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
388bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
389bc5ccf88SSatish Balay 
390bc5ccf88SSatish Balay   PetscFunctionBegin;
391bc5ccf88SSatish Balay 
392*4c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
393bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
394bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
395bc5ccf88SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
396bc5ccf88SSatish Balay   owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
397a2d1c673SSatish Balay 
398bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
399bc5ccf88SSatish Balay     idx = stash->idx[i];
400bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
401*4c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
402bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
403bc5ccf88SSatish Balay       }
404bc5ccf88SSatish Balay     }
405bc5ccf88SSatish Balay   }
406bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
407bc5ccf88SSatish Balay 
408bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
409bc5ccf88SSatish Balay   work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
410bc5ccf88SSatish Balay   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
411bc5ccf88SSatish Balay   nreceives = work[rank];
412bc5ccf88SSatish Balay   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
413bc5ccf88SSatish Balay   nmax = work[rank];
414bc5ccf88SSatish Balay   PetscFree(work);
415bc5ccf88SSatish Balay   /* post receives:
416bc5ccf88SSatish Balay      since we don't know how long each individual message is we
417bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
418bc5ccf88SSatish Balay      this is a lot of wasted space.
419bc5ccf88SSatish Balay   */
420a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
421a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
422a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
423bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
424a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
425bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
426bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
427bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
428bc5ccf88SSatish Balay   }
429bc5ccf88SSatish Balay 
430bc5ccf88SSatish Balay   /* do sends:
431bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
432bc5ccf88SSatish Balay          the ith processor
433bc5ccf88SSatish Balay   */
434a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
435a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
436bc5ccf88SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
437bc5ccf88SSatish Balay   CHKPTRQ(send_waits);
438bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
439bc5ccf88SSatish Balay   starti     = startv + size;
440a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
441bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
442bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
443bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
444bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
445bc5ccf88SSatish Balay   }
446bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
447bc5ccf88SSatish Balay     j = owner[i];
448a2d1c673SSatish Balay     if (bs2 == 1) {
449bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
450a2d1c673SSatish Balay     } else {
451*4c1ff481SSatish Balay       int    k;
452*4c1ff481SSatish Balay       Scalar *buf1,*buf2;
453*4c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
454*4c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
455*4c1ff481SSatish Balay       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
456a2d1c673SSatish Balay     }
457bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
458bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
459bc5ccf88SSatish Balay     startv[j]++;
460bc5ccf88SSatish Balay     starti[j]++;
461bc5ccf88SSatish Balay   }
462bc5ccf88SSatish Balay   startv[0] = 0;
463bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
464bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
465bc5ccf88SSatish Balay     if (procs[i]) {
466a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
467bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
468bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
469bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
470bc5ccf88SSatish Balay     }
471bc5ccf88SSatish Balay   }
472bc5ccf88SSatish Balay   PetscFree(owner);
473bc5ccf88SSatish Balay   PetscFree(startv);
474a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
475a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
476a2d1c673SSatish Balay   stash->nprocs      = nprocs;
477a2d1c673SSatish Balay 
478bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
479bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
480bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
481bc5ccf88SSatish Balay   stash->rmax       = nmax;
482bc5ccf88SSatish Balay   PetscFunctionReturn(0);
483bc5ccf88SSatish Balay }
484bc5ccf88SSatish Balay 
485a2d1c673SSatish Balay /*
486*4c1ff481SSatish Balay    StashScatterGetMesg_Private - This function waits on the receives posted
487*4c1ff481SSatish Balay    in the function StashScatterBegin_Private() and returns one message at
488*4c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
489*4c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
490*4c1ff481SSatish Balay 
491*4c1ff481SSatish Balay    Input Parameters:
492*4c1ff481SSatish Balay    stash - the stash
493*4c1ff481SSatish Balay 
494*4c1ff481SSatish Balay    Output Parameters:
495*4c1ff481SSatish Balay    nvals - the number of entries in the current message.
496*4c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
497*4c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
498*4c1ff481SSatish Balay    vals  - the values
499*4c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
500*4c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
501*4c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
502a2d1c673SSatish Balay */
503bc5ccf88SSatish Balay #undef __FUNC__
504a2d1c673SSatish Balay #define __FUNC__ "StashScatterGetMesg_Private"
505a2d1c673SSatish Balay int StashScatterGetMesg_Private(Stash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
506bc5ccf88SSatish Balay {
507a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
508a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
509a2d1c673SSatish Balay   MPI_Status  recv_status;
510bc5ccf88SSatish Balay 
511bc5ccf88SSatish Balay   PetscFunctionBegin;
512bc5ccf88SSatish Balay 
513a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
514a2d1c673SSatish Balay   /* Return if no more messages to process */
515a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
516a2d1c673SSatish Balay 
517a2d1c673SSatish Balay   flg_v = stash->nprocs;
518a2d1c673SSatish Balay   flg_i = flg_v + size;
519*4c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
520a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
521a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
522a2d1c673SSatish Balay   while (!match_found) {
523a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
524a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
525a2d1c673SSatish Balay     if (i % 2) {
526a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
527a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
528a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
529a2d1c673SSatish Balay     } else {
530a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
531a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
532a2d1c673SSatish Balay       *nvals = *nvals/bs2;
533bc5ccf88SSatish Balay     }
534a2d1c673SSatish Balay 
535a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
536a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
537a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
538a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
539a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
540a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
541a2d1c673SSatish Balay       *cols       = *rows + *nvals;
542a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
543a2d1c673SSatish Balay       *flg        = 1;
544a2d1c673SSatish Balay       stash->nprocessed ++;
545a2d1c673SSatish Balay       match_found = 1;
546bc5ccf88SSatish Balay     }
547bc5ccf88SSatish Balay   }
548bc5ccf88SSatish Balay   PetscFunctionReturn(0);
549bc5ccf88SSatish Balay }
550