xref: /petsc/src/mat/utils/matstash.c (revision 94b769a51ce616ae34d8a246e886be2664ee75a4)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*94b769a5SSatish Balay static char vcid[] = "$Id: matstash.c,v 1.27 1999/03/18 00:33:52 balay Exp balay $";
32d5177cdSBarry Smith #endif
42d5177cdSBarry Smith 
570f55243SBarry Smith #include "src/mat/matimpl.h"
69417f4adSLois Curfman McInnes 
7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
84c1ff481SSatish Balay 
99417f4adSLois Curfman McInnes /*
108798bf22SSatish Balay   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
114c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
124c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
139417f4adSLois Curfman McInnes 
144c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
154c1ff481SSatish Balay 
164c1ff481SSatish Balay   Input Parameters:
174c1ff481SSatish Balay   comm - communicator, required for scatters.
184c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
194c1ff481SSatish Balay 
204c1ff481SSatish Balay   Output Parameters:
214c1ff481SSatish Balay   stash    - the newly created stash
229417f4adSLois Curfman McInnes */
235615d1e5SSatish Balay #undef __FUNC__
248798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private"
258798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
269417f4adSLois Curfman McInnes {
27*94b769a5SSatish Balay   int ierr,flg,max=DEFAULT_STASH_SIZE;
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
338798bf22SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr);
348798bf22SSatish Balay   ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
35a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
36a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
37bc5ccf88SSatish Balay 
384c1ff481SSatish Balay   if (bs <= 0) bs = 1;
39a2d1c673SSatish Balay 
404c1ff481SSatish Balay   stash->bs       = bs;
419417f4adSLois Curfman McInnes   stash->nmax     = 0;
429417f4adSLois Curfman McInnes   stash->n        = 0;
434c1ff481SSatish Balay   stash->reallocs = -1;
449417f4adSLois Curfman McInnes   stash->idx      = 0;
459417f4adSLois Curfman McInnes   stash->idy      = 0;
46bc5ccf88SSatish Balay   stash->array    = 0;
479417f4adSLois Curfman McInnes 
48bc5ccf88SSatish Balay   stash->send_waits  = 0;
49bc5ccf88SSatish Balay   stash->recv_waits  = 0;
50a2d1c673SSatish Balay   stash->send_status = 0;
51bc5ccf88SSatish Balay   stash->nsends      = 0;
52bc5ccf88SSatish Balay   stash->nrecvs      = 0;
53bc5ccf88SSatish Balay   stash->svalues     = 0;
54bc5ccf88SSatish Balay   stash->rvalues     = 0;
55bc5ccf88SSatish Balay   stash->rmax        = 0;
56a2d1c673SSatish Balay   stash->nprocs      = 0;
57a2d1c673SSatish Balay   stash->nprocessed  = 0;
583a40ed3dSBarry Smith   PetscFunctionReturn(0);
599417f4adSLois Curfman McInnes }
609417f4adSLois Curfman McInnes 
614c1ff481SSatish Balay /*
628798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
634c1ff481SSatish Balay */
645615d1e5SSatish Balay #undef __FUNC__
658798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private"
668798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
679417f4adSLois Curfman McInnes {
68bc5ccf88SSatish Balay   int ierr;
69a2d1c673SSatish Balay 
70bc5ccf88SSatish Balay   PetscFunctionBegin;
71bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
72bc5ccf88SSatish Balay   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
73bc5ccf88SSatish Balay   PetscFunctionReturn(0);
74bc5ccf88SSatish Balay }
75bc5ccf88SSatish Balay 
764c1ff481SSatish Balay /*
778798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
784c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
794c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
804c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
814c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
824c1ff481SSatish Balay    so that the same value can be used the next time through.
834c1ff481SSatish Balay */
84bc5ccf88SSatish Balay #undef __FUNC__
858798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private"
868798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
87bc5ccf88SSatish Balay {
88*94b769a5SSatish Balay   int         nsends=stash->nsends,ierr,bs2;
89a2d1c673SSatish Balay   MPI_Status  *send_status;
90a2d1c673SSatish Balay 
913a40ed3dSBarry Smith   PetscFunctionBegin;
92a2d1c673SSatish Balay   /* wait on sends */
93a2d1c673SSatish Balay   if (nsends) {
94a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
95a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
96a2d1c673SSatish Balay     PetscFree(send_status);
97a2d1c673SSatish Balay   }
98a2d1c673SSatish Balay 
99c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
100d07ff455SSatish Balay      wastage of space is reduced the next time this stash is used */
101*94b769a5SSatish Balay   bs2               = stash->bs*stash->bs;
102*94b769a5SSatish Balay   stash->oldnmax    = ((int)(stash->n * 1.1) + 5)*bs2;
103d07ff455SSatish Balay   stash->nmax       = 0;
104d07ff455SSatish Balay   stash->n          = 0;
1054c1ff481SSatish Balay   stash->reallocs   = -1;
106bc5ccf88SSatish Balay   stash->rmax       = 0;
107a2d1c673SSatish Balay   stash->nprocessed = 0;
108bc5ccf88SSatish Balay 
109bc5ccf88SSatish Balay   if (stash->array) {
110bc5ccf88SSatish Balay     PetscFree(stash->array);
111bc5ccf88SSatish Balay     stash->array = 0;
112bc5ccf88SSatish Balay     stash->idx   = 0;
113bc5ccf88SSatish Balay     stash->idy   = 0;
114bc5ccf88SSatish Balay   }
115bc5ccf88SSatish Balay   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
116bc5ccf88SSatish Balay   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
117bc5ccf88SSatish Balay   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
118bc5ccf88SSatish Balay   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
119a2d1c673SSatish Balay   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
120bc5ccf88SSatish Balay 
1213a40ed3dSBarry Smith   PetscFunctionReturn(0);
1229417f4adSLois Curfman McInnes }
1239417f4adSLois Curfman McInnes 
1244c1ff481SSatish Balay /*
1258798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1264c1ff481SSatish Balay 
1274c1ff481SSatish Balay    Input Parameters:
1284c1ff481SSatish Balay    stash    - the stash
129*94b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1304c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1314c1ff481SSatish Balay 
1324c1ff481SSatish Balay */
1335615d1e5SSatish Balay #undef __FUNC__
1348798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private"
1358798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
13697530c3fSBarry Smith {
137*94b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
138*94b769a5SSatish Balay 
1393a40ed3dSBarry Smith   PetscFunctionBegin;
140*94b769a5SSatish Balay   *nstash   = stash->n*bs2;
1414c1ff481SSatish Balay   *reallocs = stash->reallocs;
142bc5ccf88SSatish Balay   PetscFunctionReturn(0);
143bc5ccf88SSatish Balay }
1444c1ff481SSatish Balay 
1454c1ff481SSatish Balay 
1464c1ff481SSatish Balay /*
1478798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1484c1ff481SSatish Balay 
1494c1ff481SSatish Balay    Input Parameters:
1504c1ff481SSatish Balay    stash  - the stash
1514c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1524c1ff481SSatish Balay             this value is used while allocating memory.
1534c1ff481SSatish Balay */
154bc5ccf88SSatish Balay #undef __FUNC__
1558798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private"
1568798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
157bc5ccf88SSatish Balay {
158bc5ccf88SSatish Balay   PetscFunctionBegin;
159bc5ccf88SSatish Balay   stash->oldnmax = max;
160bc5ccf88SSatish Balay   stash->nmax    = 0;
1613a40ed3dSBarry Smith   PetscFunctionReturn(0);
16297530c3fSBarry Smith }
16397530c3fSBarry Smith 
1648798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1654c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1664c1ff481SSatish Balay    being inserted into the stash.
1674c1ff481SSatish Balay 
1684c1ff481SSatish Balay    Input Parameters:
1694c1ff481SSatish Balay    stash - the stash
1704c1ff481SSatish Balay    incr  - the minimum increase requested
1714c1ff481SSatish Balay 
1724c1ff481SSatish Balay    Notes:
1734c1ff481SSatish Balay    This routine doubles the currently used memory.
1744c1ff481SSatish Balay  */
1755615d1e5SSatish Balay #undef __FUNC__
1768798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private"
1778798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
1789417f4adSLois Curfman McInnes {
179a2d1c673SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2;
180bc5ccf88SSatish Balay   Scalar *n_array;
1819417f4adSLois Curfman McInnes 
1823a40ed3dSBarry Smith   PetscFunctionBegin;
1839417f4adSLois Curfman McInnes   /* allocate a larger stash */
184*94b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
185*94b769a5SSatish Balay   if (stash->nmax == 0) newnmax = stash->oldnmax/bs2;
186d07ff455SSatish Balay   else                  newnmax = stash->nmax*2;
1874c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
188d07ff455SSatish Balay 
189a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
190a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
191d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
192a2d1c673SSatish Balay   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
193416022c9SBarry Smith   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
194416022c9SBarry Smith   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
1950452661fSBarry Smith   if (stash->array) PetscFree(stash->array);
196d07ff455SSatish Balay   stash->array   = n_array;
197d07ff455SSatish Balay   stash->idx     = n_idx;
198d07ff455SSatish Balay   stash->idy     = n_idy;
199d07ff455SSatish Balay   stash->nmax    = newnmax;
200*94b769a5SSatish Balay   stash->oldnmax = newnmax*bs2;
201bc5ccf88SSatish Balay   stash->reallocs++;
202bc5ccf88SSatish Balay   PetscFunctionReturn(0);
203bc5ccf88SSatish Balay }
204bc5ccf88SSatish Balay /*
2058798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2064c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2074c1ff481SSatish Balay   can be inserted with a single call to this function.
2084c1ff481SSatish Balay 
2094c1ff481SSatish Balay   Input Parameters:
2104c1ff481SSatish Balay   stash  - the stash
2114c1ff481SSatish Balay   row    - the global row correspoiding to the values
2124c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2134c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2144c1ff481SSatish Balay   values - the values inserted
215bc5ccf88SSatish Balay */
216bc5ccf88SSatish Balay #undef __FUNC__
2178798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private"
2188798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
219bc5ccf88SSatish Balay {
220a2d1c673SSatish Balay   int    ierr,i;
221bc5ccf88SSatish Balay 
222bc5ccf88SSatish Balay   PetscFunctionBegin;
2234c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2244c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2258798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
2269417f4adSLois Curfman McInnes   }
2274c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2289417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
229a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
230a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
231a2d1c673SSatish Balay     stash->n++;
2329417f4adSLois Curfman McInnes   }
233a2d1c673SSatish Balay   PetscFunctionReturn(0);
234a2d1c673SSatish Balay }
2354c1ff481SSatish Balay /*
2368798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2374c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2384c1ff481SSatish Balay   can be inserted with a single call to this function.
239a2d1c673SSatish Balay 
2404c1ff481SSatish Balay   Input Parameters:
2414c1ff481SSatish Balay   stash   - the stash
2424c1ff481SSatish Balay   row     - the global row correspoiding to the values
2434c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2444c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2454c1ff481SSatish Balay   values  - the values inserted
2464c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2474c1ff481SSatish Balay             this happens because the input is columnoriented.
2484c1ff481SSatish Balay */
249a2d1c673SSatish Balay #undef __FUNC__
2508798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private"
2518798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
2524c1ff481SSatish Balay                                       Scalar *values,int stepval)
253a2d1c673SSatish Balay {
2544c1ff481SSatish Balay   int    ierr,i;
255a2d1c673SSatish Balay 
2564c1ff481SSatish Balay   PetscFunctionBegin;
2574c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2584c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2598798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
2604c1ff481SSatish Balay   }
2614c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2624c1ff481SSatish Balay     stash->idx[stash->n]   = row;
2634c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
2644c1ff481SSatish Balay     stash->array[stash->n] = values[i*stepval];
2654c1ff481SSatish Balay     stash->n++;
2664c1ff481SSatish Balay   }
2674c1ff481SSatish Balay   PetscFunctionReturn(0);
2684c1ff481SSatish Balay }
2694c1ff481SSatish Balay 
2704c1ff481SSatish Balay /*
2718798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
2724c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
2734c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
2744c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
2754c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
2764c1ff481SSatish Balay 
2774c1ff481SSatish Balay   Input Parameters:
2784c1ff481SSatish Balay   stash  - the stash
2794c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
2804c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2814c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
2824c1ff481SSatish Balay            values. Each block is of size bs*bs.
2834c1ff481SSatish Balay   values - the values inserted
2844c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
2854c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
2864c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
2874c1ff481SSatish Balay */
2884c1ff481SSatish Balay #undef __FUNC__
2898798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private"
2908798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
2914c1ff481SSatish Balay                                int rmax,int cmax,int idx)
2924c1ff481SSatish Balay {
2934c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
2944c1ff481SSatish Balay   Scalar *vals,*array;
295a2d1c673SSatish Balay 
296a2d1c673SSatish Balay   PetscFunctionBegin;
297a2d1c673SSatish Balay   bs2 = bs*bs;
2984c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
2998798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
300a2d1c673SSatish Balay   }
3014c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
302a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
303a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
304a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
305a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
306a2d1c673SSatish Balay        funtion call */
307a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
308a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
309a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
310a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
311a2d1c673SSatish Balay       array += 1;
312a2d1c673SSatish Balay       vals  += cmax*bs;
313a2d1c673SSatish Balay     }
3144c1ff481SSatish Balay     stash->n++;
3154c1ff481SSatish Balay   }
3164c1ff481SSatish Balay   PetscFunctionReturn(0);
3174c1ff481SSatish Balay }
3184c1ff481SSatish Balay 
3194c1ff481SSatish Balay /*
3208798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3214c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3224c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3234c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3244c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3254c1ff481SSatish Balay 
3264c1ff481SSatish Balay   Input Parameters:
3274c1ff481SSatish Balay   stash  - the stash
3284c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3294c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3304c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3314c1ff481SSatish Balay            values. Each block is of size bs*bs.
3324c1ff481SSatish Balay   values - the values inserted
3334c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3344c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3354c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3364c1ff481SSatish Balay */
3374c1ff481SSatish Balay #undef __FUNC__
3388798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private"
3398798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
3404c1ff481SSatish Balay                                              Scalar *values,int rmax,int cmax,int idx)
3414c1ff481SSatish Balay {
3424c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3434c1ff481SSatish Balay   Scalar *vals,*array;
3444c1ff481SSatish Balay 
3454c1ff481SSatish Balay   PetscFunctionBegin;
3464c1ff481SSatish Balay   bs2 = bs*bs;
3474c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3488798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
3494c1ff481SSatish Balay   }
3504c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
3514c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3524c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3534c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3544c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3554c1ff481SSatish Balay      funtion call */
356a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
357a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
358a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
359a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
360a2d1c673SSatish Balay       array += bs;
361a2d1c673SSatish Balay       vals  += rmax*bs;
362a2d1c673SSatish Balay     }
363a2d1c673SSatish Balay     stash->n++;
3649417f4adSLois Curfman McInnes   }
3653a40ed3dSBarry Smith   PetscFunctionReturn(0);
3669417f4adSLois Curfman McInnes }
3674c1ff481SSatish Balay /*
3688798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
3694c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
3704c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
3714c1ff481SSatish Balay   processors.
372bc5ccf88SSatish Balay 
3734c1ff481SSatish Balay   Input Parameters:
3744c1ff481SSatish Balay   stash  - the stash
3754c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
3764c1ff481SSatish Balay            for each node.
3774c1ff481SSatish Balay 
3784c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
3794c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
3804c1ff481SSatish Balay   the proper global indices.
3814c1ff481SSatish Balay */
382bc5ccf88SSatish Balay #undef __FUNC__
3838798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private"
3848798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
385bc5ccf88SSatish Balay {
386a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
387a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
3884c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
389a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
390bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
391bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
392bc5ccf88SSatish Balay 
393bc5ccf88SSatish Balay   PetscFunctionBegin;
394bc5ccf88SSatish Balay 
3954c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
396bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
397bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
398bc5ccf88SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
399bc5ccf88SSatish Balay   owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
400a2d1c673SSatish Balay 
401bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
402bc5ccf88SSatish Balay     idx = stash->idx[i];
403bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
4044c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
405bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
406bc5ccf88SSatish Balay       }
407bc5ccf88SSatish Balay     }
408bc5ccf88SSatish Balay   }
409bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
410bc5ccf88SSatish Balay 
411bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
412bc5ccf88SSatish Balay   work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
413bc5ccf88SSatish Balay   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
414bc5ccf88SSatish Balay   nreceives = work[rank];
415bc5ccf88SSatish Balay   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
416bc5ccf88SSatish Balay   nmax = work[rank];
417bc5ccf88SSatish Balay   PetscFree(work);
418bc5ccf88SSatish Balay   /* post receives:
419bc5ccf88SSatish Balay      since we don't know how long each individual message is we
420bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
421bc5ccf88SSatish Balay      this is a lot of wasted space.
422bc5ccf88SSatish Balay   */
423a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
424a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
425a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
426bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
427a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
428bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
429bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
430bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
431bc5ccf88SSatish Balay   }
432bc5ccf88SSatish Balay 
433bc5ccf88SSatish Balay   /* do sends:
434bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
435bc5ccf88SSatish Balay          the ith processor
436bc5ccf88SSatish Balay   */
437a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
438a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
439bc5ccf88SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
440bc5ccf88SSatish Balay   CHKPTRQ(send_waits);
441bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
442bc5ccf88SSatish Balay   starti     = startv + size;
443a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
444bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
445bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
446bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
447bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
448bc5ccf88SSatish Balay   }
449bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
450bc5ccf88SSatish Balay     j = owner[i];
451a2d1c673SSatish Balay     if (bs2 == 1) {
452bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
453a2d1c673SSatish Balay     } else {
4544c1ff481SSatish Balay       int    k;
4554c1ff481SSatish Balay       Scalar *buf1,*buf2;
4564c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4574c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4584c1ff481SSatish Balay       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
459a2d1c673SSatish Balay     }
460bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
461bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
462bc5ccf88SSatish Balay     startv[j]++;
463bc5ccf88SSatish Balay     starti[j]++;
464bc5ccf88SSatish Balay   }
465bc5ccf88SSatish Balay   startv[0] = 0;
466bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
467bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
468bc5ccf88SSatish Balay     if (procs[i]) {
469a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
470bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
471bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
472bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
473bc5ccf88SSatish Balay     }
474bc5ccf88SSatish Balay   }
475bc5ccf88SSatish Balay   PetscFree(owner);
476bc5ccf88SSatish Balay   PetscFree(startv);
477a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
478a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
479a2d1c673SSatish Balay   stash->nprocs      = nprocs;
480a2d1c673SSatish Balay 
481bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
482bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
483bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
484bc5ccf88SSatish Balay   stash->rmax       = nmax;
485bc5ccf88SSatish Balay   PetscFunctionReturn(0);
486bc5ccf88SSatish Balay }
487bc5ccf88SSatish Balay 
488a2d1c673SSatish Balay /*
4898798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
4908798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
4914c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
4924c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
4934c1ff481SSatish Balay 
4944c1ff481SSatish Balay    Input Parameters:
4954c1ff481SSatish Balay    stash - the stash
4964c1ff481SSatish Balay 
4974c1ff481SSatish Balay    Output Parameters:
4984c1ff481SSatish Balay    nvals - the number of entries in the current message.
4994c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5004c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5014c1ff481SSatish Balay    vals  - the values
5024c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5034c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5044c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
505a2d1c673SSatish Balay */
506bc5ccf88SSatish Balay #undef __FUNC__
5078798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private"
5088798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
509bc5ccf88SSatish Balay {
510a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
511a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
512a2d1c673SSatish Balay   MPI_Status  recv_status;
513bc5ccf88SSatish Balay 
514bc5ccf88SSatish Balay   PetscFunctionBegin;
515bc5ccf88SSatish Balay 
516a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
517a2d1c673SSatish Balay   /* Return if no more messages to process */
518a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
519a2d1c673SSatish Balay 
520a2d1c673SSatish Balay   flg_v = stash->nprocs;
521a2d1c673SSatish Balay   flg_i = flg_v + size;
5224c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
523a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
524a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
525a2d1c673SSatish Balay   while (!match_found) {
526a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
527a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
528a2d1c673SSatish Balay     if (i % 2) {
529a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
530a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
531a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
532a2d1c673SSatish Balay     } else {
533a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
534a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
535a2d1c673SSatish Balay       *nvals = *nvals/bs2;
536bc5ccf88SSatish Balay     }
537a2d1c673SSatish Balay 
538a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
539a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
540a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
541a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
542a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
543a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
544a2d1c673SSatish Balay       *cols       = *rows + *nvals;
545a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
546a2d1c673SSatish Balay       *flg        = 1;
547a2d1c673SSatish Balay       stash->nprocessed ++;
548a2d1c673SSatish Balay       match_found = 1;
549bc5ccf88SSatish Balay     }
550bc5ccf88SSatish Balay   }
551bc5ccf88SSatish Balay   PetscFunctionReturn(0);
552bc5ccf88SSatish Balay }
553