xref: /petsc/src/mat/utils/matstash.c (revision 8798bf22c13d6b0f19082f7079c241038c01d971)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*8798bf22SSatish Balay static char vcid[] = "$Id: stash.c,v 1.26 1999/03/17 21:14:34 balay Exp balay $";
32d5177cdSBarry Smith #endif
42d5177cdSBarry Smith 
570f55243SBarry Smith #include "src/mat/matimpl.h"
69417f4adSLois Curfman McInnes 
7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
84c1ff481SSatish Balay 
99417f4adSLois Curfman McInnes /*
10*8798bf22SSatish Balay   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
114c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
124c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
139417f4adSLois Curfman McInnes 
144c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
154c1ff481SSatish Balay 
164c1ff481SSatish Balay   Input Parameters:
174c1ff481SSatish Balay   comm - communicator, required for scatters.
184c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
194c1ff481SSatish Balay 
204c1ff481SSatish Balay   Output Parameters:
214c1ff481SSatish Balay   stash    - the newly created stash
229417f4adSLois Curfman McInnes */
235615d1e5SSatish Balay #undef __FUNC__
24*8798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private"
25*8798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
269417f4adSLois Curfman McInnes {
274c1ff481SSatish Balay   int ierr,flg,max=DEFAULT_STASH_SIZE/(bs*bs);
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
33*8798bf22SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr);
34*8798bf22SSatish Balay   ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
35a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
36a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
37bc5ccf88SSatish Balay 
384c1ff481SSatish Balay   if (bs <= 0) bs = 1;
39a2d1c673SSatish Balay 
404c1ff481SSatish Balay   stash->bs       = bs;
419417f4adSLois Curfman McInnes   stash->nmax     = 0;
429417f4adSLois Curfman McInnes   stash->n        = 0;
434c1ff481SSatish Balay   stash->reallocs = -1;
449417f4adSLois Curfman McInnes   stash->idx      = 0;
459417f4adSLois Curfman McInnes   stash->idy      = 0;
46bc5ccf88SSatish Balay   stash->array    = 0;
479417f4adSLois Curfman McInnes 
48bc5ccf88SSatish Balay   stash->send_waits  = 0;
49bc5ccf88SSatish Balay   stash->recv_waits  = 0;
50a2d1c673SSatish Balay   stash->send_status = 0;
51bc5ccf88SSatish Balay   stash->nsends      = 0;
52bc5ccf88SSatish Balay   stash->nrecvs      = 0;
53bc5ccf88SSatish Balay   stash->svalues     = 0;
54bc5ccf88SSatish Balay   stash->rvalues     = 0;
55bc5ccf88SSatish Balay   stash->rmax        = 0;
56a2d1c673SSatish Balay   stash->nprocs      = 0;
57a2d1c673SSatish Balay   stash->nprocessed  = 0;
583a40ed3dSBarry Smith   PetscFunctionReturn(0);
599417f4adSLois Curfman McInnes }
609417f4adSLois Curfman McInnes 
614c1ff481SSatish Balay /*
62*8798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
634c1ff481SSatish Balay */
645615d1e5SSatish Balay #undef __FUNC__
65*8798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private"
66*8798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
679417f4adSLois Curfman McInnes {
68bc5ccf88SSatish Balay   int ierr;
69a2d1c673SSatish Balay 
70bc5ccf88SSatish Balay   PetscFunctionBegin;
71bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
72bc5ccf88SSatish Balay   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
73bc5ccf88SSatish Balay   PetscFunctionReturn(0);
74bc5ccf88SSatish Balay }
75bc5ccf88SSatish Balay 
764c1ff481SSatish Balay /*
77*8798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
784c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
794c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
804c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
814c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
824c1ff481SSatish Balay    so that the same value can be used the next time through.
834c1ff481SSatish Balay */
84bc5ccf88SSatish Balay #undef __FUNC__
85*8798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private"
86*8798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
87bc5ccf88SSatish Balay {
88a2d1c673SSatish Balay   int         nsends=stash->nsends,ierr;
89a2d1c673SSatish Balay   MPI_Status  *send_status;
90a2d1c673SSatish Balay 
913a40ed3dSBarry Smith   PetscFunctionBegin;
92a2d1c673SSatish Balay   /* wait on sends */
93a2d1c673SSatish Balay   if (nsends) {
94a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
95a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
96a2d1c673SSatish Balay     PetscFree(send_status);
97a2d1c673SSatish Balay   }
98a2d1c673SSatish Balay 
99c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
100d07ff455SSatish Balay      wastage of space is reduced the next time this stash is used */
101c0c58ca7SSatish Balay   stash->oldnmax    = (int)(stash->n * 1.1) + 5;
102d07ff455SSatish Balay   stash->nmax       = 0;
103d07ff455SSatish Balay   stash->n          = 0;
1044c1ff481SSatish Balay   stash->reallocs   = -1;
105bc5ccf88SSatish Balay   stash->rmax       = 0;
106a2d1c673SSatish Balay   stash->nprocessed = 0;
107bc5ccf88SSatish Balay 
108bc5ccf88SSatish Balay   if (stash->array) {
109bc5ccf88SSatish Balay     PetscFree(stash->array);
110bc5ccf88SSatish Balay     stash->array = 0;
111bc5ccf88SSatish Balay     stash->idx   = 0;
112bc5ccf88SSatish Balay     stash->idy   = 0;
113bc5ccf88SSatish Balay   }
114bc5ccf88SSatish Balay   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
115bc5ccf88SSatish Balay   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
116bc5ccf88SSatish Balay   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
117bc5ccf88SSatish Balay   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
118a2d1c673SSatish Balay   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
119bc5ccf88SSatish Balay 
1203a40ed3dSBarry Smith   PetscFunctionReturn(0);
1219417f4adSLois Curfman McInnes }
1229417f4adSLois Curfman McInnes 
1234c1ff481SSatish Balay /*
124*8798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1254c1ff481SSatish Balay 
1264c1ff481SSatish Balay    Input Parameters:
1274c1ff481SSatish Balay    stash    - the stash
1284c1ff481SSatish Balay    nstash   - the size of the stash
1294c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1304c1ff481SSatish Balay 
1314c1ff481SSatish Balay */
1325615d1e5SSatish Balay #undef __FUNC__
133*8798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private"
134*8798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
13597530c3fSBarry Smith {
1363a40ed3dSBarry Smith   PetscFunctionBegin;
1374c1ff481SSatish Balay   *nstash   = stash->n;
1384c1ff481SSatish Balay   *reallocs = stash->reallocs;
139bc5ccf88SSatish Balay   PetscFunctionReturn(0);
140bc5ccf88SSatish Balay }
1414c1ff481SSatish Balay 
1424c1ff481SSatish Balay 
1434c1ff481SSatish Balay /*
144*8798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1454c1ff481SSatish Balay 
1464c1ff481SSatish Balay    Input Parameters:
1474c1ff481SSatish Balay    stash  - the stash
1484c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1494c1ff481SSatish Balay             this value is used while allocating memory.
1504c1ff481SSatish Balay */
151bc5ccf88SSatish Balay #undef __FUNC__
152*8798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private"
153*8798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
154bc5ccf88SSatish Balay {
155bc5ccf88SSatish Balay   PetscFunctionBegin;
156bc5ccf88SSatish Balay   stash->oldnmax = max;
157bc5ccf88SSatish Balay   stash->nmax    = 0;
1583a40ed3dSBarry Smith   PetscFunctionReturn(0);
15997530c3fSBarry Smith }
16097530c3fSBarry Smith 
161*8798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1624c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1634c1ff481SSatish Balay    being inserted into the stash.
1644c1ff481SSatish Balay 
1654c1ff481SSatish Balay    Input Parameters:
1664c1ff481SSatish Balay    stash - the stash
1674c1ff481SSatish Balay    incr  - the minimum increase requested
1684c1ff481SSatish Balay 
1694c1ff481SSatish Balay    Notes:
1704c1ff481SSatish Balay    This routine doubles the currently used memory.
1714c1ff481SSatish Balay  */
1725615d1e5SSatish Balay #undef __FUNC__
173*8798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private"
174*8798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
1759417f4adSLois Curfman McInnes {
176a2d1c673SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2;
177bc5ccf88SSatish Balay   Scalar *n_array;
1789417f4adSLois Curfman McInnes 
1793a40ed3dSBarry Smith   PetscFunctionBegin;
1809417f4adSLois Curfman McInnes   /* allocate a larger stash */
181d07ff455SSatish Balay   if (stash->nmax == 0) newnmax = stash->oldnmax;
182d07ff455SSatish Balay   else                  newnmax = stash->nmax*2;
1834c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
184d07ff455SSatish Balay 
1854c1ff481SSatish Balay   bs2     = stash->bs*stash->bs;
186a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
187a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
188d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
189a2d1c673SSatish Balay   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
190416022c9SBarry Smith   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
191416022c9SBarry Smith   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
1920452661fSBarry Smith   if (stash->array) PetscFree(stash->array);
193d07ff455SSatish Balay   stash->array   = n_array;
194d07ff455SSatish Balay   stash->idx     = n_idx;
195d07ff455SSatish Balay   stash->idy     = n_idy;
196d07ff455SSatish Balay   stash->nmax    = newnmax;
197d07ff455SSatish Balay   stash->oldnmax = newnmax;
198bc5ccf88SSatish Balay   stash->reallocs++;
199bc5ccf88SSatish Balay   PetscFunctionReturn(0);
200bc5ccf88SSatish Balay }
201bc5ccf88SSatish Balay /*
202*8798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2034c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2044c1ff481SSatish Balay   can be inserted with a single call to this function.
2054c1ff481SSatish Balay 
2064c1ff481SSatish Balay   Input Parameters:
2074c1ff481SSatish Balay   stash  - the stash
2084c1ff481SSatish Balay   row    - the global row correspoiding to the values
2094c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2104c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2114c1ff481SSatish Balay   values - the values inserted
212bc5ccf88SSatish Balay */
213bc5ccf88SSatish Balay #undef __FUNC__
214*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private"
215*8798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
216bc5ccf88SSatish Balay {
217a2d1c673SSatish Balay   int    ierr,i;
218bc5ccf88SSatish Balay 
219bc5ccf88SSatish Balay   PetscFunctionBegin;
2204c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2214c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
222*8798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
2239417f4adSLois Curfman McInnes   }
2244c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2259417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
226a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
227a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
228a2d1c673SSatish Balay     stash->n++;
2299417f4adSLois Curfman McInnes   }
230a2d1c673SSatish Balay   PetscFunctionReturn(0);
231a2d1c673SSatish Balay }
2324c1ff481SSatish Balay /*
233*8798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2344c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2354c1ff481SSatish Balay   can be inserted with a single call to this function.
236a2d1c673SSatish Balay 
2374c1ff481SSatish Balay   Input Parameters:
2384c1ff481SSatish Balay   stash   - the stash
2394c1ff481SSatish Balay   row     - the global row correspoiding to the values
2404c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2414c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2424c1ff481SSatish Balay   values  - the values inserted
2434c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2444c1ff481SSatish Balay             this happens because the input is columnoriented.
2454c1ff481SSatish Balay */
246a2d1c673SSatish Balay #undef __FUNC__
247*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private"
248*8798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
2494c1ff481SSatish Balay                                       Scalar *values,int stepval)
250a2d1c673SSatish Balay {
2514c1ff481SSatish Balay   int    ierr,i;
252a2d1c673SSatish Balay 
2534c1ff481SSatish Balay   PetscFunctionBegin;
2544c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2554c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
256*8798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
2574c1ff481SSatish Balay   }
2584c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2594c1ff481SSatish Balay     stash->idx[stash->n]   = row;
2604c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
2614c1ff481SSatish Balay     stash->array[stash->n] = values[i*stepval];
2624c1ff481SSatish Balay     stash->n++;
2634c1ff481SSatish Balay   }
2644c1ff481SSatish Balay   PetscFunctionReturn(0);
2654c1ff481SSatish Balay }
2664c1ff481SSatish Balay 
2674c1ff481SSatish Balay /*
268*8798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
2694c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
2704c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
2714c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
2724c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
2734c1ff481SSatish Balay 
2744c1ff481SSatish Balay   Input Parameters:
2754c1ff481SSatish Balay   stash  - the stash
2764c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
2774c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2784c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
2794c1ff481SSatish Balay            values. Each block is of size bs*bs.
2804c1ff481SSatish Balay   values - the values inserted
2814c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
2824c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
2834c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
2844c1ff481SSatish Balay */
2854c1ff481SSatish Balay #undef __FUNC__
286*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private"
287*8798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
2884c1ff481SSatish Balay                                int rmax,int cmax,int idx)
2894c1ff481SSatish Balay {
2904c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
2914c1ff481SSatish Balay   Scalar *vals,*array;
292a2d1c673SSatish Balay 
293a2d1c673SSatish Balay   PetscFunctionBegin;
294a2d1c673SSatish Balay   bs2 = bs*bs;
2954c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
296*8798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
297a2d1c673SSatish Balay   }
2984c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
299a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
300a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
301a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
302a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
303a2d1c673SSatish Balay        funtion call */
304a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
305a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
306a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
307a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
308a2d1c673SSatish Balay       array += 1;
309a2d1c673SSatish Balay       vals  += cmax*bs;
310a2d1c673SSatish Balay     }
3114c1ff481SSatish Balay     stash->n++;
3124c1ff481SSatish Balay   }
3134c1ff481SSatish Balay   PetscFunctionReturn(0);
3144c1ff481SSatish Balay }
3154c1ff481SSatish Balay 
3164c1ff481SSatish Balay /*
317*8798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3184c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3194c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3204c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3214c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3224c1ff481SSatish Balay 
3234c1ff481SSatish Balay   Input Parameters:
3244c1ff481SSatish Balay   stash  - the stash
3254c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3264c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3274c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3284c1ff481SSatish Balay            values. Each block is of size bs*bs.
3294c1ff481SSatish Balay   values - the values inserted
3304c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3314c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3324c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3334c1ff481SSatish Balay */
3344c1ff481SSatish Balay #undef __FUNC__
335*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private"
336*8798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
3374c1ff481SSatish Balay                                              Scalar *values,int rmax,int cmax,int idx)
3384c1ff481SSatish Balay {
3394c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3404c1ff481SSatish Balay   Scalar *vals,*array;
3414c1ff481SSatish Balay 
3424c1ff481SSatish Balay   PetscFunctionBegin;
3434c1ff481SSatish Balay   bs2 = bs*bs;
3444c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
345*8798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
3464c1ff481SSatish Balay   }
3474c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
3484c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3494c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3504c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3514c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3524c1ff481SSatish Balay      funtion call */
353a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
354a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
355a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
356a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
357a2d1c673SSatish Balay       array += bs;
358a2d1c673SSatish Balay       vals  += rmax*bs;
359a2d1c673SSatish Balay     }
360a2d1c673SSatish Balay     stash->n++;
3619417f4adSLois Curfman McInnes   }
3623a40ed3dSBarry Smith   PetscFunctionReturn(0);
3639417f4adSLois Curfman McInnes }
3644c1ff481SSatish Balay /*
365*8798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
3664c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
3674c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
3684c1ff481SSatish Balay   processors.
369bc5ccf88SSatish Balay 
3704c1ff481SSatish Balay   Input Parameters:
3714c1ff481SSatish Balay   stash  - the stash
3724c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
3734c1ff481SSatish Balay            for each node.
3744c1ff481SSatish Balay 
3754c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
3764c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
3774c1ff481SSatish Balay   the proper global indices.
3784c1ff481SSatish Balay */
379bc5ccf88SSatish Balay #undef __FUNC__
380*8798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private"
381*8798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
382bc5ccf88SSatish Balay {
383a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
384a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
3854c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
386a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
387bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
388bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
389bc5ccf88SSatish Balay 
390bc5ccf88SSatish Balay   PetscFunctionBegin;
391bc5ccf88SSatish Balay 
3924c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
393bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
394bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
395bc5ccf88SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
396bc5ccf88SSatish Balay   owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
397a2d1c673SSatish Balay 
398bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
399bc5ccf88SSatish Balay     idx = stash->idx[i];
400bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
4014c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
402bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
403bc5ccf88SSatish Balay       }
404bc5ccf88SSatish Balay     }
405bc5ccf88SSatish Balay   }
406bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
407bc5ccf88SSatish Balay 
408bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
409bc5ccf88SSatish Balay   work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
410bc5ccf88SSatish Balay   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
411bc5ccf88SSatish Balay   nreceives = work[rank];
412bc5ccf88SSatish Balay   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
413bc5ccf88SSatish Balay   nmax = work[rank];
414bc5ccf88SSatish Balay   PetscFree(work);
415bc5ccf88SSatish Balay   /* post receives:
416bc5ccf88SSatish Balay      since we don't know how long each individual message is we
417bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
418bc5ccf88SSatish Balay      this is a lot of wasted space.
419bc5ccf88SSatish Balay   */
420a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
421a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
422a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
423bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
424a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
425bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
426bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
427bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
428bc5ccf88SSatish Balay   }
429bc5ccf88SSatish Balay 
430bc5ccf88SSatish Balay   /* do sends:
431bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
432bc5ccf88SSatish Balay          the ith processor
433bc5ccf88SSatish Balay   */
434a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
435a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
436bc5ccf88SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
437bc5ccf88SSatish Balay   CHKPTRQ(send_waits);
438bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
439bc5ccf88SSatish Balay   starti     = startv + size;
440a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
441bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
442bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
443bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
444bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
445bc5ccf88SSatish Balay   }
446bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
447bc5ccf88SSatish Balay     j = owner[i];
448a2d1c673SSatish Balay     if (bs2 == 1) {
449bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
450a2d1c673SSatish Balay     } else {
4514c1ff481SSatish Balay       int    k;
4524c1ff481SSatish Balay       Scalar *buf1,*buf2;
4534c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4544c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4554c1ff481SSatish Balay       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
456a2d1c673SSatish Balay     }
457bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
458bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
459bc5ccf88SSatish Balay     startv[j]++;
460bc5ccf88SSatish Balay     starti[j]++;
461bc5ccf88SSatish Balay   }
462bc5ccf88SSatish Balay   startv[0] = 0;
463bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
464bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
465bc5ccf88SSatish Balay     if (procs[i]) {
466a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
467bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
468bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
469bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
470bc5ccf88SSatish Balay     }
471bc5ccf88SSatish Balay   }
472bc5ccf88SSatish Balay   PetscFree(owner);
473bc5ccf88SSatish Balay   PetscFree(startv);
474a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
475a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
476a2d1c673SSatish Balay   stash->nprocs      = nprocs;
477a2d1c673SSatish Balay 
478bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
479bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
480bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
481bc5ccf88SSatish Balay   stash->rmax       = nmax;
482bc5ccf88SSatish Balay   PetscFunctionReturn(0);
483bc5ccf88SSatish Balay }
484bc5ccf88SSatish Balay 
485a2d1c673SSatish Balay /*
486*8798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
487*8798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
4884c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
4894c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
4904c1ff481SSatish Balay 
4914c1ff481SSatish Balay    Input Parameters:
4924c1ff481SSatish Balay    stash - the stash
4934c1ff481SSatish Balay 
4944c1ff481SSatish Balay    Output Parameters:
4954c1ff481SSatish Balay    nvals - the number of entries in the current message.
4964c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
4974c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
4984c1ff481SSatish Balay    vals  - the values
4994c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5004c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5014c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
502a2d1c673SSatish Balay */
503bc5ccf88SSatish Balay #undef __FUNC__
504*8798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private"
505*8798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
506bc5ccf88SSatish Balay {
507a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
508a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
509a2d1c673SSatish Balay   MPI_Status  recv_status;
510bc5ccf88SSatish Balay 
511bc5ccf88SSatish Balay   PetscFunctionBegin;
512bc5ccf88SSatish Balay 
513a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
514a2d1c673SSatish Balay   /* Return if no more messages to process */
515a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
516a2d1c673SSatish Balay 
517a2d1c673SSatish Balay   flg_v = stash->nprocs;
518a2d1c673SSatish Balay   flg_i = flg_v + size;
5194c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
520a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
521a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
522a2d1c673SSatish Balay   while (!match_found) {
523a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
524a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
525a2d1c673SSatish Balay     if (i % 2) {
526a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
527a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
528a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
529a2d1c673SSatish Balay     } else {
530a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
531a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
532a2d1c673SSatish Balay       *nvals = *nvals/bs2;
533bc5ccf88SSatish Balay     }
534a2d1c673SSatish Balay 
535a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
536a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
537a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
538a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
539a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
540a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
541a2d1c673SSatish Balay       *cols       = *rows + *nvals;
542a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
543a2d1c673SSatish Balay       *flg        = 1;
544a2d1c673SSatish Balay       stash->nprocessed ++;
545a2d1c673SSatish Balay       match_found = 1;
546bc5ccf88SSatish Balay     }
547bc5ccf88SSatish Balay   }
548bc5ccf88SSatish Balay   PetscFunctionReturn(0);
549bc5ccf88SSatish Balay }
550