xref: /petsc/src/mat/utils/matstash.c (revision 6831982abb6453c2f3e25efecb5d0951d333371e)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*6831982aSBarry Smith static char vcid[] = "$Id: matstash.c,v 1.35 1999/06/30 23:52:35 balay Exp bsmith $";
32d5177cdSBarry Smith #endif
42d5177cdSBarry Smith 
570f55243SBarry Smith #include "src/mat/matimpl.h"
69417f4adSLois Curfman McInnes 
7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
84c1ff481SSatish Balay 
99417f4adSLois Curfman McInnes /*
108798bf22SSatish Balay   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
114c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
124c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
139417f4adSLois Curfman McInnes 
144c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
154c1ff481SSatish Balay 
164c1ff481SSatish Balay   Input Parameters:
174c1ff481SSatish Balay   comm - communicator, required for scatters.
184c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
194c1ff481SSatish Balay 
204c1ff481SSatish Balay   Output Parameters:
214c1ff481SSatish Balay   stash    - the newly created stash
229417f4adSLois Curfman McInnes */
235615d1e5SSatish Balay #undef __FUNC__
248798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private"
258798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
269417f4adSLois Curfman McInnes {
27434d7ff9SSatish Balay   int ierr,flg,max,*opt,nopt;
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
34a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
35bc5ccf88SSatish Balay 
36434d7ff9SSatish Balay   nopt = stash->size;
37434d7ff9SSatish Balay   opt  = (int*) PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt);
38434d7ff9SSatish Balay   ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
39434d7ff9SSatish Balay   if (flg) {
40434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
41434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
42434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
43f4ab19daSSatish Balay     else                          max = 0; /* Use default */
44434d7ff9SSatish Balay     stash->umax = max;
45434d7ff9SSatish Balay   } else {
46434d7ff9SSatish Balay     stash->umax = 0;
47434d7ff9SSatish Balay   }
48606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
494c1ff481SSatish Balay   if (bs <= 0) bs = 1;
50a2d1c673SSatish Balay 
514c1ff481SSatish Balay   stash->bs       = bs;
529417f4adSLois Curfman McInnes   stash->nmax     = 0;
53434d7ff9SSatish Balay   stash->oldnmax  = 0;
549417f4adSLois Curfman McInnes   stash->n        = 0;
554c1ff481SSatish Balay   stash->reallocs = -1;
569417f4adSLois Curfman McInnes   stash->idx      = 0;
579417f4adSLois Curfman McInnes   stash->idy      = 0;
58bc5ccf88SSatish Balay   stash->array    = 0;
599417f4adSLois Curfman McInnes 
60bc5ccf88SSatish Balay   stash->send_waits  = 0;
61bc5ccf88SSatish Balay   stash->recv_waits  = 0;
62a2d1c673SSatish Balay   stash->send_status = 0;
63bc5ccf88SSatish Balay   stash->nsends      = 0;
64bc5ccf88SSatish Balay   stash->nrecvs      = 0;
65bc5ccf88SSatish Balay   stash->svalues     = 0;
66bc5ccf88SSatish Balay   stash->rvalues     = 0;
67bc5ccf88SSatish Balay   stash->rmax        = 0;
68a2d1c673SSatish Balay   stash->nprocs      = 0;
69a2d1c673SSatish Balay   stash->nprocessed  = 0;
703a40ed3dSBarry Smith   PetscFunctionReturn(0);
719417f4adSLois Curfman McInnes }
729417f4adSLois Curfman McInnes 
734c1ff481SSatish Balay /*
748798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
754c1ff481SSatish Balay */
765615d1e5SSatish Balay #undef __FUNC__
778798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private"
788798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
799417f4adSLois Curfman McInnes {
80bc5ccf88SSatish Balay   int ierr;
81a2d1c673SSatish Balay 
82bc5ccf88SSatish Balay   PetscFunctionBegin;
83bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr);
84606d414cSSatish Balay   if (stash->array) {
85606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
86606d414cSSatish Balay     stash->array = 0;
87606d414cSSatish Balay   }
88bc5ccf88SSatish Balay   PetscFunctionReturn(0);
89bc5ccf88SSatish Balay }
90bc5ccf88SSatish Balay 
914c1ff481SSatish Balay /*
928798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
934c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
944c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
954c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
964c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
974c1ff481SSatish Balay    so that the same value can be used the next time through.
984c1ff481SSatish Balay */
99bc5ccf88SSatish Balay #undef __FUNC__
1008798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private"
1018798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
102bc5ccf88SSatish Balay {
103434d7ff9SSatish Balay   int         nsends=stash->nsends,ierr,bs2,oldnmax;
104a2d1c673SSatish Balay   MPI_Status  *send_status;
105a2d1c673SSatish Balay 
1063a40ed3dSBarry Smith   PetscFunctionBegin;
107a2d1c673SSatish Balay   /* wait on sends */
108a2d1c673SSatish Balay   if (nsends) {
109a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
110a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
111606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
112a2d1c673SSatish Balay   }
113a2d1c673SSatish Balay 
114c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
115434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
116434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
11794b769a5SSatish Balay   bs2      = stash->bs*stash->bs;
1188a9378f0SSatish Balay   oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
119434d7ff9SSatish Balay   if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
120434d7ff9SSatish Balay 
121d07ff455SSatish Balay   stash->nmax       = 0;
122d07ff455SSatish Balay   stash->n          = 0;
1234c1ff481SSatish Balay   stash->reallocs   = -1;
124bc5ccf88SSatish Balay   stash->rmax       = 0;
125a2d1c673SSatish Balay   stash->nprocessed = 0;
126bc5ccf88SSatish Balay 
127bc5ccf88SSatish Balay   if (stash->array) {
128606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
129bc5ccf88SSatish Balay     stash->array = 0;
130bc5ccf88SSatish Balay     stash->idx   = 0;
131bc5ccf88SSatish Balay     stash->idy   = 0;
132bc5ccf88SSatish Balay   }
133606d414cSSatish Balay   if (stash->send_waits) {
134606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
135606d414cSSatish Balay     stash->send_waits = 0;
136606d414cSSatish Balay   }
137606d414cSSatish Balay   if (stash->recv_waits) {
138606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
139606d414cSSatish Balay     stash->recv_waits = 0;
140606d414cSSatish Balay   }
141606d414cSSatish Balay   if (stash->svalues) {
142606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
143606d414cSSatish Balay     stash->svalues = 0;
144606d414cSSatish Balay   }
145606d414cSSatish Balay   if (stash->rvalues) {
146606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
147606d414cSSatish Balay     stash->rvalues = 0;
148606d414cSSatish Balay   }
149606d414cSSatish Balay   if (stash->nprocs) {
150606d414cSSatish Balay     ierr = PetscFree(stash->nprocs);
151606d414cSSatish Balay     stash->nprocs = 0;
152606d414cSSatish Balay   }
153bc5ccf88SSatish Balay 
1543a40ed3dSBarry Smith   PetscFunctionReturn(0);
1559417f4adSLois Curfman McInnes }
1569417f4adSLois Curfman McInnes 
1574c1ff481SSatish Balay /*
1588798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1594c1ff481SSatish Balay 
1604c1ff481SSatish Balay    Input Parameters:
1614c1ff481SSatish Balay    stash    - the stash
16294b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1634c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1644c1ff481SSatish Balay 
1654c1ff481SSatish Balay */
1665615d1e5SSatish Balay #undef __FUNC__
1678798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private"
1688798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
16997530c3fSBarry Smith {
17094b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
17194b769a5SSatish Balay 
1723a40ed3dSBarry Smith   PetscFunctionBegin;
17394b769a5SSatish Balay   *nstash   = stash->n*bs2;
174434d7ff9SSatish Balay   if (stash->reallocs < 0) *reallocs = 0;
175434d7ff9SSatish Balay   else                     *reallocs = stash->reallocs;
176bc5ccf88SSatish Balay   PetscFunctionReturn(0);
177bc5ccf88SSatish Balay }
1784c1ff481SSatish Balay 
1794c1ff481SSatish Balay 
1804c1ff481SSatish Balay /*
1818798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1824c1ff481SSatish Balay 
1834c1ff481SSatish Balay    Input Parameters:
1844c1ff481SSatish Balay    stash  - the stash
1854c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1864c1ff481SSatish Balay             this value is used while allocating memory.
1874c1ff481SSatish Balay */
188bc5ccf88SSatish Balay #undef __FUNC__
1898798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private"
1908798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
191bc5ccf88SSatish Balay {
192bc5ccf88SSatish Balay   PetscFunctionBegin;
193434d7ff9SSatish Balay   stash->umax = max;
1943a40ed3dSBarry Smith   PetscFunctionReturn(0);
19597530c3fSBarry Smith }
19697530c3fSBarry Smith 
1978798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1984c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1994c1ff481SSatish Balay    being inserted into the stash.
2004c1ff481SSatish Balay 
2014c1ff481SSatish Balay    Input Parameters:
2024c1ff481SSatish Balay    stash - the stash
2034c1ff481SSatish Balay    incr  - the minimum increase requested
2044c1ff481SSatish Balay 
2054c1ff481SSatish Balay    Notes:
2064c1ff481SSatish Balay    This routine doubles the currently used memory.
2074c1ff481SSatish Balay  */
2085615d1e5SSatish Balay #undef __FUNC__
2098798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private"
2108798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
2119417f4adSLois Curfman McInnes {
212549d3d68SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2,ierr;
213bc5ccf88SSatish Balay   Scalar *n_array;
2149417f4adSLois Curfman McInnes 
2153a40ed3dSBarry Smith   PetscFunctionBegin;
2169417f4adSLois Curfman McInnes   /* allocate a larger stash */
21794b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
218c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
219434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
220434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
221c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
222434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
223434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
224434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2254c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
226d07ff455SSatish Balay 
227a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
228a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
229d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
230549d3d68SSatish Balay   ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));CHKERRQ(ierr);
231549d3d68SSatish Balay   ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
232549d3d68SSatish Balay   ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
233606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
234d07ff455SSatish Balay   stash->array   = n_array;
235d07ff455SSatish Balay   stash->idx     = n_idx;
236d07ff455SSatish Balay   stash->idy     = n_idy;
237d07ff455SSatish Balay   stash->nmax    = newnmax;
238bc5ccf88SSatish Balay   stash->reallocs++;
239bc5ccf88SSatish Balay   PetscFunctionReturn(0);
240bc5ccf88SSatish Balay }
241bc5ccf88SSatish Balay /*
2428798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2434c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2444c1ff481SSatish Balay   can be inserted with a single call to this function.
2454c1ff481SSatish Balay 
2464c1ff481SSatish Balay   Input Parameters:
2474c1ff481SSatish Balay   stash  - the stash
2484c1ff481SSatish Balay   row    - the global row correspoiding to the values
2494c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2504c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2514c1ff481SSatish Balay   values - the values inserted
252bc5ccf88SSatish Balay */
253bc5ccf88SSatish Balay #undef __FUNC__
2548798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private"
2558798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
256bc5ccf88SSatish Balay {
257a2d1c673SSatish Balay   int    ierr,i;
258bc5ccf88SSatish Balay 
259bc5ccf88SSatish Balay   PetscFunctionBegin;
2604c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2614c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2628798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2639417f4adSLois Curfman McInnes   }
2644c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2659417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
266a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
267a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
268a2d1c673SSatish Balay     stash->n++;
2699417f4adSLois Curfman McInnes   }
270a2d1c673SSatish Balay   PetscFunctionReturn(0);
271a2d1c673SSatish Balay }
2724c1ff481SSatish Balay /*
2738798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2744c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2754c1ff481SSatish Balay   can be inserted with a single call to this function.
276a2d1c673SSatish Balay 
2774c1ff481SSatish Balay   Input Parameters:
2784c1ff481SSatish Balay   stash   - the stash
2794c1ff481SSatish Balay   row     - the global row correspoiding to the values
2804c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2814c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2824c1ff481SSatish Balay   values  - the values inserted
2834c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2844c1ff481SSatish Balay             this happens because the input is columnoriented.
2854c1ff481SSatish Balay */
286a2d1c673SSatish Balay #undef __FUNC__
2878798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private"
2888798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
2894c1ff481SSatish Balay                                       Scalar *values,int stepval)
290a2d1c673SSatish Balay {
2914c1ff481SSatish Balay   int    ierr,i;
292a2d1c673SSatish Balay 
2934c1ff481SSatish Balay   PetscFunctionBegin;
2944c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2954c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2968798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2974c1ff481SSatish Balay   }
2984c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2994c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3004c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3014c1ff481SSatish Balay     stash->array[stash->n] = values[i*stepval];
3024c1ff481SSatish Balay     stash->n++;
3034c1ff481SSatish Balay   }
3044c1ff481SSatish Balay   PetscFunctionReturn(0);
3054c1ff481SSatish Balay }
3064c1ff481SSatish Balay 
3074c1ff481SSatish Balay /*
3088798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3094c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3104c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3114c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3124c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3134c1ff481SSatish Balay 
3144c1ff481SSatish Balay   Input Parameters:
3154c1ff481SSatish Balay   stash  - the stash
3164c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3174c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3184c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3194c1ff481SSatish Balay            values. Each block is of size bs*bs.
3204c1ff481SSatish Balay   values - the values inserted
3214c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3224c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3234c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3244c1ff481SSatish Balay */
3254c1ff481SSatish Balay #undef __FUNC__
3268798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private"
3278798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
3284c1ff481SSatish Balay                                int rmax,int cmax,int idx)
3294c1ff481SSatish Balay {
3304c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3314c1ff481SSatish Balay   Scalar *vals,*array;
332a2d1c673SSatish Balay 
333a2d1c673SSatish Balay   PetscFunctionBegin;
334a2d1c673SSatish Balay   bs2 = bs*bs;
3354c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3368798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
337a2d1c673SSatish Balay   }
3384c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
339a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
340a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
341a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
342a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
343a2d1c673SSatish Balay        funtion call */
344a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
345a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
346a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
347a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
348a2d1c673SSatish Balay       array += 1;
349a2d1c673SSatish Balay       vals  += cmax*bs;
350a2d1c673SSatish Balay     }
3514c1ff481SSatish Balay     stash->n++;
3524c1ff481SSatish Balay   }
3534c1ff481SSatish Balay   PetscFunctionReturn(0);
3544c1ff481SSatish Balay }
3554c1ff481SSatish Balay 
3564c1ff481SSatish Balay /*
3578798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3584c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3594c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3604c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3614c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3624c1ff481SSatish Balay 
3634c1ff481SSatish Balay   Input Parameters:
3644c1ff481SSatish Balay   stash  - the stash
3654c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3664c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3674c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3684c1ff481SSatish Balay            values. Each block is of size bs*bs.
3694c1ff481SSatish Balay   values - the values inserted
3704c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3714c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3724c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3734c1ff481SSatish Balay */
3744c1ff481SSatish Balay #undef __FUNC__
3758798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private"
3768798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
3774c1ff481SSatish Balay                                              Scalar *values,int rmax,int cmax,int idx)
3784c1ff481SSatish Balay {
3794c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3804c1ff481SSatish Balay   Scalar *vals,*array;
3814c1ff481SSatish Balay 
3824c1ff481SSatish Balay   PetscFunctionBegin;
3834c1ff481SSatish Balay   bs2 = bs*bs;
3844c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3858798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3864c1ff481SSatish Balay   }
3874c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
3884c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3894c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3904c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3914c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3924c1ff481SSatish Balay      funtion call */
393a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
394a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
395a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
396a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
397a2d1c673SSatish Balay       array += bs;
398a2d1c673SSatish Balay       vals  += rmax*bs;
399a2d1c673SSatish Balay     }
400a2d1c673SSatish Balay     stash->n++;
4019417f4adSLois Curfman McInnes   }
4023a40ed3dSBarry Smith   PetscFunctionReturn(0);
4039417f4adSLois Curfman McInnes }
4044c1ff481SSatish Balay /*
4058798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4064c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4074c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4084c1ff481SSatish Balay   processors.
409bc5ccf88SSatish Balay 
4104c1ff481SSatish Balay   Input Parameters:
4114c1ff481SSatish Balay   stash  - the stash
4124c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4134c1ff481SSatish Balay            for each node.
4144c1ff481SSatish Balay 
4154c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4164c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4174c1ff481SSatish Balay   the proper global indices.
4184c1ff481SSatish Balay */
419bc5ccf88SSatish Balay #undef __FUNC__
4208798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private"
4218798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
422bc5ccf88SSatish Balay {
423a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
424a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
4254c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
426a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
427bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
428bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
429bc5ccf88SSatish Balay 
430bc5ccf88SSatish Balay   PetscFunctionBegin;
431bc5ccf88SSatish Balay 
4324c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
433bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
434bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
435549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
436549d3d68SSatish Balay   procs  = nprocs + size;
437bc5ccf88SSatish Balay   owner  = (int *) PetscMalloc( (stash->n+1)*sizeof(int) );CHKPTRQ(owner);
438a2d1c673SSatish Balay 
439bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
440bc5ccf88SSatish Balay     idx = stash->idx[i];
441bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
4424c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
443bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
444bc5ccf88SSatish Balay       }
445bc5ccf88SSatish Balay     }
446bc5ccf88SSatish Balay   }
447bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
448bc5ccf88SSatish Balay 
449bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
450*6831982aSBarry Smith   work      = (int *)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
451*6831982aSBarry Smith   ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
452bc5ccf88SSatish Balay   nmax      = work[rank];
453*6831982aSBarry Smith   nreceives = work[size+rank];
454606d414cSSatish Balay   ierr      = PetscFree(work);CHKERRQ(ierr);
455bc5ccf88SSatish Balay   /* post receives:
456bc5ccf88SSatish Balay      since we don't know how long each individual message is we
457bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
458bc5ccf88SSatish Balay      this is a lot of wasted space.
459bc5ccf88SSatish Balay   */
460a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
461a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
462a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
463bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
464a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
465bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
466bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
467bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
468bc5ccf88SSatish Balay   }
469bc5ccf88SSatish Balay 
470bc5ccf88SSatish Balay   /* do sends:
471bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
472bc5ccf88SSatish Balay          the ith processor
473bc5ccf88SSatish Balay   */
474a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
475a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
476549d3d68SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
477bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) );CHKPTRQ(startv);
478bc5ccf88SSatish Balay   starti     = startv + size;
479a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
480bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
481bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
482bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
483bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
484bc5ccf88SSatish Balay   }
485bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
486bc5ccf88SSatish Balay     j = owner[i];
487a2d1c673SSatish Balay     if (bs2 == 1) {
488bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
489a2d1c673SSatish Balay     } else {
4904c1ff481SSatish Balay       int    k;
4914c1ff481SSatish Balay       Scalar *buf1,*buf2;
4924c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4934c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4944c1ff481SSatish Balay       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
495a2d1c673SSatish Balay     }
496bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
497bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
498bc5ccf88SSatish Balay     startv[j]++;
499bc5ccf88SSatish Balay     starti[j]++;
500bc5ccf88SSatish Balay   }
501bc5ccf88SSatish Balay   startv[0] = 0;
502bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
503bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
504bc5ccf88SSatish Balay     if (procs[i]) {
505a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
506bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
507bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
508bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
509bc5ccf88SSatish Balay     }
510bc5ccf88SSatish Balay   }
511606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
512606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
513a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
514a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
515a2d1c673SSatish Balay   stash->nprocs      = nprocs;
516a2d1c673SSatish Balay 
517bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
518bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
519bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
520bc5ccf88SSatish Balay   stash->rmax       = nmax;
521bc5ccf88SSatish Balay   PetscFunctionReturn(0);
522bc5ccf88SSatish Balay }
523bc5ccf88SSatish Balay 
524a2d1c673SSatish Balay /*
5258798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5268798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5274c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5284c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5294c1ff481SSatish Balay 
5304c1ff481SSatish Balay    Input Parameters:
5314c1ff481SSatish Balay    stash - the stash
5324c1ff481SSatish Balay 
5334c1ff481SSatish Balay    Output Parameters:
5344c1ff481SSatish Balay    nvals - the number of entries in the current message.
5354c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5364c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5374c1ff481SSatish Balay    vals  - the values
5384c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5394c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5404c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
541a2d1c673SSatish Balay */
542bc5ccf88SSatish Balay #undef __FUNC__
5438798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private"
5448798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
545bc5ccf88SSatish Balay {
546a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
547a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
548a2d1c673SSatish Balay   MPI_Status  recv_status;
549bc5ccf88SSatish Balay 
550bc5ccf88SSatish Balay   PetscFunctionBegin;
551bc5ccf88SSatish Balay 
552a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
553a2d1c673SSatish Balay   /* Return if no more messages to process */
554a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
555a2d1c673SSatish Balay 
556a2d1c673SSatish Balay   flg_v = stash->nprocs;
557a2d1c673SSatish Balay   flg_i = flg_v + size;
5584c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
559a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
560a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
561a2d1c673SSatish Balay   while (!match_found) {
562a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
563a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
564a2d1c673SSatish Balay     if (i % 2) {
565a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
566a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
567a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
568a2d1c673SSatish Balay     } else {
569a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
570a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
571a2d1c673SSatish Balay       *nvals = *nvals/bs2;
572bc5ccf88SSatish Balay     }
573a2d1c673SSatish Balay 
574a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
575a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
576a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
577a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
578a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
579a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
580a2d1c673SSatish Balay       *cols       = *rows + *nvals;
581a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
582a2d1c673SSatish Balay       *flg        = 1;
583a2d1c673SSatish Balay       stash->nprocessed ++;
584a2d1c673SSatish Balay       match_found = 1;
585bc5ccf88SSatish Balay     }
586bc5ccf88SSatish Balay   }
587bc5ccf88SSatish Balay   PetscFunctionReturn(0);
588bc5ccf88SSatish Balay }
589