xref: /petsc/src/mat/utils/matstash.c (revision 434d7ff956d884db00ea72a3c537582b7f9d5cf5)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*434d7ff9SSatish Balay static char vcid[] = "$Id: matstash.c,v 1.28 1999/03/18 01:26:13 balay Exp balay $";
32d5177cdSBarry Smith #endif
42d5177cdSBarry Smith 
570f55243SBarry Smith #include "src/mat/matimpl.h"
69417f4adSLois Curfman McInnes 
7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
84c1ff481SSatish Balay 
99417f4adSLois Curfman McInnes /*
108798bf22SSatish Balay   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
114c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
124c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
139417f4adSLois Curfman McInnes 
144c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
154c1ff481SSatish Balay 
164c1ff481SSatish Balay   Input Parameters:
174c1ff481SSatish Balay   comm - communicator, required for scatters.
184c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
194c1ff481SSatish Balay 
204c1ff481SSatish Balay   Output Parameters:
214c1ff481SSatish Balay   stash    - the newly created stash
229417f4adSLois Curfman McInnes */
235615d1e5SSatish Balay #undef __FUNC__
248798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private"
258798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
269417f4adSLois Curfman McInnes {
27*434d7ff9SSatish Balay   int ierr,flg,max,*opt,nopt;
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
34a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
35bc5ccf88SSatish Balay 
36*434d7ff9SSatish Balay   nopt = stash->size;
37*434d7ff9SSatish Balay   opt  = (int*) PetscMalloc(nopt*sizeof(int)); CHKPTRQ(opt);
38*434d7ff9SSatish Balay   ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
39*434d7ff9SSatish Balay   if (flg) {
40*434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
41*434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
42*434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
43*434d7ff9SSatish Balay     /* else use the default */
44*434d7ff9SSatish Balay     stash->umax = max;
45*434d7ff9SSatish Balay   } else {
46*434d7ff9SSatish Balay     stash->umax = 0;
47*434d7ff9SSatish Balay   }
48*434d7ff9SSatish Balay   PetscFree(opt);
494c1ff481SSatish Balay   if (bs <= 0) bs = 1;
50a2d1c673SSatish Balay 
514c1ff481SSatish Balay   stash->bs       = bs;
529417f4adSLois Curfman McInnes   stash->nmax     = 0;
53*434d7ff9SSatish Balay   stash->oldnmax  = 0;
549417f4adSLois Curfman McInnes   stash->n        = 0;
554c1ff481SSatish Balay   stash->reallocs = -1;
569417f4adSLois Curfman McInnes   stash->idx      = 0;
579417f4adSLois Curfman McInnes   stash->idy      = 0;
58bc5ccf88SSatish Balay   stash->array    = 0;
599417f4adSLois Curfman McInnes 
60bc5ccf88SSatish Balay   stash->send_waits  = 0;
61bc5ccf88SSatish Balay   stash->recv_waits  = 0;
62a2d1c673SSatish Balay   stash->send_status = 0;
63bc5ccf88SSatish Balay   stash->nsends      = 0;
64bc5ccf88SSatish Balay   stash->nrecvs      = 0;
65bc5ccf88SSatish Balay   stash->svalues     = 0;
66bc5ccf88SSatish Balay   stash->rvalues     = 0;
67bc5ccf88SSatish Balay   stash->rmax        = 0;
68a2d1c673SSatish Balay   stash->nprocs      = 0;
69a2d1c673SSatish Balay   stash->nprocessed  = 0;
703a40ed3dSBarry Smith   PetscFunctionReturn(0);
719417f4adSLois Curfman McInnes }
729417f4adSLois Curfman McInnes 
734c1ff481SSatish Balay /*
748798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
754c1ff481SSatish Balay */
765615d1e5SSatish Balay #undef __FUNC__
778798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private"
788798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
799417f4adSLois Curfman McInnes {
80bc5ccf88SSatish Balay   int ierr;
81a2d1c673SSatish Balay 
82bc5ccf88SSatish Balay   PetscFunctionBegin;
83bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
84bc5ccf88SSatish Balay   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
85bc5ccf88SSatish Balay   PetscFunctionReturn(0);
86bc5ccf88SSatish Balay }
87bc5ccf88SSatish Balay 
884c1ff481SSatish Balay /*
898798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
904c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
914c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
924c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
934c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
944c1ff481SSatish Balay    so that the same value can be used the next time through.
954c1ff481SSatish Balay */
96bc5ccf88SSatish Balay #undef __FUNC__
978798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private"
988798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
99bc5ccf88SSatish Balay {
100*434d7ff9SSatish Balay   int         nsends=stash->nsends,ierr,bs2,oldnmax;
101a2d1c673SSatish Balay   MPI_Status  *send_status;
102a2d1c673SSatish Balay 
1033a40ed3dSBarry Smith   PetscFunctionBegin;
104a2d1c673SSatish Balay   /* wait on sends */
105a2d1c673SSatish Balay   if (nsends) {
106a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
107a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
108a2d1c673SSatish Balay     PetscFree(send_status);
109a2d1c673SSatish Balay   }
110a2d1c673SSatish Balay 
111c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
112*434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
113*434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
11494b769a5SSatish Balay   bs2      = stash->bs*stash->bs;
115*434d7ff9SSatish Balay   oldnmax  = ((int)(stash->n * 1.1) + 5)*stash->bs;
116*434d7ff9SSatish Balay   if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
117*434d7ff9SSatish Balay 
118d07ff455SSatish Balay   stash->nmax       = 0;
119d07ff455SSatish Balay   stash->n          = 0;
1204c1ff481SSatish Balay   stash->reallocs   = -1;
121bc5ccf88SSatish Balay   stash->rmax       = 0;
122a2d1c673SSatish Balay   stash->nprocessed = 0;
123bc5ccf88SSatish Balay 
124bc5ccf88SSatish Balay   if (stash->array) {
125bc5ccf88SSatish Balay     PetscFree(stash->array);
126bc5ccf88SSatish Balay     stash->array = 0;
127bc5ccf88SSatish Balay     stash->idx   = 0;
128bc5ccf88SSatish Balay     stash->idy   = 0;
129bc5ccf88SSatish Balay   }
130bc5ccf88SSatish Balay   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
131bc5ccf88SSatish Balay   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
132bc5ccf88SSatish Balay   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
133bc5ccf88SSatish Balay   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
134a2d1c673SSatish Balay   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
135bc5ccf88SSatish Balay 
1363a40ed3dSBarry Smith   PetscFunctionReturn(0);
1379417f4adSLois Curfman McInnes }
1389417f4adSLois Curfman McInnes 
1394c1ff481SSatish Balay /*
1408798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1414c1ff481SSatish Balay 
1424c1ff481SSatish Balay    Input Parameters:
1434c1ff481SSatish Balay    stash    - the stash
14494b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1454c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1464c1ff481SSatish Balay 
1474c1ff481SSatish Balay */
1485615d1e5SSatish Balay #undef __FUNC__
1498798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private"
1508798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
15197530c3fSBarry Smith {
15294b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
15394b769a5SSatish Balay 
1543a40ed3dSBarry Smith   PetscFunctionBegin;
15594b769a5SSatish Balay   *nstash   = stash->n*bs2;
156*434d7ff9SSatish Balay   if (stash->reallocs < 0) *reallocs = 0;
157*434d7ff9SSatish Balay   else                     *reallocs = stash->reallocs;
158bc5ccf88SSatish Balay   PetscFunctionReturn(0);
159bc5ccf88SSatish Balay }
1604c1ff481SSatish Balay 
1614c1ff481SSatish Balay 
1624c1ff481SSatish Balay /*
1638798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1644c1ff481SSatish Balay 
1654c1ff481SSatish Balay    Input Parameters:
1664c1ff481SSatish Balay    stash  - the stash
1674c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1684c1ff481SSatish Balay             this value is used while allocating memory.
1694c1ff481SSatish Balay */
170bc5ccf88SSatish Balay #undef __FUNC__
1718798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private"
1728798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
173bc5ccf88SSatish Balay {
174bc5ccf88SSatish Balay   PetscFunctionBegin;
175*434d7ff9SSatish Balay   stash->umax = max;
1763a40ed3dSBarry Smith   PetscFunctionReturn(0);
17797530c3fSBarry Smith }
17897530c3fSBarry Smith 
1798798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1804c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1814c1ff481SSatish Balay    being inserted into the stash.
1824c1ff481SSatish Balay 
1834c1ff481SSatish Balay    Input Parameters:
1844c1ff481SSatish Balay    stash - the stash
1854c1ff481SSatish Balay    incr  - the minimum increase requested
1864c1ff481SSatish Balay 
1874c1ff481SSatish Balay    Notes:
1884c1ff481SSatish Balay    This routine doubles the currently used memory.
1894c1ff481SSatish Balay  */
1905615d1e5SSatish Balay #undef __FUNC__
1918798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private"
1928798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
1939417f4adSLois Curfman McInnes {
194a2d1c673SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2;
195bc5ccf88SSatish Balay   Scalar *n_array;
1969417f4adSLois Curfman McInnes 
1973a40ed3dSBarry Smith   PetscFunctionBegin;
1989417f4adSLois Curfman McInnes   /* allocate a larger stash */
19994b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
200*434d7ff9SSatish Balay   if (stash->oldnmax == 0)  { /* new stash */
201*434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
202*434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
203*434d7ff9SSatish Balay   } else if (stash->nmax == 0) { /* resuing stash */
204*434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
205*434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
206*434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2074c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
208d07ff455SSatish Balay 
209a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
210a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
211d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
212a2d1c673SSatish Balay   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
213416022c9SBarry Smith   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
214416022c9SBarry Smith   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
2150452661fSBarry Smith   if (stash->array) PetscFree(stash->array);
216d07ff455SSatish Balay   stash->array   = n_array;
217d07ff455SSatish Balay   stash->idx     = n_idx;
218d07ff455SSatish Balay   stash->idy     = n_idy;
219d07ff455SSatish Balay   stash->nmax    = newnmax;
22094b769a5SSatish Balay   stash->oldnmax = newnmax*bs2;
221bc5ccf88SSatish Balay   stash->reallocs++;
222bc5ccf88SSatish Balay   PetscFunctionReturn(0);
223bc5ccf88SSatish Balay }
224bc5ccf88SSatish Balay /*
2258798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2264c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2274c1ff481SSatish Balay   can be inserted with a single call to this function.
2284c1ff481SSatish Balay 
2294c1ff481SSatish Balay   Input Parameters:
2304c1ff481SSatish Balay   stash  - the stash
2314c1ff481SSatish Balay   row    - the global row correspoiding to the values
2324c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2334c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2344c1ff481SSatish Balay   values - the values inserted
235bc5ccf88SSatish Balay */
236bc5ccf88SSatish Balay #undef __FUNC__
2378798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private"
2388798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
239bc5ccf88SSatish Balay {
240a2d1c673SSatish Balay   int    ierr,i;
241bc5ccf88SSatish Balay 
242bc5ccf88SSatish Balay   PetscFunctionBegin;
2434c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2444c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2458798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
2469417f4adSLois Curfman McInnes   }
2474c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2489417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
249a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
250a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
251a2d1c673SSatish Balay     stash->n++;
2529417f4adSLois Curfman McInnes   }
253a2d1c673SSatish Balay   PetscFunctionReturn(0);
254a2d1c673SSatish Balay }
2554c1ff481SSatish Balay /*
2568798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2574c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2584c1ff481SSatish Balay   can be inserted with a single call to this function.
259a2d1c673SSatish Balay 
2604c1ff481SSatish Balay   Input Parameters:
2614c1ff481SSatish Balay   stash   - the stash
2624c1ff481SSatish Balay   row     - the global row correspoiding to the values
2634c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2644c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2654c1ff481SSatish Balay   values  - the values inserted
2664c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2674c1ff481SSatish Balay             this happens because the input is columnoriented.
2684c1ff481SSatish Balay */
269a2d1c673SSatish Balay #undef __FUNC__
2708798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private"
2718798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
2724c1ff481SSatish Balay                                       Scalar *values,int stepval)
273a2d1c673SSatish Balay {
2744c1ff481SSatish Balay   int    ierr,i;
275a2d1c673SSatish Balay 
2764c1ff481SSatish Balay   PetscFunctionBegin;
2774c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2784c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2798798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
2804c1ff481SSatish Balay   }
2814c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2824c1ff481SSatish Balay     stash->idx[stash->n]   = row;
2834c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
2844c1ff481SSatish Balay     stash->array[stash->n] = values[i*stepval];
2854c1ff481SSatish Balay     stash->n++;
2864c1ff481SSatish Balay   }
2874c1ff481SSatish Balay   PetscFunctionReturn(0);
2884c1ff481SSatish Balay }
2894c1ff481SSatish Balay 
2904c1ff481SSatish Balay /*
2918798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
2924c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
2934c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
2944c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
2954c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
2964c1ff481SSatish Balay 
2974c1ff481SSatish Balay   Input Parameters:
2984c1ff481SSatish Balay   stash  - the stash
2994c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3004c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3014c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3024c1ff481SSatish Balay            values. Each block is of size bs*bs.
3034c1ff481SSatish Balay   values - the values inserted
3044c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3054c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3064c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3074c1ff481SSatish Balay */
3084c1ff481SSatish Balay #undef __FUNC__
3098798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private"
3108798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
3114c1ff481SSatish Balay                                int rmax,int cmax,int idx)
3124c1ff481SSatish Balay {
3134c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3144c1ff481SSatish Balay   Scalar *vals,*array;
315a2d1c673SSatish Balay 
316a2d1c673SSatish Balay   PetscFunctionBegin;
317a2d1c673SSatish Balay   bs2 = bs*bs;
3184c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3198798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
320a2d1c673SSatish Balay   }
3214c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
322a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
323a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
324a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
325a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
326a2d1c673SSatish Balay        funtion call */
327a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
328a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
329a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
330a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
331a2d1c673SSatish Balay       array += 1;
332a2d1c673SSatish Balay       vals  += cmax*bs;
333a2d1c673SSatish Balay     }
3344c1ff481SSatish Balay     stash->n++;
3354c1ff481SSatish Balay   }
3364c1ff481SSatish Balay   PetscFunctionReturn(0);
3374c1ff481SSatish Balay }
3384c1ff481SSatish Balay 
3394c1ff481SSatish Balay /*
3408798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3414c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3424c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3434c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3444c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3454c1ff481SSatish Balay 
3464c1ff481SSatish Balay   Input Parameters:
3474c1ff481SSatish Balay   stash  - the stash
3484c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3494c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3504c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3514c1ff481SSatish Balay            values. Each block is of size bs*bs.
3524c1ff481SSatish Balay   values - the values inserted
3534c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3544c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3554c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3564c1ff481SSatish Balay */
3574c1ff481SSatish Balay #undef __FUNC__
3588798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private"
3598798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
3604c1ff481SSatish Balay                                              Scalar *values,int rmax,int cmax,int idx)
3614c1ff481SSatish Balay {
3624c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3634c1ff481SSatish Balay   Scalar *vals,*array;
3644c1ff481SSatish Balay 
3654c1ff481SSatish Balay   PetscFunctionBegin;
3664c1ff481SSatish Balay   bs2 = bs*bs;
3674c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3688798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
3694c1ff481SSatish Balay   }
3704c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
3714c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3724c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3734c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3744c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3754c1ff481SSatish Balay      funtion call */
376a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
377a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
378a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
379a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
380a2d1c673SSatish Balay       array += bs;
381a2d1c673SSatish Balay       vals  += rmax*bs;
382a2d1c673SSatish Balay     }
383a2d1c673SSatish Balay     stash->n++;
3849417f4adSLois Curfman McInnes   }
3853a40ed3dSBarry Smith   PetscFunctionReturn(0);
3869417f4adSLois Curfman McInnes }
3874c1ff481SSatish Balay /*
3888798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
3894c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
3904c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
3914c1ff481SSatish Balay   processors.
392bc5ccf88SSatish Balay 
3934c1ff481SSatish Balay   Input Parameters:
3944c1ff481SSatish Balay   stash  - the stash
3954c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
3964c1ff481SSatish Balay            for each node.
3974c1ff481SSatish Balay 
3984c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
3994c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4004c1ff481SSatish Balay   the proper global indices.
4014c1ff481SSatish Balay */
402bc5ccf88SSatish Balay #undef __FUNC__
4038798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private"
4048798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
405bc5ccf88SSatish Balay {
406a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
407a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
4084c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
409a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
410bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
411bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
412bc5ccf88SSatish Balay 
413bc5ccf88SSatish Balay   PetscFunctionBegin;
414bc5ccf88SSatish Balay 
4154c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
416bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
417bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
418bc5ccf88SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
419bc5ccf88SSatish Balay   owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
420a2d1c673SSatish Balay 
421bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
422bc5ccf88SSatish Balay     idx = stash->idx[i];
423bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
4244c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
425bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
426bc5ccf88SSatish Balay       }
427bc5ccf88SSatish Balay     }
428bc5ccf88SSatish Balay   }
429bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
430bc5ccf88SSatish Balay 
431bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
432bc5ccf88SSatish Balay   work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
433bc5ccf88SSatish Balay   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
434bc5ccf88SSatish Balay   nreceives = work[rank];
435bc5ccf88SSatish Balay   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
436bc5ccf88SSatish Balay   nmax = work[rank];
437bc5ccf88SSatish Balay   PetscFree(work);
438bc5ccf88SSatish Balay   /* post receives:
439bc5ccf88SSatish Balay      since we don't know how long each individual message is we
440bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
441bc5ccf88SSatish Balay      this is a lot of wasted space.
442bc5ccf88SSatish Balay   */
443a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
444a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
445a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
446bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
447a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
448bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
449bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
450bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
451bc5ccf88SSatish Balay   }
452bc5ccf88SSatish Balay 
453bc5ccf88SSatish Balay   /* do sends:
454bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
455bc5ccf88SSatish Balay          the ith processor
456bc5ccf88SSatish Balay   */
457a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
458a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
459bc5ccf88SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
460bc5ccf88SSatish Balay   CHKPTRQ(send_waits);
461bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
462bc5ccf88SSatish Balay   starti     = startv + size;
463a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
464bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
465bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
466bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
467bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
468bc5ccf88SSatish Balay   }
469bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
470bc5ccf88SSatish Balay     j = owner[i];
471a2d1c673SSatish Balay     if (bs2 == 1) {
472bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
473a2d1c673SSatish Balay     } else {
4744c1ff481SSatish Balay       int    k;
4754c1ff481SSatish Balay       Scalar *buf1,*buf2;
4764c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4774c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4784c1ff481SSatish Balay       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
479a2d1c673SSatish Balay     }
480bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
481bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
482bc5ccf88SSatish Balay     startv[j]++;
483bc5ccf88SSatish Balay     starti[j]++;
484bc5ccf88SSatish Balay   }
485bc5ccf88SSatish Balay   startv[0] = 0;
486bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
487bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
488bc5ccf88SSatish Balay     if (procs[i]) {
489a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
490bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
491bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
492bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
493bc5ccf88SSatish Balay     }
494bc5ccf88SSatish Balay   }
495bc5ccf88SSatish Balay   PetscFree(owner);
496bc5ccf88SSatish Balay   PetscFree(startv);
497a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
498a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
499a2d1c673SSatish Balay   stash->nprocs      = nprocs;
500a2d1c673SSatish Balay 
501bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
502bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
503bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
504bc5ccf88SSatish Balay   stash->rmax       = nmax;
505bc5ccf88SSatish Balay   PetscFunctionReturn(0);
506bc5ccf88SSatish Balay }
507bc5ccf88SSatish Balay 
508a2d1c673SSatish Balay /*
5098798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5108798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5114c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5124c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5134c1ff481SSatish Balay 
5144c1ff481SSatish Balay    Input Parameters:
5154c1ff481SSatish Balay    stash - the stash
5164c1ff481SSatish Balay 
5174c1ff481SSatish Balay    Output Parameters:
5184c1ff481SSatish Balay    nvals - the number of entries in the current message.
5194c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5204c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5214c1ff481SSatish Balay    vals  - the values
5224c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5234c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5244c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
525a2d1c673SSatish Balay */
526bc5ccf88SSatish Balay #undef __FUNC__
5278798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private"
5288798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
529bc5ccf88SSatish Balay {
530a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
531a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
532a2d1c673SSatish Balay   MPI_Status  recv_status;
533bc5ccf88SSatish Balay 
534bc5ccf88SSatish Balay   PetscFunctionBegin;
535bc5ccf88SSatish Balay 
536a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
537a2d1c673SSatish Balay   /* Return if no more messages to process */
538a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
539a2d1c673SSatish Balay 
540a2d1c673SSatish Balay   flg_v = stash->nprocs;
541a2d1c673SSatish Balay   flg_i = flg_v + size;
5424c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
543a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
544a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
545a2d1c673SSatish Balay   while (!match_found) {
546a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
547a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
548a2d1c673SSatish Balay     if (i % 2) {
549a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
550a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
551a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
552a2d1c673SSatish Balay     } else {
553a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
554a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
555a2d1c673SSatish Balay       *nvals = *nvals/bs2;
556bc5ccf88SSatish Balay     }
557a2d1c673SSatish Balay 
558a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
559a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
560a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
561a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
562a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
563a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
564a2d1c673SSatish Balay       *cols       = *rows + *nvals;
565a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
566a2d1c673SSatish Balay       *flg        = 1;
567a2d1c673SSatish Balay       stash->nprocessed ++;
568a2d1c673SSatish Balay       match_found = 1;
569bc5ccf88SSatish Balay     }
570bc5ccf88SSatish Balay   }
571bc5ccf88SSatish Balay   PetscFunctionReturn(0);
572bc5ccf88SSatish Balay }
573