xref: /petsc/src/mat/utils/matstash.c (revision 549d3d68a6ae470532d58d544870024f02ff2d7c)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*549d3d68SSatish Balay static char vcid[] = "$Id: matstash.c,v 1.33 1999/03/19 22:42:58 balay Exp balay $";
32d5177cdSBarry Smith #endif
42d5177cdSBarry Smith 
570f55243SBarry Smith #include "src/mat/matimpl.h"
69417f4adSLois Curfman McInnes 
7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
84c1ff481SSatish Balay 
99417f4adSLois Curfman McInnes /*
108798bf22SSatish Balay   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
114c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
124c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
139417f4adSLois Curfman McInnes 
144c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
154c1ff481SSatish Balay 
164c1ff481SSatish Balay   Input Parameters:
174c1ff481SSatish Balay   comm - communicator, required for scatters.
184c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
194c1ff481SSatish Balay 
204c1ff481SSatish Balay   Output Parameters:
214c1ff481SSatish Balay   stash    - the newly created stash
229417f4adSLois Curfman McInnes */
235615d1e5SSatish Balay #undef __FUNC__
248798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private"
258798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
269417f4adSLois Curfman McInnes {
27434d7ff9SSatish Balay   int ierr,flg,max,*opt,nopt;
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
34a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
35bc5ccf88SSatish Balay 
36434d7ff9SSatish Balay   nopt = stash->size;
37434d7ff9SSatish Balay   opt  = (int*) PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt);
38434d7ff9SSatish Balay   ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
39434d7ff9SSatish Balay   if (flg) {
40434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
41434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
42434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
43f4ab19daSSatish Balay     else                          max = 0; /* Use default */
44434d7ff9SSatish Balay     stash->umax = max;
45434d7ff9SSatish Balay   } else {
46434d7ff9SSatish Balay     stash->umax = 0;
47434d7ff9SSatish Balay   }
48434d7ff9SSatish Balay   PetscFree(opt);
494c1ff481SSatish Balay   if (bs <= 0) bs = 1;
50a2d1c673SSatish Balay 
514c1ff481SSatish Balay   stash->bs       = bs;
529417f4adSLois Curfman McInnes   stash->nmax     = 0;
53434d7ff9SSatish Balay   stash->oldnmax  = 0;
549417f4adSLois Curfman McInnes   stash->n        = 0;
554c1ff481SSatish Balay   stash->reallocs = -1;
569417f4adSLois Curfman McInnes   stash->idx      = 0;
579417f4adSLois Curfman McInnes   stash->idy      = 0;
58bc5ccf88SSatish Balay   stash->array    = 0;
599417f4adSLois Curfman McInnes 
60bc5ccf88SSatish Balay   stash->send_waits  = 0;
61bc5ccf88SSatish Balay   stash->recv_waits  = 0;
62a2d1c673SSatish Balay   stash->send_status = 0;
63bc5ccf88SSatish Balay   stash->nsends      = 0;
64bc5ccf88SSatish Balay   stash->nrecvs      = 0;
65bc5ccf88SSatish Balay   stash->svalues     = 0;
66bc5ccf88SSatish Balay   stash->rvalues     = 0;
67bc5ccf88SSatish Balay   stash->rmax        = 0;
68a2d1c673SSatish Balay   stash->nprocs      = 0;
69a2d1c673SSatish Balay   stash->nprocessed  = 0;
703a40ed3dSBarry Smith   PetscFunctionReturn(0);
719417f4adSLois Curfman McInnes }
729417f4adSLois Curfman McInnes 
734c1ff481SSatish Balay /*
748798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
754c1ff481SSatish Balay */
765615d1e5SSatish Balay #undef __FUNC__
778798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private"
788798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
799417f4adSLois Curfman McInnes {
80bc5ccf88SSatish Balay   int ierr;
81a2d1c673SSatish Balay 
82bc5ccf88SSatish Balay   PetscFunctionBegin;
83bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr);
84bc5ccf88SSatish Balay   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
85bc5ccf88SSatish Balay   PetscFunctionReturn(0);
86bc5ccf88SSatish Balay }
87bc5ccf88SSatish Balay 
884c1ff481SSatish Balay /*
898798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
904c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
914c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
924c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
934c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
944c1ff481SSatish Balay    so that the same value can be used the next time through.
954c1ff481SSatish Balay */
96bc5ccf88SSatish Balay #undef __FUNC__
978798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private"
988798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
99bc5ccf88SSatish Balay {
100434d7ff9SSatish Balay   int         nsends=stash->nsends,ierr,bs2,oldnmax;
101a2d1c673SSatish Balay   MPI_Status  *send_status;
102a2d1c673SSatish Balay 
1033a40ed3dSBarry Smith   PetscFunctionBegin;
104a2d1c673SSatish Balay   /* wait on sends */
105a2d1c673SSatish Balay   if (nsends) {
106a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
107a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
108a2d1c673SSatish Balay     PetscFree(send_status);
109a2d1c673SSatish Balay   }
110a2d1c673SSatish Balay 
111c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
112434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
113434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
11494b769a5SSatish Balay   bs2      = stash->bs*stash->bs;
1158a9378f0SSatish Balay   oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
116434d7ff9SSatish Balay   if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
117434d7ff9SSatish Balay 
118d07ff455SSatish Balay   stash->nmax       = 0;
119d07ff455SSatish Balay   stash->n          = 0;
1204c1ff481SSatish Balay   stash->reallocs   = -1;
121bc5ccf88SSatish Balay   stash->rmax       = 0;
122a2d1c673SSatish Balay   stash->nprocessed = 0;
123bc5ccf88SSatish Balay 
124bc5ccf88SSatish Balay   if (stash->array) {
125bc5ccf88SSatish Balay     PetscFree(stash->array);
126bc5ccf88SSatish Balay     stash->array = 0;
127bc5ccf88SSatish Balay     stash->idx   = 0;
128bc5ccf88SSatish Balay     stash->idy   = 0;
129bc5ccf88SSatish Balay   }
130bc5ccf88SSatish Balay   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
131bc5ccf88SSatish Balay   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
132bc5ccf88SSatish Balay   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
133bc5ccf88SSatish Balay   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
134a2d1c673SSatish Balay   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
135bc5ccf88SSatish Balay 
1363a40ed3dSBarry Smith   PetscFunctionReturn(0);
1379417f4adSLois Curfman McInnes }
1389417f4adSLois Curfman McInnes 
1394c1ff481SSatish Balay /*
1408798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1414c1ff481SSatish Balay 
1424c1ff481SSatish Balay    Input Parameters:
1434c1ff481SSatish Balay    stash    - the stash
14494b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1454c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1464c1ff481SSatish Balay 
1474c1ff481SSatish Balay */
1485615d1e5SSatish Balay #undef __FUNC__
1498798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private"
1508798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
15197530c3fSBarry Smith {
15294b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
15394b769a5SSatish Balay 
1543a40ed3dSBarry Smith   PetscFunctionBegin;
15594b769a5SSatish Balay   *nstash   = stash->n*bs2;
156434d7ff9SSatish Balay   if (stash->reallocs < 0) *reallocs = 0;
157434d7ff9SSatish Balay   else                     *reallocs = stash->reallocs;
158bc5ccf88SSatish Balay   PetscFunctionReturn(0);
159bc5ccf88SSatish Balay }
1604c1ff481SSatish Balay 
1614c1ff481SSatish Balay 
1624c1ff481SSatish Balay /*
1638798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1644c1ff481SSatish Balay 
1654c1ff481SSatish Balay    Input Parameters:
1664c1ff481SSatish Balay    stash  - the stash
1674c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1684c1ff481SSatish Balay             this value is used while allocating memory.
1694c1ff481SSatish Balay */
170bc5ccf88SSatish Balay #undef __FUNC__
1718798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private"
1728798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
173bc5ccf88SSatish Balay {
174bc5ccf88SSatish Balay   PetscFunctionBegin;
175434d7ff9SSatish Balay   stash->umax = max;
1763a40ed3dSBarry Smith   PetscFunctionReturn(0);
17797530c3fSBarry Smith }
17897530c3fSBarry Smith 
1798798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1804c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1814c1ff481SSatish Balay    being inserted into the stash.
1824c1ff481SSatish Balay 
1834c1ff481SSatish Balay    Input Parameters:
1844c1ff481SSatish Balay    stash - the stash
1854c1ff481SSatish Balay    incr  - the minimum increase requested
1864c1ff481SSatish Balay 
1874c1ff481SSatish Balay    Notes:
1884c1ff481SSatish Balay    This routine doubles the currently used memory.
1894c1ff481SSatish Balay  */
1905615d1e5SSatish Balay #undef __FUNC__
1918798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private"
1928798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
1939417f4adSLois Curfman McInnes {
194*549d3d68SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2,ierr;
195bc5ccf88SSatish Balay   Scalar *n_array;
1969417f4adSLois Curfman McInnes 
1973a40ed3dSBarry Smith   PetscFunctionBegin;
1989417f4adSLois Curfman McInnes   /* allocate a larger stash */
19994b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
200c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
201434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
202434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
203c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
204434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
205434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
206434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2074c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
208d07ff455SSatish Balay 
209a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
210a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
211d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
212*549d3d68SSatish Balay   ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));CHKERRQ(ierr);
213*549d3d68SSatish Balay   ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
214*549d3d68SSatish Balay   ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
2150452661fSBarry Smith   if (stash->array) PetscFree(stash->array);
216d07ff455SSatish Balay   stash->array   = n_array;
217d07ff455SSatish Balay   stash->idx     = n_idx;
218d07ff455SSatish Balay   stash->idy     = n_idy;
219d07ff455SSatish Balay   stash->nmax    = newnmax;
220bc5ccf88SSatish Balay   stash->reallocs++;
221bc5ccf88SSatish Balay   PetscFunctionReturn(0);
222bc5ccf88SSatish Balay }
223bc5ccf88SSatish Balay /*
2248798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2254c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2264c1ff481SSatish Balay   can be inserted with a single call to this function.
2274c1ff481SSatish Balay 
2284c1ff481SSatish Balay   Input Parameters:
2294c1ff481SSatish Balay   stash  - the stash
2304c1ff481SSatish Balay   row    - the global row correspoiding to the values
2314c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2324c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2334c1ff481SSatish Balay   values - the values inserted
234bc5ccf88SSatish Balay */
235bc5ccf88SSatish Balay #undef __FUNC__
2368798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private"
2378798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
238bc5ccf88SSatish Balay {
239a2d1c673SSatish Balay   int    ierr,i;
240bc5ccf88SSatish Balay 
241bc5ccf88SSatish Balay   PetscFunctionBegin;
2424c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2434c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2448798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2459417f4adSLois Curfman McInnes   }
2464c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2479417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
248a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
249a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
250a2d1c673SSatish Balay     stash->n++;
2519417f4adSLois Curfman McInnes   }
252a2d1c673SSatish Balay   PetscFunctionReturn(0);
253a2d1c673SSatish Balay }
2544c1ff481SSatish Balay /*
2558798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2564c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2574c1ff481SSatish Balay   can be inserted with a single call to this function.
258a2d1c673SSatish Balay 
2594c1ff481SSatish Balay   Input Parameters:
2604c1ff481SSatish Balay   stash   - the stash
2614c1ff481SSatish Balay   row     - the global row correspoiding to the values
2624c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2634c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2644c1ff481SSatish Balay   values  - the values inserted
2654c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2664c1ff481SSatish Balay             this happens because the input is columnoriented.
2674c1ff481SSatish Balay */
268a2d1c673SSatish Balay #undef __FUNC__
2698798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private"
2708798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
2714c1ff481SSatish Balay                                       Scalar *values,int stepval)
272a2d1c673SSatish Balay {
2734c1ff481SSatish Balay   int    ierr,i;
274a2d1c673SSatish Balay 
2754c1ff481SSatish Balay   PetscFunctionBegin;
2764c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2774c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2788798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2794c1ff481SSatish Balay   }
2804c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2814c1ff481SSatish Balay     stash->idx[stash->n]   = row;
2824c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
2834c1ff481SSatish Balay     stash->array[stash->n] = values[i*stepval];
2844c1ff481SSatish Balay     stash->n++;
2854c1ff481SSatish Balay   }
2864c1ff481SSatish Balay   PetscFunctionReturn(0);
2874c1ff481SSatish Balay }
2884c1ff481SSatish Balay 
2894c1ff481SSatish Balay /*
2908798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
2914c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
2924c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
2934c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
2944c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
2954c1ff481SSatish Balay 
2964c1ff481SSatish Balay   Input Parameters:
2974c1ff481SSatish Balay   stash  - the stash
2984c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
2994c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3004c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3014c1ff481SSatish Balay            values. Each block is of size bs*bs.
3024c1ff481SSatish Balay   values - the values inserted
3034c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3044c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3054c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3064c1ff481SSatish Balay */
3074c1ff481SSatish Balay #undef __FUNC__
3088798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private"
3098798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
3104c1ff481SSatish Balay                                int rmax,int cmax,int idx)
3114c1ff481SSatish Balay {
3124c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3134c1ff481SSatish Balay   Scalar *vals,*array;
314a2d1c673SSatish Balay 
315a2d1c673SSatish Balay   PetscFunctionBegin;
316a2d1c673SSatish Balay   bs2 = bs*bs;
3174c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3188798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
319a2d1c673SSatish Balay   }
3204c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
321a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
322a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
323a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
324a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
325a2d1c673SSatish Balay        funtion call */
326a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
327a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
328a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
329a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
330a2d1c673SSatish Balay       array += 1;
331a2d1c673SSatish Balay       vals  += cmax*bs;
332a2d1c673SSatish Balay     }
3334c1ff481SSatish Balay     stash->n++;
3344c1ff481SSatish Balay   }
3354c1ff481SSatish Balay   PetscFunctionReturn(0);
3364c1ff481SSatish Balay }
3374c1ff481SSatish Balay 
3384c1ff481SSatish Balay /*
3398798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3404c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3414c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3424c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3434c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3444c1ff481SSatish Balay 
3454c1ff481SSatish Balay   Input Parameters:
3464c1ff481SSatish Balay   stash  - the stash
3474c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3484c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3494c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3504c1ff481SSatish Balay            values. Each block is of size bs*bs.
3514c1ff481SSatish Balay   values - the values inserted
3524c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3534c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3544c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3554c1ff481SSatish Balay */
3564c1ff481SSatish Balay #undef __FUNC__
3578798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private"
3588798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
3594c1ff481SSatish Balay                                              Scalar *values,int rmax,int cmax,int idx)
3604c1ff481SSatish Balay {
3614c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3624c1ff481SSatish Balay   Scalar *vals,*array;
3634c1ff481SSatish Balay 
3644c1ff481SSatish Balay   PetscFunctionBegin;
3654c1ff481SSatish Balay   bs2 = bs*bs;
3664c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3678798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3684c1ff481SSatish Balay   }
3694c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
3704c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3714c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3724c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3734c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3744c1ff481SSatish Balay      funtion call */
375a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
376a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
377a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
378a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
379a2d1c673SSatish Balay       array += bs;
380a2d1c673SSatish Balay       vals  += rmax*bs;
381a2d1c673SSatish Balay     }
382a2d1c673SSatish Balay     stash->n++;
3839417f4adSLois Curfman McInnes   }
3843a40ed3dSBarry Smith   PetscFunctionReturn(0);
3859417f4adSLois Curfman McInnes }
3864c1ff481SSatish Balay /*
3878798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
3884c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
3894c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
3904c1ff481SSatish Balay   processors.
391bc5ccf88SSatish Balay 
3924c1ff481SSatish Balay   Input Parameters:
3934c1ff481SSatish Balay   stash  - the stash
3944c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
3954c1ff481SSatish Balay            for each node.
3964c1ff481SSatish Balay 
3974c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
3984c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
3994c1ff481SSatish Balay   the proper global indices.
4004c1ff481SSatish Balay */
401bc5ccf88SSatish Balay #undef __FUNC__
4028798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private"
4038798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
404bc5ccf88SSatish Balay {
405a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
406a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
4074c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
408a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
409bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
410bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
411bc5ccf88SSatish Balay 
412bc5ccf88SSatish Balay   PetscFunctionBegin;
413bc5ccf88SSatish Balay 
4144c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
415bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
416bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
417*549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
418*549d3d68SSatish Balay   procs  = nprocs + size;
419bc5ccf88SSatish Balay   owner  = (int *) PetscMalloc( (stash->n+1)*sizeof(int) );CHKPTRQ(owner);
420a2d1c673SSatish Balay 
421bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
422bc5ccf88SSatish Balay     idx = stash->idx[i];
423bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
4244c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
425bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
426bc5ccf88SSatish Balay       }
427bc5ccf88SSatish Balay     }
428bc5ccf88SSatish Balay   }
429bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
430bc5ccf88SSatish Balay 
431bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
432bc5ccf88SSatish Balay   work = (int *)PetscMalloc(size*sizeof(int));CHKPTRQ(work);
433bc5ccf88SSatish Balay   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
434bc5ccf88SSatish Balay   nreceives = work[rank];
435bc5ccf88SSatish Balay   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
436bc5ccf88SSatish Balay   nmax = work[rank];
437bc5ccf88SSatish Balay   PetscFree(work);
438bc5ccf88SSatish Balay   /* post receives:
439bc5ccf88SSatish Balay      since we don't know how long each individual message is we
440bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
441bc5ccf88SSatish Balay      this is a lot of wasted space.
442bc5ccf88SSatish Balay   */
443a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
444a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
445a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
446bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
447a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
448bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
449bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
450bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
451bc5ccf88SSatish Balay   }
452bc5ccf88SSatish Balay 
453bc5ccf88SSatish Balay   /* do sends:
454bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
455bc5ccf88SSatish Balay          the ith processor
456bc5ccf88SSatish Balay   */
457a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
458a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
459*549d3d68SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
460bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) );CHKPTRQ(startv);
461bc5ccf88SSatish Balay   starti     = startv + size;
462a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
463bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
464bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
465bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
466bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
467bc5ccf88SSatish Balay   }
468bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
469bc5ccf88SSatish Balay     j = owner[i];
470a2d1c673SSatish Balay     if (bs2 == 1) {
471bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
472a2d1c673SSatish Balay     } else {
4734c1ff481SSatish Balay       int    k;
4744c1ff481SSatish Balay       Scalar *buf1,*buf2;
4754c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4764c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4774c1ff481SSatish Balay       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
478a2d1c673SSatish Balay     }
479bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
480bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
481bc5ccf88SSatish Balay     startv[j]++;
482bc5ccf88SSatish Balay     starti[j]++;
483bc5ccf88SSatish Balay   }
484bc5ccf88SSatish Balay   startv[0] = 0;
485bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
486bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
487bc5ccf88SSatish Balay     if (procs[i]) {
488a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
489bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
490bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
491bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
492bc5ccf88SSatish Balay     }
493bc5ccf88SSatish Balay   }
494bc5ccf88SSatish Balay   PetscFree(owner);
495bc5ccf88SSatish Balay   PetscFree(startv);
496a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
497a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
498a2d1c673SSatish Balay   stash->nprocs      = nprocs;
499a2d1c673SSatish Balay 
500bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
501bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
502bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
503bc5ccf88SSatish Balay   stash->rmax       = nmax;
504bc5ccf88SSatish Balay   PetscFunctionReturn(0);
505bc5ccf88SSatish Balay }
506bc5ccf88SSatish Balay 
507a2d1c673SSatish Balay /*
5088798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5098798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5104c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5114c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5124c1ff481SSatish Balay 
5134c1ff481SSatish Balay    Input Parameters:
5144c1ff481SSatish Balay    stash - the stash
5154c1ff481SSatish Balay 
5164c1ff481SSatish Balay    Output Parameters:
5174c1ff481SSatish Balay    nvals - the number of entries in the current message.
5184c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5194c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5204c1ff481SSatish Balay    vals  - the values
5214c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5224c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5234c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
524a2d1c673SSatish Balay */
525bc5ccf88SSatish Balay #undef __FUNC__
5268798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private"
5278798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
528bc5ccf88SSatish Balay {
529a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
530a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
531a2d1c673SSatish Balay   MPI_Status  recv_status;
532bc5ccf88SSatish Balay 
533bc5ccf88SSatish Balay   PetscFunctionBegin;
534bc5ccf88SSatish Balay 
535a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
536a2d1c673SSatish Balay   /* Return if no more messages to process */
537a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
538a2d1c673SSatish Balay 
539a2d1c673SSatish Balay   flg_v = stash->nprocs;
540a2d1c673SSatish Balay   flg_i = flg_v + size;
5414c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
542a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
543a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
544a2d1c673SSatish Balay   while (!match_found) {
545a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
546a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
547a2d1c673SSatish Balay     if (i % 2) {
548a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
549a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
550a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
551a2d1c673SSatish Balay     } else {
552a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
553a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
554a2d1c673SSatish Balay       *nvals = *nvals/bs2;
555bc5ccf88SSatish Balay     }
556a2d1c673SSatish Balay 
557a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
558a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
559a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
560a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
561a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
562a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
563a2d1c673SSatish Balay       *cols       = *rows + *nvals;
564a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
565a2d1c673SSatish Balay       *flg        = 1;
566a2d1c673SSatish Balay       stash->nprocessed ++;
567a2d1c673SSatish Balay       match_found = 1;
568bc5ccf88SSatish Balay     }
569bc5ccf88SSatish Balay   }
570bc5ccf88SSatish Balay   PetscFunctionReturn(0);
571bc5ccf88SSatish Balay }
572