xref: /petsc/src/mat/utils/matstash.c (revision f1af5d2ffeae1f5fc391a89939f4818e47770ae3)
1*f1af5d2fSBarry Smith /*$Id: matstash.c,v 1.37 1999/10/24 14:02:51 bsmith Exp bsmith $*/
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
49417f4adSLois Curfman McInnes 
5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
64c1ff481SSatish Balay 
79417f4adSLois Curfman McInnes /*
88798bf22SSatish Balay   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
94c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
104c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
119417f4adSLois Curfman McInnes 
124c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
134c1ff481SSatish Balay 
144c1ff481SSatish Balay   Input Parameters:
154c1ff481SSatish Balay   comm - communicator, required for scatters.
164c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Output Parameters:
194c1ff481SSatish Balay   stash    - the newly created stash
209417f4adSLois Curfman McInnes */
215615d1e5SSatish Balay #undef __FUNC__
228798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private"
238798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
249417f4adSLois Curfman McInnes {
25*f1af5d2fSBarry Smith   int        ierr,max,*opt,nopt;
26*f1af5d2fSBarry Smith   PetscTruth flg;
27bc5ccf88SSatish Balay 
283a40ed3dSBarry Smith   PetscFunctionBegin;
29bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
30bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
31a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
34bc5ccf88SSatish Balay 
35434d7ff9SSatish Balay   nopt = stash->size;
36434d7ff9SSatish Balay   opt  = (int*) PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt);
37434d7ff9SSatish Balay   ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
38434d7ff9SSatish Balay   if (flg) {
39434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
40434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
41434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
42f4ab19daSSatish Balay     else                          max = 0; /* Use default */
43434d7ff9SSatish Balay     stash->umax = max;
44434d7ff9SSatish Balay   } else {
45434d7ff9SSatish Balay     stash->umax = 0;
46434d7ff9SSatish Balay   }
47606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
484c1ff481SSatish Balay   if (bs <= 0) bs = 1;
49a2d1c673SSatish Balay 
504c1ff481SSatish Balay   stash->bs       = bs;
519417f4adSLois Curfman McInnes   stash->nmax     = 0;
52434d7ff9SSatish Balay   stash->oldnmax  = 0;
539417f4adSLois Curfman McInnes   stash->n        = 0;
544c1ff481SSatish Balay   stash->reallocs = -1;
559417f4adSLois Curfman McInnes   stash->idx      = 0;
569417f4adSLois Curfman McInnes   stash->idy      = 0;
57bc5ccf88SSatish Balay   stash->array    = 0;
589417f4adSLois Curfman McInnes 
59bc5ccf88SSatish Balay   stash->send_waits  = 0;
60bc5ccf88SSatish Balay   stash->recv_waits  = 0;
61a2d1c673SSatish Balay   stash->send_status = 0;
62bc5ccf88SSatish Balay   stash->nsends      = 0;
63bc5ccf88SSatish Balay   stash->nrecvs      = 0;
64bc5ccf88SSatish Balay   stash->svalues     = 0;
65bc5ccf88SSatish Balay   stash->rvalues     = 0;
66bc5ccf88SSatish Balay   stash->rmax        = 0;
67a2d1c673SSatish Balay   stash->nprocs      = 0;
68a2d1c673SSatish Balay   stash->nprocessed  = 0;
693a40ed3dSBarry Smith   PetscFunctionReturn(0);
709417f4adSLois Curfman McInnes }
719417f4adSLois Curfman McInnes 
724c1ff481SSatish Balay /*
738798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
744c1ff481SSatish Balay */
755615d1e5SSatish Balay #undef __FUNC__
768798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private"
778798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
789417f4adSLois Curfman McInnes {
79bc5ccf88SSatish Balay   int ierr;
80a2d1c673SSatish Balay 
81bc5ccf88SSatish Balay   PetscFunctionBegin;
82bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr);
83606d414cSSatish Balay   if (stash->array) {
84606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
85606d414cSSatish Balay     stash->array = 0;
86606d414cSSatish Balay   }
87bc5ccf88SSatish Balay   PetscFunctionReturn(0);
88bc5ccf88SSatish Balay }
89bc5ccf88SSatish Balay 
904c1ff481SSatish Balay /*
918798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
924c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
934c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
944c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
954c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
964c1ff481SSatish Balay    so that the same value can be used the next time through.
974c1ff481SSatish Balay */
98bc5ccf88SSatish Balay #undef __FUNC__
998798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private"
1008798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
101bc5ccf88SSatish Balay {
102434d7ff9SSatish Balay   int         nsends=stash->nsends,ierr,bs2,oldnmax;
103a2d1c673SSatish Balay   MPI_Status  *send_status;
104a2d1c673SSatish Balay 
1053a40ed3dSBarry Smith   PetscFunctionBegin;
106a2d1c673SSatish Balay   /* wait on sends */
107a2d1c673SSatish Balay   if (nsends) {
108a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
109a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
110606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
111a2d1c673SSatish Balay   }
112a2d1c673SSatish Balay 
113c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
114434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
115434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
11694b769a5SSatish Balay   bs2      = stash->bs*stash->bs;
1178a9378f0SSatish Balay   oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
118434d7ff9SSatish Balay   if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
119434d7ff9SSatish Balay 
120d07ff455SSatish Balay   stash->nmax       = 0;
121d07ff455SSatish Balay   stash->n          = 0;
1224c1ff481SSatish Balay   stash->reallocs   = -1;
123bc5ccf88SSatish Balay   stash->rmax       = 0;
124a2d1c673SSatish Balay   stash->nprocessed = 0;
125bc5ccf88SSatish Balay 
126bc5ccf88SSatish Balay   if (stash->array) {
127606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
128bc5ccf88SSatish Balay     stash->array = 0;
129bc5ccf88SSatish Balay     stash->idx   = 0;
130bc5ccf88SSatish Balay     stash->idy   = 0;
131bc5ccf88SSatish Balay   }
132606d414cSSatish Balay   if (stash->send_waits) {
133606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
134606d414cSSatish Balay     stash->send_waits = 0;
135606d414cSSatish Balay   }
136606d414cSSatish Balay   if (stash->recv_waits) {
137606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
138606d414cSSatish Balay     stash->recv_waits = 0;
139606d414cSSatish Balay   }
140606d414cSSatish Balay   if (stash->svalues) {
141606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
142606d414cSSatish Balay     stash->svalues = 0;
143606d414cSSatish Balay   }
144606d414cSSatish Balay   if (stash->rvalues) {
145606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
146606d414cSSatish Balay     stash->rvalues = 0;
147606d414cSSatish Balay   }
148606d414cSSatish Balay   if (stash->nprocs) {
149606d414cSSatish Balay     ierr = PetscFree(stash->nprocs);
150606d414cSSatish Balay     stash->nprocs = 0;
151606d414cSSatish Balay   }
152bc5ccf88SSatish Balay 
1533a40ed3dSBarry Smith   PetscFunctionReturn(0);
1549417f4adSLois Curfman McInnes }
1559417f4adSLois Curfman McInnes 
1564c1ff481SSatish Balay /*
1578798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1584c1ff481SSatish Balay 
1594c1ff481SSatish Balay    Input Parameters:
1604c1ff481SSatish Balay    stash    - the stash
16194b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1624c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1634c1ff481SSatish Balay 
1644c1ff481SSatish Balay */
1655615d1e5SSatish Balay #undef __FUNC__
1668798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private"
1678798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
16897530c3fSBarry Smith {
16994b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
17094b769a5SSatish Balay 
1713a40ed3dSBarry Smith   PetscFunctionBegin;
17294b769a5SSatish Balay   *nstash   = stash->n*bs2;
173434d7ff9SSatish Balay   if (stash->reallocs < 0) *reallocs = 0;
174434d7ff9SSatish Balay   else                     *reallocs = stash->reallocs;
175bc5ccf88SSatish Balay   PetscFunctionReturn(0);
176bc5ccf88SSatish Balay }
1774c1ff481SSatish Balay 
1784c1ff481SSatish Balay 
1794c1ff481SSatish Balay /*
1808798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1814c1ff481SSatish Balay 
1824c1ff481SSatish Balay    Input Parameters:
1834c1ff481SSatish Balay    stash  - the stash
1844c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1854c1ff481SSatish Balay             this value is used while allocating memory.
1864c1ff481SSatish Balay */
187bc5ccf88SSatish Balay #undef __FUNC__
1888798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private"
1898798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
190bc5ccf88SSatish Balay {
191bc5ccf88SSatish Balay   PetscFunctionBegin;
192434d7ff9SSatish Balay   stash->umax = max;
1933a40ed3dSBarry Smith   PetscFunctionReturn(0);
19497530c3fSBarry Smith }
19597530c3fSBarry Smith 
1968798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1974c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1984c1ff481SSatish Balay    being inserted into the stash.
1994c1ff481SSatish Balay 
2004c1ff481SSatish Balay    Input Parameters:
2014c1ff481SSatish Balay    stash - the stash
2024c1ff481SSatish Balay    incr  - the minimum increase requested
2034c1ff481SSatish Balay 
2044c1ff481SSatish Balay    Notes:
2054c1ff481SSatish Balay    This routine doubles the currently used memory.
2064c1ff481SSatish Balay  */
2075615d1e5SSatish Balay #undef __FUNC__
2088798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private"
2098798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
2109417f4adSLois Curfman McInnes {
211549d3d68SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2,ierr;
212bc5ccf88SSatish Balay   Scalar *n_array;
2139417f4adSLois Curfman McInnes 
2143a40ed3dSBarry Smith   PetscFunctionBegin;
2159417f4adSLois Curfman McInnes   /* allocate a larger stash */
21694b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
217c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
218434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
219434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
220c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
221434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
222434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
223434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2244c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
225d07ff455SSatish Balay 
226a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
227a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
228d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
229549d3d68SSatish Balay   ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));CHKERRQ(ierr);
230549d3d68SSatish Balay   ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
231549d3d68SSatish Balay   ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
232606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
233d07ff455SSatish Balay   stash->array   = n_array;
234d07ff455SSatish Balay   stash->idx     = n_idx;
235d07ff455SSatish Balay   stash->idy     = n_idy;
236d07ff455SSatish Balay   stash->nmax    = newnmax;
237bc5ccf88SSatish Balay   stash->reallocs++;
238bc5ccf88SSatish Balay   PetscFunctionReturn(0);
239bc5ccf88SSatish Balay }
240bc5ccf88SSatish Balay /*
2418798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2424c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2434c1ff481SSatish Balay   can be inserted with a single call to this function.
2444c1ff481SSatish Balay 
2454c1ff481SSatish Balay   Input Parameters:
2464c1ff481SSatish Balay   stash  - the stash
2474c1ff481SSatish Balay   row    - the global row correspoiding to the values
2484c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2494c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2504c1ff481SSatish Balay   values - the values inserted
251bc5ccf88SSatish Balay */
252bc5ccf88SSatish Balay #undef __FUNC__
2538798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private"
2548798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
255bc5ccf88SSatish Balay {
256a2d1c673SSatish Balay   int    ierr,i;
257bc5ccf88SSatish Balay 
258bc5ccf88SSatish Balay   PetscFunctionBegin;
2594c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2604c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2618798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2629417f4adSLois Curfman McInnes   }
2634c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2649417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
265a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
266a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
267a2d1c673SSatish Balay     stash->n++;
2689417f4adSLois Curfman McInnes   }
269a2d1c673SSatish Balay   PetscFunctionReturn(0);
270a2d1c673SSatish Balay }
2714c1ff481SSatish Balay /*
2728798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2734c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2744c1ff481SSatish Balay   can be inserted with a single call to this function.
275a2d1c673SSatish Balay 
2764c1ff481SSatish Balay   Input Parameters:
2774c1ff481SSatish Balay   stash   - the stash
2784c1ff481SSatish Balay   row     - the global row correspoiding to the values
2794c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2804c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2814c1ff481SSatish Balay   values  - the values inserted
2824c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2834c1ff481SSatish Balay             this happens because the input is columnoriented.
2844c1ff481SSatish Balay */
285a2d1c673SSatish Balay #undef __FUNC__
2868798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private"
2878798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
2884c1ff481SSatish Balay                                       Scalar *values,int stepval)
289a2d1c673SSatish Balay {
2904c1ff481SSatish Balay   int    ierr,i;
291a2d1c673SSatish Balay 
2924c1ff481SSatish Balay   PetscFunctionBegin;
2934c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2944c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2958798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2964c1ff481SSatish Balay   }
2974c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
2984c1ff481SSatish Balay     stash->idx[stash->n]   = row;
2994c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3004c1ff481SSatish Balay     stash->array[stash->n] = values[i*stepval];
3014c1ff481SSatish Balay     stash->n++;
3024c1ff481SSatish Balay   }
3034c1ff481SSatish Balay   PetscFunctionReturn(0);
3044c1ff481SSatish Balay }
3054c1ff481SSatish Balay 
3064c1ff481SSatish Balay /*
3078798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3084c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3094c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3104c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3114c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3124c1ff481SSatish Balay 
3134c1ff481SSatish Balay   Input Parameters:
3144c1ff481SSatish Balay   stash  - the stash
3154c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3164c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3174c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3184c1ff481SSatish Balay            values. Each block is of size bs*bs.
3194c1ff481SSatish Balay   values - the values inserted
3204c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3214c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3224c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3234c1ff481SSatish Balay */
3244c1ff481SSatish Balay #undef __FUNC__
3258798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private"
3268798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
3274c1ff481SSatish Balay                                int rmax,int cmax,int idx)
3284c1ff481SSatish Balay {
3294c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3304c1ff481SSatish Balay   Scalar *vals,*array;
331a2d1c673SSatish Balay 
332a2d1c673SSatish Balay   PetscFunctionBegin;
333a2d1c673SSatish Balay   bs2 = bs*bs;
3344c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3358798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
336a2d1c673SSatish Balay   }
3374c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
338a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
339a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
340a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
341a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
342a2d1c673SSatish Balay        funtion call */
343a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
344a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
345a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
346a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
347a2d1c673SSatish Balay       array += 1;
348a2d1c673SSatish Balay       vals  += cmax*bs;
349a2d1c673SSatish Balay     }
3504c1ff481SSatish Balay     stash->n++;
3514c1ff481SSatish Balay   }
3524c1ff481SSatish Balay   PetscFunctionReturn(0);
3534c1ff481SSatish Balay }
3544c1ff481SSatish Balay 
3554c1ff481SSatish Balay /*
3568798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3574c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3584c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3594c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3604c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3614c1ff481SSatish Balay 
3624c1ff481SSatish Balay   Input Parameters:
3634c1ff481SSatish Balay   stash  - the stash
3644c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3654c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3664c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3674c1ff481SSatish Balay            values. Each block is of size bs*bs.
3684c1ff481SSatish Balay   values - the values inserted
3694c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3704c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3714c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3724c1ff481SSatish Balay */
3734c1ff481SSatish Balay #undef __FUNC__
3748798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private"
3758798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
3764c1ff481SSatish Balay                                              Scalar *values,int rmax,int cmax,int idx)
3774c1ff481SSatish Balay {
3784c1ff481SSatish Balay   int    ierr,i,j,k,bs2,bs=stash->bs;
3794c1ff481SSatish Balay   Scalar *vals,*array;
3804c1ff481SSatish Balay 
3814c1ff481SSatish Balay   PetscFunctionBegin;
3824c1ff481SSatish Balay   bs2 = bs*bs;
3834c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3848798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3854c1ff481SSatish Balay   }
3864c1ff481SSatish Balay   for ( i=0; i<n; i++ ) {
3874c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3884c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3894c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3904c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3914c1ff481SSatish Balay      funtion call */
392a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
393a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
394a2d1c673SSatish Balay     for ( j=0; j<bs; j++ ) {
395a2d1c673SSatish Balay       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
396a2d1c673SSatish Balay       array += bs;
397a2d1c673SSatish Balay       vals  += rmax*bs;
398a2d1c673SSatish Balay     }
399a2d1c673SSatish Balay     stash->n++;
4009417f4adSLois Curfman McInnes   }
4013a40ed3dSBarry Smith   PetscFunctionReturn(0);
4029417f4adSLois Curfman McInnes }
4034c1ff481SSatish Balay /*
4048798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4054c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4064c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4074c1ff481SSatish Balay   processors.
408bc5ccf88SSatish Balay 
4094c1ff481SSatish Balay   Input Parameters:
4104c1ff481SSatish Balay   stash  - the stash
4114c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4124c1ff481SSatish Balay            for each node.
4134c1ff481SSatish Balay 
4144c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4154c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4164c1ff481SSatish Balay   the proper global indices.
4174c1ff481SSatish Balay */
418bc5ccf88SSatish Balay #undef __FUNC__
4198798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private"
4208798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
421bc5ccf88SSatish Balay {
422a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
423a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
4244c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
425a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
426bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
427bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
428bc5ccf88SSatish Balay 
429bc5ccf88SSatish Balay   PetscFunctionBegin;
430bc5ccf88SSatish Balay 
4314c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
432bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
433bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
434549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
435549d3d68SSatish Balay   procs  = nprocs + size;
436bc5ccf88SSatish Balay   owner  = (int *) PetscMalloc( (stash->n+1)*sizeof(int) );CHKPTRQ(owner);
437a2d1c673SSatish Balay 
438bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
439bc5ccf88SSatish Balay     idx = stash->idx[i];
440bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
4414c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
442bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
443bc5ccf88SSatish Balay       }
444bc5ccf88SSatish Balay     }
445bc5ccf88SSatish Balay   }
446bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
447bc5ccf88SSatish Balay 
448bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
4496831982aSBarry Smith   work      = (int *)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
4506831982aSBarry Smith   ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
451bc5ccf88SSatish Balay   nmax      = work[rank];
4526831982aSBarry Smith   nreceives = work[size+rank];
453606d414cSSatish Balay   ierr      = PetscFree(work);CHKERRQ(ierr);
454bc5ccf88SSatish Balay   /* post receives:
455bc5ccf88SSatish Balay      since we don't know how long each individual message is we
456bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
457bc5ccf88SSatish Balay      this is a lot of wasted space.
458bc5ccf88SSatish Balay   */
459a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
460a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
461a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
462bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
463a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
464bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
465bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
466bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
467bc5ccf88SSatish Balay   }
468bc5ccf88SSatish Balay 
469bc5ccf88SSatish Balay   /* do sends:
470bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
471bc5ccf88SSatish Balay          the ith processor
472bc5ccf88SSatish Balay   */
473a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
474a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
475549d3d68SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
476bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) );CHKPTRQ(startv);
477bc5ccf88SSatish Balay   starti     = startv + size;
478a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
479bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
480bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
481bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
482bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
483bc5ccf88SSatish Balay   }
484bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
485bc5ccf88SSatish Balay     j = owner[i];
486a2d1c673SSatish Balay     if (bs2 == 1) {
487bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
488a2d1c673SSatish Balay     } else {
4894c1ff481SSatish Balay       int    k;
4904c1ff481SSatish Balay       Scalar *buf1,*buf2;
4914c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4924c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4934c1ff481SSatish Balay       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
494a2d1c673SSatish Balay     }
495bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
496bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
497bc5ccf88SSatish Balay     startv[j]++;
498bc5ccf88SSatish Balay     starti[j]++;
499bc5ccf88SSatish Balay   }
500bc5ccf88SSatish Balay   startv[0] = 0;
501bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
502bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
503bc5ccf88SSatish Balay     if (procs[i]) {
504a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
505bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
506bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
507bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
508bc5ccf88SSatish Balay     }
509bc5ccf88SSatish Balay   }
510606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
511606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
512a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
513a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
514a2d1c673SSatish Balay   stash->nprocs      = nprocs;
515a2d1c673SSatish Balay 
516bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
517bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
518bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
519bc5ccf88SSatish Balay   stash->rmax       = nmax;
520bc5ccf88SSatish Balay   PetscFunctionReturn(0);
521bc5ccf88SSatish Balay }
522bc5ccf88SSatish Balay 
523a2d1c673SSatish Balay /*
5248798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5258798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5264c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5274c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5284c1ff481SSatish Balay 
5294c1ff481SSatish Balay    Input Parameters:
5304c1ff481SSatish Balay    stash - the stash
5314c1ff481SSatish Balay 
5324c1ff481SSatish Balay    Output Parameters:
5334c1ff481SSatish Balay    nvals - the number of entries in the current message.
5344c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5354c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5364c1ff481SSatish Balay    vals  - the values
5374c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5384c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5394c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
540a2d1c673SSatish Balay */
541bc5ccf88SSatish Balay #undef __FUNC__
5428798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private"
5438798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
544bc5ccf88SSatish Balay {
545a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
546a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
547a2d1c673SSatish Balay   MPI_Status  recv_status;
548bc5ccf88SSatish Balay 
549bc5ccf88SSatish Balay   PetscFunctionBegin;
550bc5ccf88SSatish Balay 
551a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
552a2d1c673SSatish Balay   /* Return if no more messages to process */
553a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
554a2d1c673SSatish Balay 
555a2d1c673SSatish Balay   flg_v = stash->nprocs;
556a2d1c673SSatish Balay   flg_i = flg_v + size;
5574c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
558a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
559a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
560a2d1c673SSatish Balay   while (!match_found) {
561a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
562a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
563a2d1c673SSatish Balay     if (i % 2) {
564a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
565a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
566a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
567a2d1c673SSatish Balay     } else {
568a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
569a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
570a2d1c673SSatish Balay       *nvals = *nvals/bs2;
571bc5ccf88SSatish Balay     }
572a2d1c673SSatish Balay 
573a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
574a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
575a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
576a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
577a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
578a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
579a2d1c673SSatish Balay       *cols       = *rows + *nvals;
580a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
581a2d1c673SSatish Balay       *flg        = 1;
582a2d1c673SSatish Balay       stash->nprocessed ++;
583a2d1c673SSatish Balay       match_found = 1;
584bc5ccf88SSatish Balay     }
585bc5ccf88SSatish Balay   }
586bc5ccf88SSatish Balay   PetscFunctionReturn(0);
587bc5ccf88SSatish Balay }
588