xref: /petsc/src/mat/utils/matstash.c (revision b2863d3a52358f7ee22ca3a1f84f250141973e94)
1*b2863d3aSBarry Smith /*$Id: matstash.c,v 1.40 2000/04/09 03:10:11 bsmith Exp bsmith $*/
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
49417f4adSLois Curfman McInnes 
53eda8832SBarry Smith /*
63eda8832SBarry Smith        The input to the stash is ALWAYS in Scalar precision, BUT the
73eda8832SBarry Smith     internal storage and output is in MatScalar.
83eda8832SBarry Smith */
9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
104c1ff481SSatish Balay 
119417f4adSLois Curfman McInnes /*
128798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
134c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
144c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
159417f4adSLois Curfman McInnes 
164c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Input Parameters:
194c1ff481SSatish Balay   comm - communicator, required for scatters.
204c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Output Parameters:
234c1ff481SSatish Balay   stash    - the newly created stash
249417f4adSLois Curfman McInnes */
255615d1e5SSatish Balay #undef __FUNC__
26*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashCreate_Private"
278798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
289417f4adSLois Curfman McInnes {
29f1af5d2fSBarry Smith   int        ierr,max,*opt,nopt;
30f1af5d2fSBarry Smith   PetscTruth flg;
31bc5ccf88SSatish Balay 
323a40ed3dSBarry Smith   PetscFunctionBegin;
33bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
34bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
35a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
36a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
37a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
38bc5ccf88SSatish Balay 
39434d7ff9SSatish Balay   nopt = stash->size;
40434d7ff9SSatish Balay   opt  = (int*)PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt);
41434d7ff9SSatish Balay   ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
42434d7ff9SSatish Balay   if (flg) {
43434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
44434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
45434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
46f4ab19daSSatish Balay     else                          max = 0; /* Use default */
47434d7ff9SSatish Balay     stash->umax = max;
48434d7ff9SSatish Balay   } else {
49434d7ff9SSatish Balay     stash->umax = 0;
50434d7ff9SSatish Balay   }
51606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
524c1ff481SSatish Balay   if (bs <= 0) bs = 1;
53a2d1c673SSatish Balay 
544c1ff481SSatish Balay   stash->bs       = bs;
559417f4adSLois Curfman McInnes   stash->nmax     = 0;
56434d7ff9SSatish Balay   stash->oldnmax  = 0;
579417f4adSLois Curfman McInnes   stash->n        = 0;
584c1ff481SSatish Balay   stash->reallocs = -1;
599417f4adSLois Curfman McInnes   stash->idx      = 0;
609417f4adSLois Curfman McInnes   stash->idy      = 0;
61bc5ccf88SSatish Balay   stash->array    = 0;
629417f4adSLois Curfman McInnes 
63bc5ccf88SSatish Balay   stash->send_waits  = 0;
64bc5ccf88SSatish Balay   stash->recv_waits  = 0;
65a2d1c673SSatish Balay   stash->send_status = 0;
66bc5ccf88SSatish Balay   stash->nsends      = 0;
67bc5ccf88SSatish Balay   stash->nrecvs      = 0;
68bc5ccf88SSatish Balay   stash->svalues     = 0;
69bc5ccf88SSatish Balay   stash->rvalues     = 0;
70bc5ccf88SSatish Balay   stash->rmax        = 0;
71a2d1c673SSatish Balay   stash->nprocs      = 0;
72a2d1c673SSatish Balay   stash->nprocessed  = 0;
733a40ed3dSBarry Smith   PetscFunctionReturn(0);
749417f4adSLois Curfman McInnes }
759417f4adSLois Curfman McInnes 
764c1ff481SSatish Balay /*
778798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
784c1ff481SSatish Balay */
795615d1e5SSatish Balay #undef __FUNC__
80*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashDestroy_Private"
818798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
829417f4adSLois Curfman McInnes {
83bc5ccf88SSatish Balay   int ierr;
84a2d1c673SSatish Balay 
85bc5ccf88SSatish Balay   PetscFunctionBegin;
86bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr);
87606d414cSSatish Balay   if (stash->array) {
88606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
89606d414cSSatish Balay     stash->array = 0;
90606d414cSSatish Balay   }
91bc5ccf88SSatish Balay   PetscFunctionReturn(0);
92bc5ccf88SSatish Balay }
93bc5ccf88SSatish Balay 
944c1ff481SSatish Balay /*
958798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
964c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
974c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
984c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
994c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1004c1ff481SSatish Balay    so that the same value can be used the next time through.
1014c1ff481SSatish Balay */
102bc5ccf88SSatish Balay #undef __FUNC__
103*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashScatterEnd_Private"
1048798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
105bc5ccf88SSatish Balay {
106434d7ff9SSatish Balay   int         nsends=stash->nsends,ierr,bs2,oldnmax;
107a2d1c673SSatish Balay   MPI_Status  *send_status;
108a2d1c673SSatish Balay 
1093a40ed3dSBarry Smith   PetscFunctionBegin;
110a2d1c673SSatish Balay   /* wait on sends */
111a2d1c673SSatish Balay   if (nsends) {
112a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
113a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
114606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
115a2d1c673SSatish Balay   }
116a2d1c673SSatish Balay 
117c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
118434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
119434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
12094b769a5SSatish Balay   bs2      = stash->bs*stash->bs;
1218a9378f0SSatish Balay   oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
122434d7ff9SSatish Balay   if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
123434d7ff9SSatish Balay 
124d07ff455SSatish Balay   stash->nmax       = 0;
125d07ff455SSatish Balay   stash->n          = 0;
1264c1ff481SSatish Balay   stash->reallocs   = -1;
127bc5ccf88SSatish Balay   stash->rmax       = 0;
128a2d1c673SSatish Balay   stash->nprocessed = 0;
129bc5ccf88SSatish Balay 
130bc5ccf88SSatish Balay   if (stash->array) {
131606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
132bc5ccf88SSatish Balay     stash->array = 0;
133bc5ccf88SSatish Balay     stash->idx   = 0;
134bc5ccf88SSatish Balay     stash->idy   = 0;
135bc5ccf88SSatish Balay   }
136606d414cSSatish Balay   if (stash->send_waits) {
137606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138606d414cSSatish Balay     stash->send_waits = 0;
139606d414cSSatish Balay   }
140606d414cSSatish Balay   if (stash->recv_waits) {
141606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
142606d414cSSatish Balay     stash->recv_waits = 0;
143606d414cSSatish Balay   }
144606d414cSSatish Balay   if (stash->svalues) {
145606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
146606d414cSSatish Balay     stash->svalues = 0;
147606d414cSSatish Balay   }
148606d414cSSatish Balay   if (stash->rvalues) {
149606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
150606d414cSSatish Balay     stash->rvalues = 0;
151606d414cSSatish Balay   }
152606d414cSSatish Balay   if (stash->nprocs) {
153606d414cSSatish Balay     ierr = PetscFree(stash->nprocs);
154606d414cSSatish Balay     stash->nprocs = 0;
155606d414cSSatish Balay   }
156bc5ccf88SSatish Balay 
1573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1589417f4adSLois Curfman McInnes }
1599417f4adSLois Curfman McInnes 
1604c1ff481SSatish Balay /*
1618798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1624c1ff481SSatish Balay 
1634c1ff481SSatish Balay    Input Parameters:
1644c1ff481SSatish Balay    stash    - the stash
16594b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1664c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1674c1ff481SSatish Balay 
1684c1ff481SSatish Balay */
1695615d1e5SSatish Balay #undef __FUNC__
170*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashGetInfo_Private"
1718798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
17297530c3fSBarry Smith {
17394b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
17494b769a5SSatish Balay 
1753a40ed3dSBarry Smith   PetscFunctionBegin;
17694b769a5SSatish Balay   *nstash   = stash->n*bs2;
177434d7ff9SSatish Balay   if (stash->reallocs < 0) *reallocs = 0;
178434d7ff9SSatish Balay   else                     *reallocs = stash->reallocs;
179bc5ccf88SSatish Balay   PetscFunctionReturn(0);
180bc5ccf88SSatish Balay }
1814c1ff481SSatish Balay 
1824c1ff481SSatish Balay 
1834c1ff481SSatish Balay /*
1848798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1854c1ff481SSatish Balay 
1864c1ff481SSatish Balay    Input Parameters:
1874c1ff481SSatish Balay    stash  - the stash
1884c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1894c1ff481SSatish Balay             this value is used while allocating memory.
1904c1ff481SSatish Balay */
191bc5ccf88SSatish Balay #undef __FUNC__
192*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashSetInitialSize_Private"
1938798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
194bc5ccf88SSatish Balay {
195bc5ccf88SSatish Balay   PetscFunctionBegin;
196434d7ff9SSatish Balay   stash->umax = max;
1973a40ed3dSBarry Smith   PetscFunctionReturn(0);
19897530c3fSBarry Smith }
19997530c3fSBarry Smith 
2008798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2014c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2024c1ff481SSatish Balay    being inserted into the stash.
2034c1ff481SSatish Balay 
2044c1ff481SSatish Balay    Input Parameters:
2054c1ff481SSatish Balay    stash - the stash
2064c1ff481SSatish Balay    incr  - the minimum increase requested
2074c1ff481SSatish Balay 
2084c1ff481SSatish Balay    Notes:
2094c1ff481SSatish Balay    This routine doubles the currently used memory.
2104c1ff481SSatish Balay  */
2115615d1e5SSatish Balay #undef __FUNC__
212*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashExpand_Private"
2138798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
2149417f4adSLois Curfman McInnes {
215549d3d68SSatish Balay   int       *n_idx,*n_idy,newnmax,bs2,ierr;
2163eda8832SBarry Smith   MatScalar *n_array;
2179417f4adSLois Curfman McInnes 
2183a40ed3dSBarry Smith   PetscFunctionBegin;
2199417f4adSLois Curfman McInnes   /* allocate a larger stash */
22094b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
221c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
222434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
223434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
224c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
225434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
226434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
227434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2284c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
229d07ff455SSatish Balay 
2303eda8832SBarry Smith   n_array = (MatScalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)));CHKPTRQ(n_array);
231a2d1c673SSatish Balay   n_idx   = (int*)(n_array + bs2*newnmax);
232d07ff455SSatish Balay   n_idy   = (int*)(n_idx + newnmax);
2333eda8832SBarry Smith   ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
234549d3d68SSatish Balay   ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
235549d3d68SSatish Balay   ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
236606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
237d07ff455SSatish Balay   stash->array   = n_array;
238d07ff455SSatish Balay   stash->idx     = n_idx;
239d07ff455SSatish Balay   stash->idy     = n_idy;
240d07ff455SSatish Balay   stash->nmax    = newnmax;
241bc5ccf88SSatish Balay   stash->reallocs++;
242bc5ccf88SSatish Balay   PetscFunctionReturn(0);
243bc5ccf88SSatish Balay }
244bc5ccf88SSatish Balay /*
2458798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2464c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2474c1ff481SSatish Balay   can be inserted with a single call to this function.
2484c1ff481SSatish Balay 
2494c1ff481SSatish Balay   Input Parameters:
2504c1ff481SSatish Balay   stash  - the stash
2514c1ff481SSatish Balay   row    - the global row correspoiding to the values
2524c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2534c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2544c1ff481SSatish Balay   values - the values inserted
255bc5ccf88SSatish Balay */
256bc5ccf88SSatish Balay #undef __FUNC__
257*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashValuesRow_Private"
2588798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values)
259bc5ccf88SSatish Balay {
260a2d1c673SSatish Balay   int    ierr,i;
261bc5ccf88SSatish Balay 
262bc5ccf88SSatish Balay   PetscFunctionBegin;
2634c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2644c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2658798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2669417f4adSLois Curfman McInnes   }
2674c1ff481SSatish Balay   for (i=0; i<n; i++) {
2689417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
269a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
2703eda8832SBarry Smith     stash->array[stash->n] = (MatScalar)values[i];
271a2d1c673SSatish Balay     stash->n++;
2729417f4adSLois Curfman McInnes   }
273a2d1c673SSatish Balay   PetscFunctionReturn(0);
274a2d1c673SSatish Balay }
2754c1ff481SSatish Balay /*
2768798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2774c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2784c1ff481SSatish Balay   can be inserted with a single call to this function.
279a2d1c673SSatish Balay 
2804c1ff481SSatish Balay   Input Parameters:
2814c1ff481SSatish Balay   stash   - the stash
2824c1ff481SSatish Balay   row     - the global row correspoiding to the values
2834c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2844c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2854c1ff481SSatish Balay   values  - the values inserted
2864c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2874c1ff481SSatish Balay             this happens because the input is columnoriented.
2884c1ff481SSatish Balay */
289a2d1c673SSatish Balay #undef __FUNC__
290*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashValuesCol_Private"
2913eda8832SBarry Smith int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,int stepval)
292a2d1c673SSatish Balay {
2934c1ff481SSatish Balay   int    ierr,i;
294a2d1c673SSatish Balay 
2954c1ff481SSatish Balay   PetscFunctionBegin;
2964c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2974c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2988798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2994c1ff481SSatish Balay   }
3004c1ff481SSatish Balay   for (i=0; i<n; i++) {
3014c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3024c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3033eda8832SBarry Smith     stash->array[stash->n] = (MatScalar)values[i*stepval];
3044c1ff481SSatish Balay     stash->n++;
3054c1ff481SSatish Balay   }
3064c1ff481SSatish Balay   PetscFunctionReturn(0);
3074c1ff481SSatish Balay }
3084c1ff481SSatish Balay 
3094c1ff481SSatish Balay /*
3108798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3114c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3124c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3134c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3144c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3154c1ff481SSatish Balay 
3164c1ff481SSatish Balay   Input Parameters:
3174c1ff481SSatish Balay   stash  - the stash
3184c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3194c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3204c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3214c1ff481SSatish Balay            values. Each block is of size bs*bs.
3224c1ff481SSatish Balay   values - the values inserted
3234c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3244c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3254c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3264c1ff481SSatish Balay */
3274c1ff481SSatish Balay #undef __FUNC__
328*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashValuesRowBlocked_Private"
3293eda8832SBarry Smith int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,int rmax,int cmax,int idx)
3304c1ff481SSatish Balay {
3314c1ff481SSatish Balay   int       ierr,i,j,k,bs2,bs=stash->bs;
3323eda8832SBarry Smith   Scalar    *vals;
3333eda8832SBarry Smith   MatScalar *array;
334a2d1c673SSatish Balay 
335a2d1c673SSatish Balay   PetscFunctionBegin;
336a2d1c673SSatish Balay   bs2 = bs*bs;
3374c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3388798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
339a2d1c673SSatish Balay   }
3404c1ff481SSatish Balay   for (i=0; i<n; i++) {
341a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
342a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
343a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
344a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
345a2d1c673SSatish Balay        funtion call */
346a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
347a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
348a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
3493eda8832SBarry Smith       for (k=0; k<bs; k++) {array[k*bs] = (MatScalar)vals[k];}
350a2d1c673SSatish Balay       array += 1;
351a2d1c673SSatish Balay       vals  += cmax*bs;
352a2d1c673SSatish Balay     }
3534c1ff481SSatish Balay     stash->n++;
3544c1ff481SSatish Balay   }
3554c1ff481SSatish Balay   PetscFunctionReturn(0);
3564c1ff481SSatish Balay }
3574c1ff481SSatish Balay 
3584c1ff481SSatish Balay /*
3598798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3604c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3614c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3624c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3634c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3644c1ff481SSatish Balay 
3654c1ff481SSatish Balay   Input Parameters:
3664c1ff481SSatish Balay   stash  - the stash
3674c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3684c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3694c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3704c1ff481SSatish Balay            values. Each block is of size bs*bs.
3714c1ff481SSatish Balay   values - the values inserted
3724c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3734c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3744c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3754c1ff481SSatish Balay */
3764c1ff481SSatish Balay #undef __FUNC__
377*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashValuesColBlocked_Private"
3783eda8832SBarry Smith int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,int rmax,int cmax,int idx)
3794c1ff481SSatish Balay {
3804c1ff481SSatish Balay   int       ierr,i,j,k,bs2,bs=stash->bs;
3813eda8832SBarry Smith   Scalar    *vals;
3823eda8832SBarry Smith   MatScalar *array;
3834c1ff481SSatish Balay 
3844c1ff481SSatish Balay   PetscFunctionBegin;
3854c1ff481SSatish Balay   bs2 = bs*bs;
3864c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3878798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3884c1ff481SSatish Balay   }
3894c1ff481SSatish Balay   for (i=0; i<n; i++) {
3904c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3914c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3924c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3934c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3944c1ff481SSatish Balay      funtion call */
395a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
396a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
397a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
3983eda8832SBarry Smith       for (k=0; k<bs; k++) {array[k] = (MatScalar)vals[k];}
399a2d1c673SSatish Balay       array += bs;
400a2d1c673SSatish Balay       vals  += rmax*bs;
401a2d1c673SSatish Balay     }
402a2d1c673SSatish Balay     stash->n++;
4039417f4adSLois Curfman McInnes   }
4043a40ed3dSBarry Smith   PetscFunctionReturn(0);
4059417f4adSLois Curfman McInnes }
4064c1ff481SSatish Balay /*
4078798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4084c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4094c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4104c1ff481SSatish Balay   processors.
411bc5ccf88SSatish Balay 
4124c1ff481SSatish Balay   Input Parameters:
4134c1ff481SSatish Balay   stash  - the stash
4144c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4154c1ff481SSatish Balay            for each node.
4164c1ff481SSatish Balay 
4174c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4184c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4194c1ff481SSatish Balay   the proper global indices.
4204c1ff481SSatish Balay */
421bc5ccf88SSatish Balay #undef __FUNC__
422*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashScatterBegin_Private"
4238798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
424bc5ccf88SSatish Balay {
425a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
426a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
4274c1ff481SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
4283eda8832SBarry Smith   MatScalar   *rvalues,*svalues;
429bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
430bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
431bc5ccf88SSatish Balay 
432bc5ccf88SSatish Balay   PetscFunctionBegin;
433bc5ccf88SSatish Balay 
4344c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
435bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
436bc5ccf88SSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
437549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
438549d3d68SSatish Balay   procs  = nprocs + size;
439bc5ccf88SSatish Balay   owner  = (int*)PetscMalloc((stash->n+1)*sizeof(int));CHKPTRQ(owner);
440a2d1c673SSatish Balay 
441bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
442bc5ccf88SSatish Balay     idx = stash->idx[i];
443bc5ccf88SSatish Balay     for (j=0; j<size; j++) {
4444c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
445bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
446bc5ccf88SSatish Balay       }
447bc5ccf88SSatish Balay     }
448bc5ccf88SSatish Balay   }
449bc5ccf88SSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
450bc5ccf88SSatish Balay 
451bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
4526831982aSBarry Smith   work      = (int *)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
4536831982aSBarry Smith   ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
454bc5ccf88SSatish Balay   nmax      = work[rank];
4556831982aSBarry Smith   nreceives = work[size+rank];
456606d414cSSatish Balay   ierr      = PetscFree(work);CHKERRQ(ierr);
457bc5ccf88SSatish Balay   /* post receives:
458bc5ccf88SSatish Balay      since we don't know how long each individual message is we
459bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
460bc5ccf88SSatish Balay      this is a lot of wasted space.
461bc5ccf88SSatish Balay   */
4623eda8832SBarry Smith   rvalues    = (MatScalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)));CHKPTRQ(rvalues);
463a2d1c673SSatish Balay   rindices   = (int*)(rvalues + bs2*nreceives*nmax);
464a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
465bc5ccf88SSatish Balay   for (i=0,count=0; i<nreceives; i++) {
4663eda8832SBarry Smith     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
467bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
468bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
469bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
470bc5ccf88SSatish Balay   }
471bc5ccf88SSatish Balay 
472bc5ccf88SSatish Balay   /* do sends:
473bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
474bc5ccf88SSatish Balay          the ith processor
475bc5ccf88SSatish Balay   */
4763eda8832SBarry Smith   svalues    = (MatScalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)));CHKPTRQ(svalues);
477a2d1c673SSatish Balay   sindices   = (int*)(svalues + bs2*stash->n);
478549d3d68SSatish Balay   send_waits = (MPI_Request*)PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
479bc5ccf88SSatish Balay   startv     = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(startv);
480bc5ccf88SSatish Balay   starti     = startv + size;
481a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
482bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
483bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
484bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
485bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
486bc5ccf88SSatish Balay   }
487bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
488bc5ccf88SSatish Balay     j = owner[i];
489a2d1c673SSatish Balay     if (bs2 == 1) {
490bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
491a2d1c673SSatish Balay     } else {
4924c1ff481SSatish Balay       int       k;
4933eda8832SBarry Smith       MatScalar *buf1,*buf2;
4944c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4954c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4964c1ff481SSatish Balay       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
497a2d1c673SSatish Balay     }
498bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
499bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
500bc5ccf88SSatish Balay     startv[j]++;
501bc5ccf88SSatish Balay     starti[j]++;
502bc5ccf88SSatish Balay   }
503bc5ccf88SSatish Balay   startv[0] = 0;
504bc5ccf88SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[i-1];}
505bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
506bc5ccf88SSatish Balay     if (procs[i]) {
5073eda8832SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_MATSCALAR,i,tag1,comm,
508bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
509bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
510bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
511bc5ccf88SSatish Balay     }
512bc5ccf88SSatish Balay   }
513606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
514606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
515a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
516a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
517a2d1c673SSatish Balay   stash->nprocs      = nprocs;
518a2d1c673SSatish Balay 
519bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
520bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
521bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
522bc5ccf88SSatish Balay   stash->rmax       = nmax;
523bc5ccf88SSatish Balay   PetscFunctionReturn(0);
524bc5ccf88SSatish Balay }
525bc5ccf88SSatish Balay 
526a2d1c673SSatish Balay /*
5278798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5288798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5294c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5304c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5314c1ff481SSatish Balay 
5324c1ff481SSatish Balay    Input Parameters:
5334c1ff481SSatish Balay    stash - the stash
5344c1ff481SSatish Balay 
5354c1ff481SSatish Balay    Output Parameters:
5364c1ff481SSatish Balay    nvals - the number of entries in the current message.
5374c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5384c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5394c1ff481SSatish Balay    vals  - the values
5404c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5414c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5424c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
543a2d1c673SSatish Balay */
544bc5ccf88SSatish Balay #undef __FUNC__
545*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStashScatterGetMesg_Private"
5463eda8832SBarry Smith int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
547bc5ccf88SSatish Balay {
548a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
549a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
550a2d1c673SSatish Balay   MPI_Status  recv_status;
551bc5ccf88SSatish Balay 
552bc5ccf88SSatish Balay   PetscFunctionBegin;
553bc5ccf88SSatish Balay 
554a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
555a2d1c673SSatish Balay   /* Return if no more messages to process */
556a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
557a2d1c673SSatish Balay 
558a2d1c673SSatish Balay   flg_v = stash->nprocs;
559a2d1c673SSatish Balay   flg_i = flg_v + size;
5604c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
561a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
562a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
563a2d1c673SSatish Balay   while (!match_found) {
564a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
565a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
566a2d1c673SSatish Balay     if (i % 2) {
567a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
568a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
569a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
570a2d1c673SSatish Balay     } else {
5713eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
572a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
573a2d1c673SSatish Balay       *nvals = *nvals/bs2;
574bc5ccf88SSatish Balay     }
575a2d1c673SSatish Balay 
576a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
577a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
578a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
579a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
580a2d1c673SSatish Balay       rindices    = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
581a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
582a2d1c673SSatish Balay       *cols       = *rows + *nvals;
583a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
584a2d1c673SSatish Balay       *flg        = 1;
585a2d1c673SSatish Balay       stash->nprocessed ++;
586a2d1c673SSatish Balay       match_found = 1;
587bc5ccf88SSatish Balay     }
588bc5ccf88SSatish Balay   }
589bc5ccf88SSatish Balay   PetscFunctionReturn(0);
590bc5ccf88SSatish Balay }
591