xref: /petsc/src/mat/utils/matstash.c (revision 1ecfd215db73a53f870322246ab153faab91efeb)
173f4d377SMatthew Knepley /*$Id: matstash.c,v 1.50 2001/03/23 23:22:45 balay Exp $*/
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
49417f4adSLois Curfman McInnes 
53eda8832SBarry Smith /*
60ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
70ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
83eda8832SBarry Smith */
9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
104c1ff481SSatish Balay 
119417f4adSLois Curfman McInnes /*
128798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
134c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
144c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
159417f4adSLois Curfman McInnes 
164c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Input Parameters:
194c1ff481SSatish Balay   comm - communicator, required for scatters.
204c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Output Parameters:
234c1ff481SSatish Balay   stash    - the newly created stash
249417f4adSLois Curfman McInnes */
254a2ae208SSatish Balay #undef __FUNCT__
264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
278798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
289417f4adSLois Curfman McInnes {
29f1af5d2fSBarry Smith   int        ierr,max,*opt,nopt;
30f1af5d2fSBarry Smith   PetscTruth flg;
31bc5ccf88SSatish Balay 
323a40ed3dSBarry Smith   PetscFunctionBegin;
33bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
34752ec6e0SSatish Balay   stash->comm = comm;
35752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
36a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
37a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
39bc5ccf88SSatish Balay 
40434d7ff9SSatish Balay   nopt = stash->size;
4182502324SSatish Balay   ierr = PetscMalloc(nopt*sizeof(int),&opt);CHKERRQ(ierr);
42b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43434d7ff9SSatish Balay   if (flg) {
44434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
45434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
46434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
47f4ab19daSSatish Balay     else                          max = 0; /* Use default */
48434d7ff9SSatish Balay     stash->umax = max;
49434d7ff9SSatish Balay   } else {
50434d7ff9SSatish Balay     stash->umax = 0;
51434d7ff9SSatish Balay   }
52606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
534c1ff481SSatish Balay   if (bs <= 0) bs = 1;
54a2d1c673SSatish Balay 
554c1ff481SSatish Balay   stash->bs       = bs;
569417f4adSLois Curfman McInnes   stash->nmax     = 0;
57434d7ff9SSatish Balay   stash->oldnmax  = 0;
589417f4adSLois Curfman McInnes   stash->n        = 0;
594c1ff481SSatish Balay   stash->reallocs = -1;
609417f4adSLois Curfman McInnes   stash->idx      = 0;
619417f4adSLois Curfman McInnes   stash->idy      = 0;
62bc5ccf88SSatish Balay   stash->array    = 0;
639417f4adSLois Curfman McInnes 
64bc5ccf88SSatish Balay   stash->send_waits  = 0;
65bc5ccf88SSatish Balay   stash->recv_waits  = 0;
66a2d1c673SSatish Balay   stash->send_status = 0;
67bc5ccf88SSatish Balay   stash->nsends      = 0;
68bc5ccf88SSatish Balay   stash->nrecvs      = 0;
69bc5ccf88SSatish Balay   stash->svalues     = 0;
70bc5ccf88SSatish Balay   stash->rvalues     = 0;
71bc5ccf88SSatish Balay   stash->rmax        = 0;
72a2d1c673SSatish Balay   stash->nprocs      = 0;
73a2d1c673SSatish Balay   stash->nprocessed  = 0;
743a40ed3dSBarry Smith   PetscFunctionReturn(0);
759417f4adSLois Curfman McInnes }
769417f4adSLois Curfman McInnes 
774c1ff481SSatish Balay /*
788798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
794c1ff481SSatish Balay */
804a2ae208SSatish Balay #undef __FUNCT__
814a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
828798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash)
839417f4adSLois Curfman McInnes {
84bc5ccf88SSatish Balay   int ierr;
85a2d1c673SSatish Balay 
86bc5ccf88SSatish Balay   PetscFunctionBegin;
87606d414cSSatish Balay   if (stash->array) {
88606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
89606d414cSSatish Balay     stash->array = 0;
90606d414cSSatish Balay   }
91bc5ccf88SSatish Balay   PetscFunctionReturn(0);
92bc5ccf88SSatish Balay }
93bc5ccf88SSatish Balay 
944c1ff481SSatish Balay /*
958798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
964c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
974c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
984c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
994c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1004c1ff481SSatish Balay    so that the same value can be used the next time through.
1014c1ff481SSatish Balay */
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
1048798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash)
105bc5ccf88SSatish Balay {
106434d7ff9SSatish Balay   int         nsends=stash->nsends,ierr,bs2,oldnmax;
107a2d1c673SSatish Balay   MPI_Status  *send_status;
108a2d1c673SSatish Balay 
1093a40ed3dSBarry Smith   PetscFunctionBegin;
110a2d1c673SSatish Balay   /* wait on sends */
111a2d1c673SSatish Balay   if (nsends) {
11282502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
113a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
114606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
115a2d1c673SSatish Balay   }
116a2d1c673SSatish Balay 
117c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
118434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
119434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
120b9b97703SBarry Smith   if (stash->n) {
12194b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1228a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
123434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
124b9b97703SBarry Smith   }
125434d7ff9SSatish Balay 
126d07ff455SSatish Balay   stash->nmax       = 0;
127d07ff455SSatish Balay   stash->n          = 0;
1284c1ff481SSatish Balay   stash->reallocs   = -1;
129bc5ccf88SSatish Balay   stash->rmax       = 0;
130a2d1c673SSatish Balay   stash->nprocessed = 0;
131bc5ccf88SSatish Balay 
132bc5ccf88SSatish Balay   if (stash->array) {
133606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
134bc5ccf88SSatish Balay     stash->array = 0;
135bc5ccf88SSatish Balay     stash->idx   = 0;
136bc5ccf88SSatish Balay     stash->idy   = 0;
137bc5ccf88SSatish Balay   }
138606d414cSSatish Balay   if (stash->send_waits) {
139606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
140606d414cSSatish Balay     stash->send_waits = 0;
141606d414cSSatish Balay   }
142606d414cSSatish Balay   if (stash->recv_waits) {
143606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
144606d414cSSatish Balay     stash->recv_waits = 0;
145606d414cSSatish Balay   }
146606d414cSSatish Balay   if (stash->svalues) {
147606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
148606d414cSSatish Balay     stash->svalues = 0;
149606d414cSSatish Balay   }
150606d414cSSatish Balay   if (stash->rvalues) {
151606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
152606d414cSSatish Balay     stash->rvalues = 0;
153606d414cSSatish Balay   }
154606d414cSSatish Balay   if (stash->nprocs) {
155b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
156606d414cSSatish Balay     stash->nprocs = 0;
157606d414cSSatish Balay   }
158bc5ccf88SSatish Balay 
1593a40ed3dSBarry Smith   PetscFunctionReturn(0);
1609417f4adSLois Curfman McInnes }
1619417f4adSLois Curfman McInnes 
1624c1ff481SSatish Balay /*
1638798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1644c1ff481SSatish Balay 
1654c1ff481SSatish Balay    Input Parameters:
1664c1ff481SSatish Balay    stash    - the stash
16794b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1684c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1694c1ff481SSatish Balay 
1704c1ff481SSatish Balay */
1714a2ae208SSatish Balay #undef __FUNCT__
1724a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
1738798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
17497530c3fSBarry Smith {
17594b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
17694b769a5SSatish Balay 
1773a40ed3dSBarry Smith   PetscFunctionBegin;
178*1ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
179*1ecfd215SBarry Smith   if (reallocs) {
180434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
181434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
182*1ecfd215SBarry Smith   }
183bc5ccf88SSatish Balay   PetscFunctionReturn(0);
184bc5ccf88SSatish Balay }
1854c1ff481SSatish Balay 
1864c1ff481SSatish Balay 
1874c1ff481SSatish Balay /*
1888798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1894c1ff481SSatish Balay 
1904c1ff481SSatish Balay    Input Parameters:
1914c1ff481SSatish Balay    stash  - the stash
1924c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1934c1ff481SSatish Balay             this value is used while allocating memory.
1944c1ff481SSatish Balay */
1954a2ae208SSatish Balay #undef __FUNCT__
1964a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
1978798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max)
198bc5ccf88SSatish Balay {
199bc5ccf88SSatish Balay   PetscFunctionBegin;
200434d7ff9SSatish Balay   stash->umax = max;
2013a40ed3dSBarry Smith   PetscFunctionReturn(0);
20297530c3fSBarry Smith }
20397530c3fSBarry Smith 
2048798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2054c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2064c1ff481SSatish Balay    being inserted into the stash.
2074c1ff481SSatish Balay 
2084c1ff481SSatish Balay    Input Parameters:
2094c1ff481SSatish Balay    stash - the stash
2104c1ff481SSatish Balay    incr  - the minimum increase requested
2114c1ff481SSatish Balay 
2124c1ff481SSatish Balay    Notes:
2134c1ff481SSatish Balay    This routine doubles the currently used memory.
2144c1ff481SSatish Balay  */
2154a2ae208SSatish Balay #undef __FUNCT__
2164a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
2178798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr)
2189417f4adSLois Curfman McInnes {
219549d3d68SSatish Balay   int       *n_idx,*n_idy,newnmax,bs2,ierr;
2203eda8832SBarry Smith   MatScalar *n_array;
2219417f4adSLois Curfman McInnes 
2223a40ed3dSBarry Smith   PetscFunctionBegin;
2239417f4adSLois Curfman McInnes   /* allocate a larger stash */
22494b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
225c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
226434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
227434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
228c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
229434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
230434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
231434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2324c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
233d07ff455SSatish Balay 
234b0a32e0cSBarry Smith   ierr  = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
235a2d1c673SSatish Balay   n_idx = (int*)(n_array + bs2*newnmax);
236d07ff455SSatish Balay   n_idy = (int*)(n_idx + newnmax);
2373eda8832SBarry Smith   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
238549d3d68SSatish Balay   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
239549d3d68SSatish Balay   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
240606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
241d07ff455SSatish Balay   stash->array   = n_array;
242d07ff455SSatish Balay   stash->idx     = n_idx;
243d07ff455SSatish Balay   stash->idy     = n_idy;
244d07ff455SSatish Balay   stash->nmax    = newnmax;
245bc5ccf88SSatish Balay   stash->reallocs++;
246bc5ccf88SSatish Balay   PetscFunctionReturn(0);
247bc5ccf88SSatish Balay }
248bc5ccf88SSatish Balay /*
2498798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2504c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2514c1ff481SSatish Balay   can be inserted with a single call to this function.
2524c1ff481SSatish Balay 
2534c1ff481SSatish Balay   Input Parameters:
2544c1ff481SSatish Balay   stash  - the stash
2554c1ff481SSatish Balay   row    - the global row correspoiding to the values
2564c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2574c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2584c1ff481SSatish Balay   values - the values inserted
259bc5ccf88SSatish Balay */
2604a2ae208SSatish Balay #undef __FUNCT__
2614a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
262f15d580aSBarry Smith int MatStashValuesRow_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[])
263bc5ccf88SSatish Balay {
264a2d1c673SSatish Balay   int    ierr,i;
265bc5ccf88SSatish Balay 
266bc5ccf88SSatish Balay   PetscFunctionBegin;
2674c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2684c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2698798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2709417f4adSLois Curfman McInnes   }
2714c1ff481SSatish Balay   for (i=0; i<n; i++) {
2729417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
273a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
2740ae3cd3bSBarry Smith     stash->array[stash->n] = values[i];
275a2d1c673SSatish Balay     stash->n++;
2769417f4adSLois Curfman McInnes   }
277a2d1c673SSatish Balay   PetscFunctionReturn(0);
278a2d1c673SSatish Balay }
2794c1ff481SSatish Balay /*
2808798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2814c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2824c1ff481SSatish Balay   can be inserted with a single call to this function.
283a2d1c673SSatish Balay 
2844c1ff481SSatish Balay   Input Parameters:
2854c1ff481SSatish Balay   stash   - the stash
2864c1ff481SSatish Balay   row     - the global row correspoiding to the values
2874c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2884c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2894c1ff481SSatish Balay   values  - the values inserted
2904c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2914c1ff481SSatish Balay             this happens because the input is columnoriented.
2924c1ff481SSatish Balay */
2934a2ae208SSatish Balay #undef __FUNCT__
2944a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
295f15d580aSBarry Smith int MatStashValuesCol_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int stepval)
296a2d1c673SSatish Balay {
2974c1ff481SSatish Balay   int    ierr,i;
298a2d1c673SSatish Balay 
2994c1ff481SSatish Balay   PetscFunctionBegin;
3004c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
3014c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
3028798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3034c1ff481SSatish Balay   }
3044c1ff481SSatish Balay   for (i=0; i<n; i++) {
3054c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3064c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3070ae3cd3bSBarry Smith     stash->array[stash->n] = values[i*stepval];
3084c1ff481SSatish Balay     stash->n++;
3094c1ff481SSatish Balay   }
3104c1ff481SSatish Balay   PetscFunctionReturn(0);
3114c1ff481SSatish Balay }
3124c1ff481SSatish Balay 
3134c1ff481SSatish Balay /*
3148798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3154c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3164c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3174c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3184c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3194c1ff481SSatish Balay 
3204c1ff481SSatish Balay   Input Parameters:
3214c1ff481SSatish Balay   stash  - the stash
3224c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3234c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3244c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3254c1ff481SSatish Balay            values. Each block is of size bs*bs.
3264c1ff481SSatish Balay   values - the values inserted
3274c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3284c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3294c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3304c1ff481SSatish Balay */
3314a2ae208SSatish Balay #undef __FUNCT__
3324a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
333f15d580aSBarry Smith int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int rmax,int cmax,int idx)
3344c1ff481SSatish Balay {
3354c1ff481SSatish Balay   int             ierr,i,j,k,bs2,bs=stash->bs;
336f15d580aSBarry Smith   const MatScalar *vals;
337f15d580aSBarry Smith   MatScalar       *array;
338a2d1c673SSatish Balay 
339a2d1c673SSatish Balay   PetscFunctionBegin;
340a2d1c673SSatish Balay   bs2 = bs*bs;
3414c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3428798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
343a2d1c673SSatish Balay   }
3444c1ff481SSatish Balay   for (i=0; i<n; i++) {
345a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
346a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
347a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
348a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
349a2d1c673SSatish Balay        funtion call */
350a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
351a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
352a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
3530ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
354a2d1c673SSatish Balay       array += 1;
355a2d1c673SSatish Balay       vals  += cmax*bs;
356a2d1c673SSatish Balay     }
3574c1ff481SSatish Balay     stash->n++;
3584c1ff481SSatish Balay   }
3594c1ff481SSatish Balay   PetscFunctionReturn(0);
3604c1ff481SSatish Balay }
3614c1ff481SSatish Balay 
3624c1ff481SSatish Balay /*
3638798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3644c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3654c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3664c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3674c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3684c1ff481SSatish Balay 
3694c1ff481SSatish Balay   Input Parameters:
3704c1ff481SSatish Balay   stash  - the stash
3714c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3724c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3734c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3744c1ff481SSatish Balay            values. Each block is of size bs*bs.
3754c1ff481SSatish Balay   values - the values inserted
3764c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3774c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3784c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3794c1ff481SSatish Balay */
3804a2ae208SSatish Balay #undef __FUNCT__
3814a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
382f15d580aSBarry Smith int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int rmax,int cmax,int idx)
3834c1ff481SSatish Balay {
3844c1ff481SSatish Balay   int             ierr,i,j,k,bs2,bs=stash->bs;
385f15d580aSBarry Smith   const MatScalar *vals;
386f15d580aSBarry Smith   MatScalar       *array;
3874c1ff481SSatish Balay 
3884c1ff481SSatish Balay   PetscFunctionBegin;
3894c1ff481SSatish Balay   bs2 = bs*bs;
3904c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3918798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3924c1ff481SSatish Balay   }
3934c1ff481SSatish Balay   for (i=0; i<n; i++) {
3944c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3954c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
3964c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
3974c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
3984c1ff481SSatish Balay      funtion call */
399a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
400a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
401a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
4020ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k] = vals[k];}
403a2d1c673SSatish Balay       array += bs;
404a2d1c673SSatish Balay       vals  += rmax*bs;
405a2d1c673SSatish Balay     }
406a2d1c673SSatish Balay     stash->n++;
4079417f4adSLois Curfman McInnes   }
4083a40ed3dSBarry Smith   PetscFunctionReturn(0);
4099417f4adSLois Curfman McInnes }
4104c1ff481SSatish Balay /*
4118798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4124c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4134c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4144c1ff481SSatish Balay   processors.
415bc5ccf88SSatish Balay 
4164c1ff481SSatish Balay   Input Parameters:
4174c1ff481SSatish Balay   stash  - the stash
4184c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4194c1ff481SSatish Balay            for each node.
4204c1ff481SSatish Balay 
4214c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4224c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4234c1ff481SSatish Balay   the proper global indices.
4244c1ff481SSatish Balay */
4254a2ae208SSatish Balay #undef __FUNCT__
4264a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4278798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners)
428bc5ccf88SSatish Balay {
429a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
430ccae9161SBarry Smith   int         size=stash->size,*nprocs,nsends,nreceives;
431c1dc657dSBarry Smith   int         nmax,count,ierr,*sindices,*rindices,i,j,idx;
4323eda8832SBarry Smith   MatScalar   *rvalues,*svalues;
433bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
434bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
435bc5ccf88SSatish Balay 
436bc5ccf88SSatish Balay   PetscFunctionBegin;
437bc5ccf88SSatish Balay 
4384c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
439bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
44082502324SSatish Balay   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
441549d3d68SSatish Balay   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
44282502324SSatish Balay   ierr  = PetscMalloc((stash->n+1)*sizeof(int),&owner);CHKERRQ(ierr);
443a2d1c673SSatish Balay 
444bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
445bc5ccf88SSatish Balay     idx = stash->idx[i];
446bc5ccf88SSatish Balay     for (j=0; j<size; j++) {
4474c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
448c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
449bc5ccf88SSatish Balay       }
450bc5ccf88SSatish Balay     }
451bc5ccf88SSatish Balay   }
452c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
453bc5ccf88SSatish Balay 
454bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
455c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
456c1dc657dSBarry Smith 
457bc5ccf88SSatish Balay   /* post receives:
458bc5ccf88SSatish Balay      since we don't know how long each individual message is we
459bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
460bc5ccf88SSatish Balay      this is a lot of wasted space.
461bc5ccf88SSatish Balay   */
462b0a32e0cSBarry Smith   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);CHKERRQ(ierr);
463a2d1c673SSatish Balay   rindices = (int*)(rvalues + bs2*nreceives*nmax);
464b0a32e0cSBarry Smith   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
465bc5ccf88SSatish Balay   for (i=0,count=0; i<nreceives; i++) {
4663eda8832SBarry Smith     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
467bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
4680ae3cd3bSBarry Smith     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
469bc5ccf88SSatish Balay   }
470bc5ccf88SSatish Balay 
471bc5ccf88SSatish Balay   /* do sends:
472bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
473bc5ccf88SSatish Balay          the ith processor
474bc5ccf88SSatish Balay   */
47582502324SSatish Balay   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);CHKERRQ(ierr);
476a2d1c673SSatish Balay   sindices = (int*)(svalues + bs2*stash->n);
477b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
47882502324SSatish Balay   ierr     = PetscMalloc(2*size*sizeof(int),&startv);CHKERRQ(ierr);
479bc5ccf88SSatish Balay   starti   = startv + size;
480a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
481bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
482bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
483c1dc657dSBarry Smith     startv[i] = startv[i-1] + nprocs[2*i-2];
484c1dc657dSBarry Smith     starti[i] = starti[i-1] + nprocs[2*i-2]*2;
485bc5ccf88SSatish Balay   }
486bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
487bc5ccf88SSatish Balay     j = owner[i];
488a2d1c673SSatish Balay     if (bs2 == 1) {
489bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
490a2d1c673SSatish Balay     } else {
4914c1ff481SSatish Balay       int       k;
4923eda8832SBarry Smith       MatScalar *buf1,*buf2;
4934c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
4944c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
4954c1ff481SSatish Balay       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
496a2d1c673SSatish Balay     }
497bc5ccf88SSatish Balay     sindices[starti[j]]               = stash->idx[i];
498c1dc657dSBarry Smith     sindices[starti[j]+nprocs[2*j]]   = stash->idy[i];
499bc5ccf88SSatish Balay     startv[j]++;
500bc5ccf88SSatish Balay     starti[j]++;
501bc5ccf88SSatish Balay   }
502bc5ccf88SSatish Balay   startv[0] = 0;
503c1dc657dSBarry Smith   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
504bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
505c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
506c1dc657dSBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,
507bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
508c1dc657dSBarry Smith       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPI_INT,i,tag2,comm,
509bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
510bc5ccf88SSatish Balay     }
511bc5ccf88SSatish Balay   }
512606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
513606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
514a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
515a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
516a2d1c673SSatish Balay   stash->nprocs      = nprocs;
517a2d1c673SSatish Balay 
518bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
519bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
520bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
521bc5ccf88SSatish Balay   stash->rmax       = nmax;
522bc5ccf88SSatish Balay   PetscFunctionReturn(0);
523bc5ccf88SSatish Balay }
524bc5ccf88SSatish Balay 
525a2d1c673SSatish Balay /*
5268798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5278798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5284c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5294c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5304c1ff481SSatish Balay 
5314c1ff481SSatish Balay    Input Parameters:
5324c1ff481SSatish Balay    stash - the stash
5334c1ff481SSatish Balay 
5344c1ff481SSatish Balay    Output Parameters:
5354c1ff481SSatish Balay    nvals - the number of entries in the current message.
5364c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5374c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5384c1ff481SSatish Balay    vals  - the values
5394c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5404c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5414c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
542a2d1c673SSatish Balay */
5434a2ae208SSatish Balay #undef __FUNCT__
5444a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
5453eda8832SBarry Smith int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
546bc5ccf88SSatish Balay {
547ccae9161SBarry Smith   int         i,ierr,*flg_v,i1,i2,*rindices,bs2;
548a2d1c673SSatish Balay   MPI_Status  recv_status;
549b0a32e0cSBarry Smith   PetscTruth  match_found = PETSC_FALSE;
550bc5ccf88SSatish Balay 
551bc5ccf88SSatish Balay   PetscFunctionBegin;
552bc5ccf88SSatish Balay 
553a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
554a2d1c673SSatish Balay   /* Return if no more messages to process */
555a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
556a2d1c673SSatish Balay 
557a2d1c673SSatish Balay   flg_v = stash->nprocs;
5584c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
559a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
560a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
561a2d1c673SSatish Balay   while (!match_found) {
562a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
563a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
564a2d1c673SSatish Balay     if (i % 2) {
565a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
566c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
567a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
568a2d1c673SSatish Balay     } else {
5693eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
570c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
571a2d1c673SSatish Balay       *nvals = *nvals/bs2;
572bc5ccf88SSatish Balay     }
573a2d1c673SSatish Balay 
574a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
575c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
576c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
577a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
578a2d1c673SSatish Balay       rindices    = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
579a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
580a2d1c673SSatish Balay       *cols       = *rows + *nvals;
581a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
582a2d1c673SSatish Balay       *flg        = 1;
583a2d1c673SSatish Balay       stash->nprocessed ++;
58435d8aa7fSBarry Smith       match_found = PETSC_TRUE;
585bc5ccf88SSatish Balay     }
586bc5ccf88SSatish Balay   }
587bc5ccf88SSatish Balay   PetscFunctionReturn(0);
588bc5ccf88SSatish Balay }
589