xref: /petsc/src/mat/utils/matstash.c (revision ae15b995b5732fffd2de5a75cf61ef7190c6fef1)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
49417f4adSLois Curfman McInnes 
53eda8832SBarry Smith /*
60ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
70ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
83eda8832SBarry Smith */
9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
104c1ff481SSatish Balay 
119417f4adSLois Curfman McInnes /*
128798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
134c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
144c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
159417f4adSLois Curfman McInnes 
164c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Input Parameters:
194c1ff481SSatish Balay   comm - communicator, required for scatters.
204c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Output Parameters:
234c1ff481SSatish Balay   stash    - the newly created stash
249417f4adSLois Curfman McInnes */
254a2ae208SSatish Balay #undef __FUNCT__
264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
27c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
289417f4adSLois Curfman McInnes {
29dfbe8321SBarry Smith   PetscErrorCode ierr;
30c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
31f1af5d2fSBarry Smith   PetscTruth     flg;
32bc5ccf88SSatish Balay 
333a40ed3dSBarry Smith   PetscFunctionBegin;
34bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35752ec6e0SSatish Balay   stash->comm = comm;
36752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40bc5ccf88SSatish Balay 
41434d7ff9SSatish Balay   nopt = stash->size;
42d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44434d7ff9SSatish Balay   if (flg) {
45434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
46434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
47434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
48f4ab19daSSatish Balay     else                          max = 0; /* Use default */
49434d7ff9SSatish Balay     stash->umax = max;
50434d7ff9SSatish Balay   } else {
51434d7ff9SSatish Balay     stash->umax = 0;
52434d7ff9SSatish Balay   }
53606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
544c1ff481SSatish Balay   if (bs <= 0) bs = 1;
55a2d1c673SSatish Balay 
564c1ff481SSatish Balay   stash->bs       = bs;
579417f4adSLois Curfman McInnes   stash->nmax     = 0;
58434d7ff9SSatish Balay   stash->oldnmax  = 0;
599417f4adSLois Curfman McInnes   stash->n        = 0;
604c1ff481SSatish Balay   stash->reallocs = -1;
619417f4adSLois Curfman McInnes   stash->idx      = 0;
629417f4adSLois Curfman McInnes   stash->idy      = 0;
63bc5ccf88SSatish Balay   stash->array    = 0;
649417f4adSLois Curfman McInnes 
65bc5ccf88SSatish Balay   stash->send_waits  = 0;
66bc5ccf88SSatish Balay   stash->recv_waits  = 0;
67a2d1c673SSatish Balay   stash->send_status = 0;
68bc5ccf88SSatish Balay   stash->nsends      = 0;
69bc5ccf88SSatish Balay   stash->nrecvs      = 0;
70bc5ccf88SSatish Balay   stash->svalues     = 0;
71bc5ccf88SSatish Balay   stash->rvalues     = 0;
72563fb871SSatish Balay   stash->rindices    = 0;
73a2d1c673SSatish Balay   stash->nprocs      = 0;
74a2d1c673SSatish Balay   stash->nprocessed  = 0;
753a40ed3dSBarry Smith   PetscFunctionReturn(0);
769417f4adSLois Curfman McInnes }
779417f4adSLois Curfman McInnes 
784c1ff481SSatish Balay /*
798798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
804c1ff481SSatish Balay */
814a2ae208SSatish Balay #undef __FUNCT__
824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
849417f4adSLois Curfman McInnes {
85dfbe8321SBarry Smith   PetscErrorCode ierr;
86a2d1c673SSatish Balay 
87bc5ccf88SSatish Balay   PetscFunctionBegin;
88606d414cSSatish Balay   if (stash->array) {
89606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
90606d414cSSatish Balay     stash->array = 0;
91606d414cSSatish Balay   }
92bc5ccf88SSatish Balay   PetscFunctionReturn(0);
93bc5ccf88SSatish Balay }
94bc5ccf88SSatish Balay 
954c1ff481SSatish Balay /*
968798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
974c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
984c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
994c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1004c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1014c1ff481SSatish Balay    so that the same value can be used the next time through.
1024c1ff481SSatish Balay */
1034a2ae208SSatish Balay #undef __FUNCT__
1044a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
105dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106bc5ccf88SSatish Balay {
1076849ba73SBarry Smith   PetscErrorCode ierr;
1086849ba73SBarry Smith   int         nsends=stash->nsends,bs2,oldnmax;
109a2d1c673SSatish Balay   MPI_Status  *send_status;
110a2d1c673SSatish Balay 
1113a40ed3dSBarry Smith   PetscFunctionBegin;
112a2d1c673SSatish Balay   /* wait on sends */
113a2d1c673SSatish Balay   if (nsends) {
11482502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
117a2d1c673SSatish Balay   }
118a2d1c673SSatish Balay 
119c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
120434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
121434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
122b9b97703SBarry Smith   if (stash->n) {
12394b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1248a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126b9b97703SBarry Smith   }
127434d7ff9SSatish Balay 
128d07ff455SSatish Balay   stash->nmax       = 0;
129d07ff455SSatish Balay   stash->n          = 0;
1304c1ff481SSatish Balay   stash->reallocs   = -1;
131a2d1c673SSatish Balay   stash->nprocessed = 0;
132bc5ccf88SSatish Balay 
133bc5ccf88SSatish Balay   if (stash->array) {
134606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
135bc5ccf88SSatish Balay     stash->array = 0;
136bc5ccf88SSatish Balay     stash->idx   = 0;
137bc5ccf88SSatish Balay     stash->idy   = 0;
138bc5ccf88SSatish Balay   }
139606d414cSSatish Balay   if (stash->send_waits) {
140606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
141606d414cSSatish Balay     stash->send_waits = 0;
142606d414cSSatish Balay   }
143606d414cSSatish Balay   if (stash->recv_waits) {
144606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
145606d414cSSatish Balay     stash->recv_waits = 0;
146606d414cSSatish Balay   }
147606d414cSSatish Balay   if (stash->svalues) {
148606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
149606d414cSSatish Balay     stash->svalues = 0;
150606d414cSSatish Balay   }
151606d414cSSatish Balay   if (stash->rvalues) {
152606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
153606d414cSSatish Balay     stash->rvalues = 0;
154606d414cSSatish Balay   }
155563fb871SSatish Balay   if (stash->rindices) {
156563fb871SSatish Balay     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
157563fb871SSatish Balay     stash->rindices = 0;
158563fb871SSatish Balay   }
159606d414cSSatish Balay   if (stash->nprocs) {
160b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
161606d414cSSatish Balay     stash->nprocs = 0;
162606d414cSSatish Balay   }
163bc5ccf88SSatish Balay 
1643a40ed3dSBarry Smith   PetscFunctionReturn(0);
1659417f4adSLois Curfman McInnes }
1669417f4adSLois Curfman McInnes 
1674c1ff481SSatish Balay /*
1688798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1694c1ff481SSatish Balay 
1704c1ff481SSatish Balay    Input Parameters:
1714c1ff481SSatish Balay    stash    - the stash
17294b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1734c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1744c1ff481SSatish Balay 
1754c1ff481SSatish Balay */
1764a2ae208SSatish Balay #undef __FUNCT__
1774a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
178c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
17997530c3fSBarry Smith {
180c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
18194b769a5SSatish Balay 
1823a40ed3dSBarry Smith   PetscFunctionBegin;
1831ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1841ecfd215SBarry Smith   if (reallocs) {
185434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
186434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1871ecfd215SBarry Smith   }
188bc5ccf88SSatish Balay   PetscFunctionReturn(0);
189bc5ccf88SSatish Balay }
1904c1ff481SSatish Balay 
1914c1ff481SSatish Balay 
1924c1ff481SSatish Balay /*
1938798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1944c1ff481SSatish Balay 
1954c1ff481SSatish Balay    Input Parameters:
1964c1ff481SSatish Balay    stash  - the stash
1974c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1984c1ff481SSatish Balay             this value is used while allocating memory.
1994c1ff481SSatish Balay */
2004a2ae208SSatish Balay #undef __FUNCT__
2014a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
202c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
203bc5ccf88SSatish Balay {
204bc5ccf88SSatish Balay   PetscFunctionBegin;
205434d7ff9SSatish Balay   stash->umax = max;
2063a40ed3dSBarry Smith   PetscFunctionReturn(0);
20797530c3fSBarry Smith }
20897530c3fSBarry Smith 
2098798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2104c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2114c1ff481SSatish Balay    being inserted into the stash.
2124c1ff481SSatish Balay 
2134c1ff481SSatish Balay    Input Parameters:
2144c1ff481SSatish Balay    stash - the stash
2154c1ff481SSatish Balay    incr  - the minimum increase requested
2164c1ff481SSatish Balay 
2174c1ff481SSatish Balay    Notes:
2184c1ff481SSatish Balay    This routine doubles the currently used memory.
2194c1ff481SSatish Balay  */
2204a2ae208SSatish Balay #undef __FUNCT__
2214a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
222c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2239417f4adSLois Curfman McInnes {
2246849ba73SBarry Smith   PetscErrorCode ierr;
225c1ac3661SBarry Smith   PetscInt       *n_idx,*n_idy,newnmax,bs2;
2263eda8832SBarry Smith   MatScalar *n_array;
2279417f4adSLois Curfman McInnes 
2283a40ed3dSBarry Smith   PetscFunctionBegin;
2299417f4adSLois Curfman McInnes   /* allocate a larger stash */
23094b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
231c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
232434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
233434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
234c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
235434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
236434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
237434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2384c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
239d07ff455SSatish Balay 
240c1ac3661SBarry Smith   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
241c1ac3661SBarry Smith   n_idx = (PetscInt*)(n_array + bs2*newnmax);
242c1ac3661SBarry Smith   n_idy = (PetscInt*)(n_idx + newnmax);
2433eda8832SBarry Smith   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
244c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
245c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
246606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
247d07ff455SSatish Balay   stash->array   = n_array;
248d07ff455SSatish Balay   stash->idx     = n_idx;
249d07ff455SSatish Balay   stash->idy     = n_idy;
250d07ff455SSatish Balay   stash->nmax    = newnmax;
251bc5ccf88SSatish Balay   stash->reallocs++;
252bc5ccf88SSatish Balay   PetscFunctionReturn(0);
253bc5ccf88SSatish Balay }
254bc5ccf88SSatish Balay /*
2558798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2564c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2574c1ff481SSatish Balay   can be inserted with a single call to this function.
2584c1ff481SSatish Balay 
2594c1ff481SSatish Balay   Input Parameters:
2604c1ff481SSatish Balay   stash  - the stash
2614c1ff481SSatish Balay   row    - the global row correspoiding to the values
2624c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2634c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2644c1ff481SSatish Balay   values - the values inserted
265bc5ccf88SSatish Balay */
2664a2ae208SSatish Balay #undef __FUNCT__
2674a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
268c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
269bc5ccf88SSatish Balay {
270dfbe8321SBarry Smith   PetscErrorCode ierr;
271c1ac3661SBarry Smith   PetscInt i;
272bc5ccf88SSatish Balay 
273bc5ccf88SSatish Balay   PetscFunctionBegin;
2744c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2754c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2768798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2779417f4adSLois Curfman McInnes   }
2784c1ff481SSatish Balay   for (i=0; i<n; i++) {
2799417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
280a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
2810ae3cd3bSBarry Smith     stash->array[stash->n] = values[i];
282a2d1c673SSatish Balay     stash->n++;
2839417f4adSLois Curfman McInnes   }
284a2d1c673SSatish Balay   PetscFunctionReturn(0);
285a2d1c673SSatish Balay }
2864c1ff481SSatish Balay /*
2878798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2884c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2894c1ff481SSatish Balay   can be inserted with a single call to this function.
290a2d1c673SSatish Balay 
2914c1ff481SSatish Balay   Input Parameters:
2924c1ff481SSatish Balay   stash   - the stash
2934c1ff481SSatish Balay   row     - the global row correspoiding to the values
2944c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2954c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2964c1ff481SSatish Balay   values  - the values inserted
2974c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2984c1ff481SSatish Balay             this happens because the input is columnoriented.
2994c1ff481SSatish Balay */
3004a2ae208SSatish Balay #undef __FUNCT__
3014a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
302c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
303a2d1c673SSatish Balay {
304dfbe8321SBarry Smith   PetscErrorCode ierr;
305c1ac3661SBarry Smith   PetscInt i;
306a2d1c673SSatish Balay 
3074c1ff481SSatish Balay   PetscFunctionBegin;
3084c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
3094c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
3108798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3114c1ff481SSatish Balay   }
3124c1ff481SSatish Balay   for (i=0; i<n; i++) {
3134c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3144c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3150ae3cd3bSBarry Smith     stash->array[stash->n] = values[i*stepval];
3164c1ff481SSatish Balay     stash->n++;
3174c1ff481SSatish Balay   }
3184c1ff481SSatish Balay   PetscFunctionReturn(0);
3194c1ff481SSatish Balay }
3204c1ff481SSatish Balay 
3214c1ff481SSatish Balay /*
3228798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3234c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3244c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3254c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3264c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3274c1ff481SSatish Balay 
3284c1ff481SSatish Balay   Input Parameters:
3294c1ff481SSatish Balay   stash  - the stash
3304c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3314c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3324c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3334c1ff481SSatish Balay            values. Each block is of size bs*bs.
3344c1ff481SSatish Balay   values - the values inserted
3354c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3364c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3374c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3384c1ff481SSatish Balay */
3394a2ae208SSatish Balay #undef __FUNCT__
3404a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
341c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3424c1ff481SSatish Balay {
343dfbe8321SBarry Smith   PetscErrorCode ierr;
344c1ac3661SBarry Smith   PetscInt i,j,k,bs2,bs=stash->bs;
345f15d580aSBarry Smith   const MatScalar *vals;
346f15d580aSBarry Smith   MatScalar       *array;
347a2d1c673SSatish Balay 
348a2d1c673SSatish Balay   PetscFunctionBegin;
349a2d1c673SSatish Balay   bs2 = bs*bs;
3504c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3518798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
352a2d1c673SSatish Balay   }
3534c1ff481SSatish Balay   for (i=0; i<n; i++) {
354a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
355a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
356a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
357a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
358a2d1c673SSatish Balay        funtion call */
359a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
360a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
361a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
3620ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
363a2d1c673SSatish Balay       array += 1;
364a2d1c673SSatish Balay       vals  += cmax*bs;
365a2d1c673SSatish Balay     }
3664c1ff481SSatish Balay     stash->n++;
3674c1ff481SSatish Balay   }
3684c1ff481SSatish Balay   PetscFunctionReturn(0);
3694c1ff481SSatish Balay }
3704c1ff481SSatish Balay 
3714c1ff481SSatish Balay /*
3728798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3734c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3744c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3754c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3764c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3774c1ff481SSatish Balay 
3784c1ff481SSatish Balay   Input Parameters:
3794c1ff481SSatish Balay   stash  - the stash
3804c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3814c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3824c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3834c1ff481SSatish Balay            values. Each block is of size bs*bs.
3844c1ff481SSatish Balay   values - the values inserted
3854c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3864c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3874c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3884c1ff481SSatish Balay */
3894a2ae208SSatish Balay #undef __FUNCT__
3904a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
391c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3924c1ff481SSatish Balay {
393dfbe8321SBarry Smith   PetscErrorCode ierr;
394c1ac3661SBarry Smith   PetscInt i,j,k,bs2,bs=stash->bs;
395f15d580aSBarry Smith   const MatScalar *vals;
396f15d580aSBarry Smith   MatScalar       *array;
3974c1ff481SSatish Balay 
3984c1ff481SSatish Balay   PetscFunctionBegin;
3994c1ff481SSatish Balay   bs2 = bs*bs;
4004c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
4018798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4024c1ff481SSatish Balay   }
4034c1ff481SSatish Balay   for (i=0; i<n; i++) {
4044c1ff481SSatish Balay     stash->idx[stash->n]   = row;
4054c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
4064c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
4074c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
4084c1ff481SSatish Balay      funtion call */
409a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
410a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
411a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
4120ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k] = vals[k];}
413a2d1c673SSatish Balay       array += bs;
414a2d1c673SSatish Balay       vals  += rmax*bs;
415a2d1c673SSatish Balay     }
416a2d1c673SSatish Balay     stash->n++;
4179417f4adSLois Curfman McInnes   }
4183a40ed3dSBarry Smith   PetscFunctionReturn(0);
4199417f4adSLois Curfman McInnes }
4204c1ff481SSatish Balay /*
4218798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4224c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4234c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4244c1ff481SSatish Balay   processors.
425bc5ccf88SSatish Balay 
4264c1ff481SSatish Balay   Input Parameters:
4274c1ff481SSatish Balay   stash  - the stash
4284c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4294c1ff481SSatish Balay            for each node.
4304c1ff481SSatish Balay 
4314c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4324c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4334c1ff481SSatish Balay   the proper global indices.
4344c1ff481SSatish Balay */
4354a2ae208SSatish Balay #undef __FUNCT__
4364a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
437c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
438bc5ccf88SSatish Balay {
439c1ac3661SBarry Smith   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
440fe09c992SBarry Smith   PetscInt       size=stash->size,nsends;
4416849ba73SBarry Smith   PetscErrorCode ierr;
4421b27d27dSSatish Balay   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx;
443563fb871SSatish Balay   MatScalar      **rvalues,*svalues;
444bc5ccf88SSatish Balay   MPI_Comm       comm = stash->comm;
445563fb871SSatish Balay   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
446fe09c992SBarry Smith   PetscMPIInt    *nprocs,*nlengths,nreceives;
447bc5ccf88SSatish Balay 
448bc5ccf88SSatish Balay   PetscFunctionBegin;
449bc5ccf88SSatish Balay 
4504c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
451bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
452fe09c992SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
453fe09c992SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
454c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
455a2d1c673SSatish Balay 
456563fb871SSatish Balay   nlengths = nprocs+size;
4577357eb19SBarry Smith   j        = 0;
4587357eb19SBarry Smith   lastidx  = -1;
459bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
4607357eb19SBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
4617357eb19SBarry Smith     if (lastidx > (idx = stash->idx[i])) j = 0;
4627357eb19SBarry Smith     lastidx = idx;
4637357eb19SBarry Smith     for (; j<size; j++) {
4644c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
465563fb871SSatish Balay         nlengths[j]++; owner[i] = j; break;
466bc5ccf88SSatish Balay       }
467bc5ccf88SSatish Balay     }
468bc5ccf88SSatish Balay   }
469563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
470563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
471563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
472563fb871SSatish Balay   }
473bc5ccf88SSatish Balay 
474563fb871SSatish Balay   { int  *onodes,*olengths;
475563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
476563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
477563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
478563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
479563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
480563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
481563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
482563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
483563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
484563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
485563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
486bc5ccf88SSatish Balay   }
487bc5ccf88SSatish Balay 
488bc5ccf88SSatish Balay   /* do sends:
489bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
490bc5ccf88SSatish Balay          the ith processor
491bc5ccf88SSatish Balay   */
492c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
493c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
494b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
495c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
496bc5ccf88SSatish Balay   starti   = startv + size;
497a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
498bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
499bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
500563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
501563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
502bc5ccf88SSatish Balay   }
503bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
504bc5ccf88SSatish Balay     j = owner[i];
505a2d1c673SSatish Balay     if (bs2 == 1) {
506bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
507a2d1c673SSatish Balay     } else {
508c1ac3661SBarry Smith       PetscInt       k;
5093eda8832SBarry Smith       MatScalar *buf1,*buf2;
5104c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
5114c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
5124c1ff481SSatish Balay       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
513a2d1c673SSatish Balay     }
514bc5ccf88SSatish Balay     sindices[starti[j]]               = stash->idx[i];
515563fb871SSatish Balay     sindices[starti[j]+nlengths[j]]   = stash->idy[i];
516bc5ccf88SSatish Balay     startv[j]++;
517bc5ccf88SSatish Balay     starti[j]++;
518bc5ccf88SSatish Balay   }
519bc5ccf88SSatish Balay   startv[0] = 0;
520563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
521e5d0e772SSatish Balay 
522bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
523563fb871SSatish Balay     if (nprocs[i]) {
524563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
525563fb871SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
526bc5ccf88SSatish Balay     }
527b85c94c3SSatish Balay   }
5286cf91177SBarry Smith #if defined(PETSC_USE_INFO)
529*ae15b995SBarry Smith   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
530e5d0e772SSatish Balay   for (i=0; i<size; i++) {
531e5d0e772SSatish Balay     if (nprocs[i]) {
532*ae15b995SBarry Smith       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
533e5d0e772SSatish Balay     }
534e5d0e772SSatish Balay   }
535e5d0e772SSatish Balay #endif
536606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
537606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
538a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
539a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
540a2d1c673SSatish Balay   stash->nprocs      = nprocs;
541a2d1c673SSatish Balay 
542563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
543563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
544563fb871SSatish Balay 
545563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
546563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
547563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
548563fb871SSatish Balay   }
549563fb871SSatish Balay   stash->recv_waits = recv_waits;
550563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
551563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
552563fb871SSatish Balay 
553bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
554563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
555bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
556bc5ccf88SSatish Balay   PetscFunctionReturn(0);
557bc5ccf88SSatish Balay }
558bc5ccf88SSatish Balay 
559a2d1c673SSatish Balay /*
5608798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5618798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5624c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5634c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5644c1ff481SSatish Balay 
5654c1ff481SSatish Balay    Input Parameters:
5664c1ff481SSatish Balay    stash - the stash
5674c1ff481SSatish Balay 
5684c1ff481SSatish Balay    Output Parameters:
5694c1ff481SSatish Balay    nvals - the number of entries in the current message.
5704c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5714c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5724c1ff481SSatish Balay    vals  - the values
5734c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5744c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5754c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
576a2d1c673SSatish Balay */
5774a2ae208SSatish Balay #undef __FUNCT__
5784a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
579c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
580bc5ccf88SSatish Balay {
5816849ba73SBarry Smith   PetscErrorCode ierr;
582fe09c992SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2;
583fe09c992SBarry Smith   PetscInt       bs2;
584a2d1c673SSatish Balay   MPI_Status     recv_status;
585b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
586bc5ccf88SSatish Balay 
587bc5ccf88SSatish Balay   PetscFunctionBegin;
588bc5ccf88SSatish Balay 
589a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
590a2d1c673SSatish Balay   /* Return if no more messages to process */
591a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
592a2d1c673SSatish Balay 
593a2d1c673SSatish Balay   flg_v = stash->nprocs;
5944c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
595a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
596a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
597a2d1c673SSatish Balay   while (!match_found) {
598a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
599a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
600a2d1c673SSatish Balay     if (i % 2) {
6013eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
602c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
603a2d1c673SSatish Balay       *nvals = *nvals/bs2;
604563fb871SSatish Balay     } else {
605563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
606563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
607563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
608bc5ccf88SSatish Balay     }
609a2d1c673SSatish Balay 
610a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
611c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
612c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
613a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
614563fb871SSatish Balay       *rows       = stash->rindices[i2];
615a2d1c673SSatish Balay       *cols       = *rows + *nvals;
616563fb871SSatish Balay       *vals       = stash->rvalues[i1];
617a2d1c673SSatish Balay       *flg        = 1;
618a2d1c673SSatish Balay       stash->nprocessed ++;
61935d8aa7fSBarry Smith       match_found = PETSC_TRUE;
620bc5ccf88SSatish Balay     }
621bc5ccf88SSatish Balay   }
622bc5ccf88SSatish Balay   PetscFunctionReturn(0);
623bc5ccf88SSatish Balay }
624