xref: /petsc/src/mat/utils/matstash.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1*be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
49417f4adSLois Curfman McInnes 
53eda8832SBarry Smith /*
60ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
70ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
83eda8832SBarry Smith */
9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
104c1ff481SSatish Balay 
119417f4adSLois Curfman McInnes /*
128798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
134c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
144c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
159417f4adSLois Curfman McInnes 
164c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Input Parameters:
194c1ff481SSatish Balay   comm - communicator, required for scatters.
204c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Output Parameters:
234c1ff481SSatish Balay   stash    - the newly created stash
249417f4adSLois Curfman McInnes */
254a2ae208SSatish Balay #undef __FUNCT__
264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
27c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
289417f4adSLois Curfman McInnes {
29dfbe8321SBarry Smith   PetscErrorCode ierr;
30c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
31f1af5d2fSBarry Smith   PetscTruth     flg;
32bc5ccf88SSatish Balay 
333a40ed3dSBarry Smith   PetscFunctionBegin;
34bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35752ec6e0SSatish Balay   stash->comm = comm;
36752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40bc5ccf88SSatish Balay 
41434d7ff9SSatish Balay   nopt = stash->size;
42d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44434d7ff9SSatish Balay   if (flg) {
45434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
46434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
47434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
48f4ab19daSSatish Balay     else                          max = 0; /* Use default */
49434d7ff9SSatish Balay     stash->umax = max;
50434d7ff9SSatish Balay   } else {
51434d7ff9SSatish Balay     stash->umax = 0;
52434d7ff9SSatish Balay   }
53606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
544c1ff481SSatish Balay   if (bs <= 0) bs = 1;
55a2d1c673SSatish Balay 
564c1ff481SSatish Balay   stash->bs       = bs;
579417f4adSLois Curfman McInnes   stash->nmax     = 0;
58434d7ff9SSatish Balay   stash->oldnmax  = 0;
599417f4adSLois Curfman McInnes   stash->n        = 0;
604c1ff481SSatish Balay   stash->reallocs = -1;
619417f4adSLois Curfman McInnes   stash->idx      = 0;
629417f4adSLois Curfman McInnes   stash->idy      = 0;
63bc5ccf88SSatish Balay   stash->array    = 0;
649417f4adSLois Curfman McInnes 
65bc5ccf88SSatish Balay   stash->send_waits  = 0;
66bc5ccf88SSatish Balay   stash->recv_waits  = 0;
67a2d1c673SSatish Balay   stash->send_status = 0;
68bc5ccf88SSatish Balay   stash->nsends      = 0;
69bc5ccf88SSatish Balay   stash->nrecvs      = 0;
70bc5ccf88SSatish Balay   stash->svalues     = 0;
71bc5ccf88SSatish Balay   stash->rvalues     = 0;
72bc5ccf88SSatish Balay   stash->rmax        = 0;
73a2d1c673SSatish Balay   stash->nprocs      = 0;
74a2d1c673SSatish Balay   stash->nprocessed  = 0;
753a40ed3dSBarry Smith   PetscFunctionReturn(0);
769417f4adSLois Curfman McInnes }
779417f4adSLois Curfman McInnes 
784c1ff481SSatish Balay /*
798798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
804c1ff481SSatish Balay */
814a2ae208SSatish Balay #undef __FUNCT__
824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
849417f4adSLois Curfman McInnes {
85dfbe8321SBarry Smith   PetscErrorCode ierr;
86a2d1c673SSatish Balay 
87bc5ccf88SSatish Balay   PetscFunctionBegin;
88606d414cSSatish Balay   if (stash->array) {
89606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
90606d414cSSatish Balay     stash->array = 0;
91606d414cSSatish Balay   }
92bc5ccf88SSatish Balay   PetscFunctionReturn(0);
93bc5ccf88SSatish Balay }
94bc5ccf88SSatish Balay 
954c1ff481SSatish Balay /*
968798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
974c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
984c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
994c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1004c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1014c1ff481SSatish Balay    so that the same value can be used the next time through.
1024c1ff481SSatish Balay */
1034a2ae208SSatish Balay #undef __FUNCT__
1044a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
105dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106bc5ccf88SSatish Balay {
1076849ba73SBarry Smith   PetscErrorCode ierr;
1086849ba73SBarry Smith   int         nsends=stash->nsends,bs2,oldnmax;
109a2d1c673SSatish Balay   MPI_Status  *send_status;
110a2d1c673SSatish Balay 
1113a40ed3dSBarry Smith   PetscFunctionBegin;
112a2d1c673SSatish Balay   /* wait on sends */
113a2d1c673SSatish Balay   if (nsends) {
11482502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
117a2d1c673SSatish Balay   }
118a2d1c673SSatish Balay 
119c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
120434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
121434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
122b9b97703SBarry Smith   if (stash->n) {
12394b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1248a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126b9b97703SBarry Smith   }
127434d7ff9SSatish Balay 
128d07ff455SSatish Balay   stash->nmax       = 0;
129d07ff455SSatish Balay   stash->n          = 0;
1304c1ff481SSatish Balay   stash->reallocs   = -1;
131bc5ccf88SSatish Balay   stash->rmax       = 0;
132a2d1c673SSatish Balay   stash->nprocessed = 0;
133bc5ccf88SSatish Balay 
134bc5ccf88SSatish Balay   if (stash->array) {
135606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
136bc5ccf88SSatish Balay     stash->array = 0;
137bc5ccf88SSatish Balay     stash->idx   = 0;
138bc5ccf88SSatish Balay     stash->idy   = 0;
139bc5ccf88SSatish Balay   }
140606d414cSSatish Balay   if (stash->send_waits) {
141606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
142606d414cSSatish Balay     stash->send_waits = 0;
143606d414cSSatish Balay   }
144606d414cSSatish Balay   if (stash->recv_waits) {
145606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
146606d414cSSatish Balay     stash->recv_waits = 0;
147606d414cSSatish Balay   }
148606d414cSSatish Balay   if (stash->svalues) {
149606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
150606d414cSSatish Balay     stash->svalues = 0;
151606d414cSSatish Balay   }
152606d414cSSatish Balay   if (stash->rvalues) {
153606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
154606d414cSSatish Balay     stash->rvalues = 0;
155606d414cSSatish Balay   }
156606d414cSSatish Balay   if (stash->nprocs) {
157b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
158606d414cSSatish Balay     stash->nprocs = 0;
159606d414cSSatish Balay   }
160bc5ccf88SSatish Balay 
1613a40ed3dSBarry Smith   PetscFunctionReturn(0);
1629417f4adSLois Curfman McInnes }
1639417f4adSLois Curfman McInnes 
1644c1ff481SSatish Balay /*
1658798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1664c1ff481SSatish Balay 
1674c1ff481SSatish Balay    Input Parameters:
1684c1ff481SSatish Balay    stash    - the stash
16994b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1704c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1714c1ff481SSatish Balay 
1724c1ff481SSatish Balay */
1734a2ae208SSatish Balay #undef __FUNCT__
1744a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
175c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
17697530c3fSBarry Smith {
177c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
17894b769a5SSatish Balay 
1793a40ed3dSBarry Smith   PetscFunctionBegin;
1801ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1811ecfd215SBarry Smith   if (reallocs) {
182434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
183434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1841ecfd215SBarry Smith   }
185bc5ccf88SSatish Balay   PetscFunctionReturn(0);
186bc5ccf88SSatish Balay }
1874c1ff481SSatish Balay 
1884c1ff481SSatish Balay 
1894c1ff481SSatish Balay /*
1908798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1914c1ff481SSatish Balay 
1924c1ff481SSatish Balay    Input Parameters:
1934c1ff481SSatish Balay    stash  - the stash
1944c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1954c1ff481SSatish Balay             this value is used while allocating memory.
1964c1ff481SSatish Balay */
1974a2ae208SSatish Balay #undef __FUNCT__
1984a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
199c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
200bc5ccf88SSatish Balay {
201bc5ccf88SSatish Balay   PetscFunctionBegin;
202434d7ff9SSatish Balay   stash->umax = max;
2033a40ed3dSBarry Smith   PetscFunctionReturn(0);
20497530c3fSBarry Smith }
20597530c3fSBarry Smith 
2068798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2074c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2084c1ff481SSatish Balay    being inserted into the stash.
2094c1ff481SSatish Balay 
2104c1ff481SSatish Balay    Input Parameters:
2114c1ff481SSatish Balay    stash - the stash
2124c1ff481SSatish Balay    incr  - the minimum increase requested
2134c1ff481SSatish Balay 
2144c1ff481SSatish Balay    Notes:
2154c1ff481SSatish Balay    This routine doubles the currently used memory.
2164c1ff481SSatish Balay  */
2174a2ae208SSatish Balay #undef __FUNCT__
2184a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
219c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2209417f4adSLois Curfman McInnes {
2216849ba73SBarry Smith   PetscErrorCode ierr;
222c1ac3661SBarry Smith   PetscInt       *n_idx,*n_idy,newnmax,bs2;
2233eda8832SBarry Smith   MatScalar *n_array;
2249417f4adSLois Curfman McInnes 
2253a40ed3dSBarry Smith   PetscFunctionBegin;
2269417f4adSLois Curfman McInnes   /* allocate a larger stash */
22794b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
228c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
229434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
230434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
231c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
232434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
233434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
234434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2354c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
236d07ff455SSatish Balay 
237c1ac3661SBarry Smith   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
238c1ac3661SBarry Smith   n_idx = (PetscInt*)(n_array + bs2*newnmax);
239c1ac3661SBarry Smith   n_idy = (PetscInt*)(n_idx + newnmax);
2403eda8832SBarry Smith   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
241c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
242c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
243606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
244d07ff455SSatish Balay   stash->array   = n_array;
245d07ff455SSatish Balay   stash->idx     = n_idx;
246d07ff455SSatish Balay   stash->idy     = n_idy;
247d07ff455SSatish Balay   stash->nmax    = newnmax;
248bc5ccf88SSatish Balay   stash->reallocs++;
249bc5ccf88SSatish Balay   PetscFunctionReturn(0);
250bc5ccf88SSatish Balay }
251bc5ccf88SSatish Balay /*
2528798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2534c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2544c1ff481SSatish Balay   can be inserted with a single call to this function.
2554c1ff481SSatish Balay 
2564c1ff481SSatish Balay   Input Parameters:
2574c1ff481SSatish Balay   stash  - the stash
2584c1ff481SSatish Balay   row    - the global row correspoiding to the values
2594c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2604c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2614c1ff481SSatish Balay   values - the values inserted
262bc5ccf88SSatish Balay */
2634a2ae208SSatish Balay #undef __FUNCT__
2644a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
265c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
266bc5ccf88SSatish Balay {
267dfbe8321SBarry Smith   PetscErrorCode ierr;
268c1ac3661SBarry Smith   PetscInt i;
269bc5ccf88SSatish Balay 
270bc5ccf88SSatish Balay   PetscFunctionBegin;
2714c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2724c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2738798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2749417f4adSLois Curfman McInnes   }
2754c1ff481SSatish Balay   for (i=0; i<n; i++) {
2769417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
277a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
2780ae3cd3bSBarry Smith     stash->array[stash->n] = values[i];
279a2d1c673SSatish Balay     stash->n++;
2809417f4adSLois Curfman McInnes   }
281a2d1c673SSatish Balay   PetscFunctionReturn(0);
282a2d1c673SSatish Balay }
2834c1ff481SSatish Balay /*
2848798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2854c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2864c1ff481SSatish Balay   can be inserted with a single call to this function.
287a2d1c673SSatish Balay 
2884c1ff481SSatish Balay   Input Parameters:
2894c1ff481SSatish Balay   stash   - the stash
2904c1ff481SSatish Balay   row     - the global row correspoiding to the values
2914c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2924c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2934c1ff481SSatish Balay   values  - the values inserted
2944c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2954c1ff481SSatish Balay             this happens because the input is columnoriented.
2964c1ff481SSatish Balay */
2974a2ae208SSatish Balay #undef __FUNCT__
2984a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
299c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
300a2d1c673SSatish Balay {
301dfbe8321SBarry Smith   PetscErrorCode ierr;
302c1ac3661SBarry Smith   PetscInt i;
303a2d1c673SSatish Balay 
3044c1ff481SSatish Balay   PetscFunctionBegin;
3054c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
3064c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
3078798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3084c1ff481SSatish Balay   }
3094c1ff481SSatish Balay   for (i=0; i<n; i++) {
3104c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3114c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3120ae3cd3bSBarry Smith     stash->array[stash->n] = values[i*stepval];
3134c1ff481SSatish Balay     stash->n++;
3144c1ff481SSatish Balay   }
3154c1ff481SSatish Balay   PetscFunctionReturn(0);
3164c1ff481SSatish Balay }
3174c1ff481SSatish Balay 
3184c1ff481SSatish Balay /*
3198798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3204c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3214c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3224c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3234c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3244c1ff481SSatish Balay 
3254c1ff481SSatish Balay   Input Parameters:
3264c1ff481SSatish Balay   stash  - the stash
3274c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3284c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3294c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3304c1ff481SSatish Balay            values. Each block is of size bs*bs.
3314c1ff481SSatish Balay   values - the values inserted
3324c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3334c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3344c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3354c1ff481SSatish Balay */
3364a2ae208SSatish Balay #undef __FUNCT__
3374a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
338c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3394c1ff481SSatish Balay {
340dfbe8321SBarry Smith   PetscErrorCode ierr;
341c1ac3661SBarry Smith   PetscInt i,j,k,bs2,bs=stash->bs;
342f15d580aSBarry Smith   const MatScalar *vals;
343f15d580aSBarry Smith   MatScalar       *array;
344a2d1c673SSatish Balay 
345a2d1c673SSatish Balay   PetscFunctionBegin;
346a2d1c673SSatish Balay   bs2 = bs*bs;
3474c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3488798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
349a2d1c673SSatish Balay   }
3504c1ff481SSatish Balay   for (i=0; i<n; i++) {
351a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
352a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
353a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
354a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
355a2d1c673SSatish Balay        funtion call */
356a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
357a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
358a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
3590ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
360a2d1c673SSatish Balay       array += 1;
361a2d1c673SSatish Balay       vals  += cmax*bs;
362a2d1c673SSatish Balay     }
3634c1ff481SSatish Balay     stash->n++;
3644c1ff481SSatish Balay   }
3654c1ff481SSatish Balay   PetscFunctionReturn(0);
3664c1ff481SSatish Balay }
3674c1ff481SSatish Balay 
3684c1ff481SSatish Balay /*
3698798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3704c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3714c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3724c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3734c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3744c1ff481SSatish Balay 
3754c1ff481SSatish Balay   Input Parameters:
3764c1ff481SSatish Balay   stash  - the stash
3774c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3784c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3794c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3804c1ff481SSatish Balay            values. Each block is of size bs*bs.
3814c1ff481SSatish Balay   values - the values inserted
3824c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3834c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3844c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3854c1ff481SSatish Balay */
3864a2ae208SSatish Balay #undef __FUNCT__
3874a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
388c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3894c1ff481SSatish Balay {
390dfbe8321SBarry Smith   PetscErrorCode ierr;
391c1ac3661SBarry Smith   PetscInt i,j,k,bs2,bs=stash->bs;
392f15d580aSBarry Smith   const MatScalar *vals;
393f15d580aSBarry Smith   MatScalar       *array;
3944c1ff481SSatish Balay 
3954c1ff481SSatish Balay   PetscFunctionBegin;
3964c1ff481SSatish Balay   bs2 = bs*bs;
3974c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3988798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3994c1ff481SSatish Balay   }
4004c1ff481SSatish Balay   for (i=0; i<n; i++) {
4014c1ff481SSatish Balay     stash->idx[stash->n]   = row;
4024c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
4034c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
4044c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
4054c1ff481SSatish Balay      funtion call */
406a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
407a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
408a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
4090ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k] = vals[k];}
410a2d1c673SSatish Balay       array += bs;
411a2d1c673SSatish Balay       vals  += rmax*bs;
412a2d1c673SSatish Balay     }
413a2d1c673SSatish Balay     stash->n++;
4149417f4adSLois Curfman McInnes   }
4153a40ed3dSBarry Smith   PetscFunctionReturn(0);
4169417f4adSLois Curfman McInnes }
4174c1ff481SSatish Balay /*
4188798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4194c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4204c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4214c1ff481SSatish Balay   processors.
422bc5ccf88SSatish Balay 
4234c1ff481SSatish Balay   Input Parameters:
4244c1ff481SSatish Balay   stash  - the stash
4254c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4264c1ff481SSatish Balay            for each node.
4274c1ff481SSatish Balay 
4284c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4294c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4304c1ff481SSatish Balay   the proper global indices.
4314c1ff481SSatish Balay */
4324a2ae208SSatish Balay #undef __FUNCT__
4334a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
434c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
435bc5ccf88SSatish Balay {
436c1ac3661SBarry Smith   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
437c1ac3661SBarry Smith   PetscInt       size=stash->size,*nprocs,nsends,nreceives;
4386849ba73SBarry Smith   PetscErrorCode ierr;
4397357eb19SBarry Smith   PetscInt       nmax,count,*sindices,*rindices,i,j,idx,lastidx;
4403eda8832SBarry Smith   MatScalar      *rvalues,*svalues;
441bc5ccf88SSatish Balay   MPI_Comm       comm = stash->comm;
442bc5ccf88SSatish Balay   MPI_Request    *send_waits,*recv_waits;
443bc5ccf88SSatish Balay 
444bc5ccf88SSatish Balay   PetscFunctionBegin;
445bc5ccf88SSatish Balay 
4464c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
447bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
448c1ac3661SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
449c1ac3661SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
450c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
451a2d1c673SSatish Balay 
4527357eb19SBarry Smith   j       = 0;
4537357eb19SBarry Smith   lastidx = -1;
454bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
4557357eb19SBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
4567357eb19SBarry Smith     if (lastidx > (idx = stash->idx[i])) j = 0;
4577357eb19SBarry Smith     lastidx = idx;
4587357eb19SBarry Smith     for (; j<size; j++) {
4594c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
460c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
461bc5ccf88SSatish Balay       }
462bc5ccf88SSatish Balay     }
463bc5ccf88SSatish Balay   }
464c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
465bc5ccf88SSatish Balay 
466bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
467c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
468c1dc657dSBarry Smith 
469bc5ccf88SSatish Balay   /* post receives:
470bc5ccf88SSatish Balay      since we don't know how long each individual message is we
471bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
472bc5ccf88SSatish Balay      this is a lot of wasted space.
473bc5ccf88SSatish Balay   */
474c1ac3661SBarry Smith   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&rvalues);CHKERRQ(ierr);
475c1ac3661SBarry Smith   rindices = (PetscInt*)(rvalues + bs2*nreceives*nmax);
476b0a32e0cSBarry Smith   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
477bc5ccf88SSatish Balay   for (i=0,count=0; i<nreceives; i++) {
478d7d82daaSBarry Smith     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
479c1ac3661SBarry Smith     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
480bc5ccf88SSatish Balay   }
481bc5ccf88SSatish Balay 
482bc5ccf88SSatish Balay   /* do sends:
483bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
484bc5ccf88SSatish Balay          the ith processor
485bc5ccf88SSatish Balay   */
486c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
487c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
488b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
489c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
490bc5ccf88SSatish Balay   starti   = startv + size;
491a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
492bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
493bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
494c1dc657dSBarry Smith     startv[i] = startv[i-1] + nprocs[2*i-2];
495c1dc657dSBarry Smith     starti[i] = starti[i-1] + nprocs[2*i-2]*2;
496bc5ccf88SSatish Balay   }
497bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
498bc5ccf88SSatish Balay     j = owner[i];
499a2d1c673SSatish Balay     if (bs2 == 1) {
500bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
501a2d1c673SSatish Balay     } else {
502c1ac3661SBarry Smith       PetscInt       k;
5033eda8832SBarry Smith       MatScalar *buf1,*buf2;
5044c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
5054c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
5064c1ff481SSatish Balay       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
507a2d1c673SSatish Balay     }
508bc5ccf88SSatish Balay     sindices[starti[j]]               = stash->idx[i];
509c1dc657dSBarry Smith     sindices[starti[j]+nprocs[2*j]]   = stash->idy[i];
510bc5ccf88SSatish Balay     startv[j]++;
511bc5ccf88SSatish Balay     starti[j]++;
512bc5ccf88SSatish Balay   }
513bc5ccf88SSatish Balay   startv[0] = 0;
514c1dc657dSBarry Smith   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
515bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
516c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
517d7d82daaSBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
518d7d82daaSBarry Smith       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
519bc5ccf88SSatish Balay     }
520bc5ccf88SSatish Balay   }
521606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
522606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
523a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
524a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
525a2d1c673SSatish Balay   stash->nprocs      = nprocs;
526a2d1c673SSatish Balay 
527bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
528bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
529bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
530bc5ccf88SSatish Balay   stash->rmax       = nmax;
531bc5ccf88SSatish Balay   PetscFunctionReturn(0);
532bc5ccf88SSatish Balay }
533bc5ccf88SSatish Balay 
534a2d1c673SSatish Balay /*
5358798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5368798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5374c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5384c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5394c1ff481SSatish Balay 
5404c1ff481SSatish Balay    Input Parameters:
5414c1ff481SSatish Balay    stash - the stash
5424c1ff481SSatish Balay 
5434c1ff481SSatish Balay    Output Parameters:
5444c1ff481SSatish Balay    nvals - the number of entries in the current message.
5454c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5464c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5474c1ff481SSatish Balay    vals  - the values
5484c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5494c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5504c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
551a2d1c673SSatish Balay */
5524a2ae208SSatish Balay #undef __FUNCT__
5534a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
554c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
555bc5ccf88SSatish Balay {
5566849ba73SBarry Smith   PetscErrorCode ierr;
557c1ac3661SBarry Smith   PetscMPIInt    i;
558c1ac3661SBarry Smith   PetscInt       *flg_v,i1,i2,*rindices,bs2;
559a2d1c673SSatish Balay   MPI_Status     recv_status;
560b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
561bc5ccf88SSatish Balay 
562bc5ccf88SSatish Balay   PetscFunctionBegin;
563bc5ccf88SSatish Balay 
564a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
565a2d1c673SSatish Balay   /* Return if no more messages to process */
566a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
567a2d1c673SSatish Balay 
568a2d1c673SSatish Balay   flg_v = stash->nprocs;
5694c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
570a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
571a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
572a2d1c673SSatish Balay   while (!match_found) {
573a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
574a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
575a2d1c673SSatish Balay     if (i % 2) {
576d7d82daaSBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
577c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
578a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
579a2d1c673SSatish Balay     } else {
5803eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
581c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
582a2d1c673SSatish Balay       *nvals = *nvals/bs2;
583bc5ccf88SSatish Balay     }
584a2d1c673SSatish Balay 
585a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
586c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
587c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
588a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
589c1ac3661SBarry Smith       rindices    = (PetscInt*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
590a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
591a2d1c673SSatish Balay       *cols       = *rows + *nvals;
592a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
593a2d1c673SSatish Balay       *flg        = 1;
594a2d1c673SSatish Balay       stash->nprocessed ++;
59535d8aa7fSBarry Smith       match_found = PETSC_TRUE;
596bc5ccf88SSatish Balay     }
597bc5ccf88SSatish Balay   }
598bc5ccf88SSatish Balay   PetscFunctionReturn(0);
599bc5ccf88SSatish Balay }
600