xref: /petsc/src/mat/utils/matstash.c (revision 563fb8714a5d78400bbe9510b73a96321c613bbf)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
49417f4adSLois Curfman McInnes 
53eda8832SBarry Smith /*
60ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
70ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
83eda8832SBarry Smith */
9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
104c1ff481SSatish Balay 
119417f4adSLois Curfman McInnes /*
128798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
134c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
144c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
159417f4adSLois Curfman McInnes 
164c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Input Parameters:
194c1ff481SSatish Balay   comm - communicator, required for scatters.
204c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Output Parameters:
234c1ff481SSatish Balay   stash    - the newly created stash
249417f4adSLois Curfman McInnes */
254a2ae208SSatish Balay #undef __FUNCT__
264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
27c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
289417f4adSLois Curfman McInnes {
29dfbe8321SBarry Smith   PetscErrorCode ierr;
30c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
31f1af5d2fSBarry Smith   PetscTruth     flg;
32bc5ccf88SSatish Balay 
333a40ed3dSBarry Smith   PetscFunctionBegin;
34bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35752ec6e0SSatish Balay   stash->comm = comm;
36752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40bc5ccf88SSatish Balay 
41434d7ff9SSatish Balay   nopt = stash->size;
42d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44434d7ff9SSatish Balay   if (flg) {
45434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
46434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
47434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
48f4ab19daSSatish Balay     else                          max = 0; /* Use default */
49434d7ff9SSatish Balay     stash->umax = max;
50434d7ff9SSatish Balay   } else {
51434d7ff9SSatish Balay     stash->umax = 0;
52434d7ff9SSatish Balay   }
53606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
544c1ff481SSatish Balay   if (bs <= 0) bs = 1;
55a2d1c673SSatish Balay 
564c1ff481SSatish Balay   stash->bs       = bs;
579417f4adSLois Curfman McInnes   stash->nmax     = 0;
58434d7ff9SSatish Balay   stash->oldnmax  = 0;
599417f4adSLois Curfman McInnes   stash->n        = 0;
604c1ff481SSatish Balay   stash->reallocs = -1;
619417f4adSLois Curfman McInnes   stash->idx      = 0;
629417f4adSLois Curfman McInnes   stash->idy      = 0;
63bc5ccf88SSatish Balay   stash->array    = 0;
649417f4adSLois Curfman McInnes 
65bc5ccf88SSatish Balay   stash->send_waits  = 0;
66bc5ccf88SSatish Balay   stash->recv_waits  = 0;
67a2d1c673SSatish Balay   stash->send_status = 0;
68bc5ccf88SSatish Balay   stash->nsends      = 0;
69bc5ccf88SSatish Balay   stash->nrecvs      = 0;
70bc5ccf88SSatish Balay   stash->svalues     = 0;
71bc5ccf88SSatish Balay   stash->rvalues     = 0;
72*563fb871SSatish Balay   stash->rindices    = 0;
73bc5ccf88SSatish Balay   stash->rmax        = 0;
74a2d1c673SSatish Balay   stash->nprocs      = 0;
75a2d1c673SSatish Balay   stash->nprocessed  = 0;
763a40ed3dSBarry Smith   PetscFunctionReturn(0);
779417f4adSLois Curfman McInnes }
789417f4adSLois Curfman McInnes 
794c1ff481SSatish Balay /*
808798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
814c1ff481SSatish Balay */
824a2ae208SSatish Balay #undef __FUNCT__
834a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
84dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
859417f4adSLois Curfman McInnes {
86dfbe8321SBarry Smith   PetscErrorCode ierr;
87a2d1c673SSatish Balay 
88bc5ccf88SSatish Balay   PetscFunctionBegin;
89606d414cSSatish Balay   if (stash->array) {
90606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
91606d414cSSatish Balay     stash->array = 0;
92606d414cSSatish Balay   }
93bc5ccf88SSatish Balay   PetscFunctionReturn(0);
94bc5ccf88SSatish Balay }
95bc5ccf88SSatish Balay 
964c1ff481SSatish Balay /*
978798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
984c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
994c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
1004c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1014c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1024c1ff481SSatish Balay    so that the same value can be used the next time through.
1034c1ff481SSatish Balay */
1044a2ae208SSatish Balay #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
106dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107bc5ccf88SSatish Balay {
1086849ba73SBarry Smith   PetscErrorCode ierr;
1096849ba73SBarry Smith   int         nsends=stash->nsends,bs2,oldnmax;
110a2d1c673SSatish Balay   MPI_Status  *send_status;
111a2d1c673SSatish Balay 
1123a40ed3dSBarry Smith   PetscFunctionBegin;
113a2d1c673SSatish Balay   /* wait on sends */
114a2d1c673SSatish Balay   if (nsends) {
11582502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
116a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
117606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
118a2d1c673SSatish Balay   }
119a2d1c673SSatish Balay 
120c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
121434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
122434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
123b9b97703SBarry Smith   if (stash->n) {
12494b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1258a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
126434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127b9b97703SBarry Smith   }
128434d7ff9SSatish Balay 
129d07ff455SSatish Balay   stash->nmax       = 0;
130d07ff455SSatish Balay   stash->n          = 0;
1314c1ff481SSatish Balay   stash->reallocs   = -1;
132bc5ccf88SSatish Balay   stash->rmax       = 0;
133a2d1c673SSatish Balay   stash->nprocessed = 0;
134bc5ccf88SSatish Balay 
135bc5ccf88SSatish Balay   if (stash->array) {
136606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
137bc5ccf88SSatish Balay     stash->array = 0;
138bc5ccf88SSatish Balay     stash->idx   = 0;
139bc5ccf88SSatish Balay     stash->idy   = 0;
140bc5ccf88SSatish Balay   }
141606d414cSSatish Balay   if (stash->send_waits) {
142606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
143606d414cSSatish Balay     stash->send_waits = 0;
144606d414cSSatish Balay   }
145606d414cSSatish Balay   if (stash->recv_waits) {
146606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
147606d414cSSatish Balay     stash->recv_waits = 0;
148606d414cSSatish Balay   }
149606d414cSSatish Balay   if (stash->svalues) {
150606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
151606d414cSSatish Balay     stash->svalues = 0;
152606d414cSSatish Balay   }
153606d414cSSatish Balay   if (stash->rvalues) {
154606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
155606d414cSSatish Balay     stash->rvalues = 0;
156606d414cSSatish Balay   }
157*563fb871SSatish Balay   if (stash->rindices) {
158*563fb871SSatish Balay     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
159*563fb871SSatish Balay     stash->rindices = 0;
160*563fb871SSatish Balay   }
161606d414cSSatish Balay   if (stash->nprocs) {
162b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
163606d414cSSatish Balay     stash->nprocs = 0;
164606d414cSSatish Balay   }
165bc5ccf88SSatish Balay 
1663a40ed3dSBarry Smith   PetscFunctionReturn(0);
1679417f4adSLois Curfman McInnes }
1689417f4adSLois Curfman McInnes 
1694c1ff481SSatish Balay /*
1708798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1714c1ff481SSatish Balay 
1724c1ff481SSatish Balay    Input Parameters:
1734c1ff481SSatish Balay    stash    - the stash
17494b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1754c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1764c1ff481SSatish Balay 
1774c1ff481SSatish Balay */
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
180c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
18197530c3fSBarry Smith {
182c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
18394b769a5SSatish Balay 
1843a40ed3dSBarry Smith   PetscFunctionBegin;
1851ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1861ecfd215SBarry Smith   if (reallocs) {
187434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
188434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1891ecfd215SBarry Smith   }
190bc5ccf88SSatish Balay   PetscFunctionReturn(0);
191bc5ccf88SSatish Balay }
1924c1ff481SSatish Balay 
1934c1ff481SSatish Balay 
1944c1ff481SSatish Balay /*
1958798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1964c1ff481SSatish Balay 
1974c1ff481SSatish Balay    Input Parameters:
1984c1ff481SSatish Balay    stash  - the stash
1994c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2004c1ff481SSatish Balay             this value is used while allocating memory.
2014c1ff481SSatish Balay */
2024a2ae208SSatish Balay #undef __FUNCT__
2034a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
204c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
205bc5ccf88SSatish Balay {
206bc5ccf88SSatish Balay   PetscFunctionBegin;
207434d7ff9SSatish Balay   stash->umax = max;
2083a40ed3dSBarry Smith   PetscFunctionReturn(0);
20997530c3fSBarry Smith }
21097530c3fSBarry Smith 
2118798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2124c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2134c1ff481SSatish Balay    being inserted into the stash.
2144c1ff481SSatish Balay 
2154c1ff481SSatish Balay    Input Parameters:
2164c1ff481SSatish Balay    stash - the stash
2174c1ff481SSatish Balay    incr  - the minimum increase requested
2184c1ff481SSatish Balay 
2194c1ff481SSatish Balay    Notes:
2204c1ff481SSatish Balay    This routine doubles the currently used memory.
2214c1ff481SSatish Balay  */
2224a2ae208SSatish Balay #undef __FUNCT__
2234a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
224c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2259417f4adSLois Curfman McInnes {
2266849ba73SBarry Smith   PetscErrorCode ierr;
227c1ac3661SBarry Smith   PetscInt       *n_idx,*n_idy,newnmax,bs2;
2283eda8832SBarry Smith   MatScalar *n_array;
2299417f4adSLois Curfman McInnes 
2303a40ed3dSBarry Smith   PetscFunctionBegin;
2319417f4adSLois Curfman McInnes   /* allocate a larger stash */
23294b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
233c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
234434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
235434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
236c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
237434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
238434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
239434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2404c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
241d07ff455SSatish Balay 
242c1ac3661SBarry Smith   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
243c1ac3661SBarry Smith   n_idx = (PetscInt*)(n_array + bs2*newnmax);
244c1ac3661SBarry Smith   n_idy = (PetscInt*)(n_idx + newnmax);
2453eda8832SBarry Smith   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
246c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
247c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
248606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
249d07ff455SSatish Balay   stash->array   = n_array;
250d07ff455SSatish Balay   stash->idx     = n_idx;
251d07ff455SSatish Balay   stash->idy     = n_idy;
252d07ff455SSatish Balay   stash->nmax    = newnmax;
253bc5ccf88SSatish Balay   stash->reallocs++;
254bc5ccf88SSatish Balay   PetscFunctionReturn(0);
255bc5ccf88SSatish Balay }
256bc5ccf88SSatish Balay /*
2578798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2584c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2594c1ff481SSatish Balay   can be inserted with a single call to this function.
2604c1ff481SSatish Balay 
2614c1ff481SSatish Balay   Input Parameters:
2624c1ff481SSatish Balay   stash  - the stash
2634c1ff481SSatish Balay   row    - the global row correspoiding to the values
2644c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2654c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2664c1ff481SSatish Balay   values - the values inserted
267bc5ccf88SSatish Balay */
2684a2ae208SSatish Balay #undef __FUNCT__
2694a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
270c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
271bc5ccf88SSatish Balay {
272dfbe8321SBarry Smith   PetscErrorCode ierr;
273c1ac3661SBarry Smith   PetscInt i;
274bc5ccf88SSatish Balay 
275bc5ccf88SSatish Balay   PetscFunctionBegin;
2764c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2774c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2788798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2799417f4adSLois Curfman McInnes   }
2804c1ff481SSatish Balay   for (i=0; i<n; i++) {
2819417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
282a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
2830ae3cd3bSBarry Smith     stash->array[stash->n] = values[i];
284a2d1c673SSatish Balay     stash->n++;
2859417f4adSLois Curfman McInnes   }
286a2d1c673SSatish Balay   PetscFunctionReturn(0);
287a2d1c673SSatish Balay }
2884c1ff481SSatish Balay /*
2898798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2904c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2914c1ff481SSatish Balay   can be inserted with a single call to this function.
292a2d1c673SSatish Balay 
2934c1ff481SSatish Balay   Input Parameters:
2944c1ff481SSatish Balay   stash   - the stash
2954c1ff481SSatish Balay   row     - the global row correspoiding to the values
2964c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2974c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2984c1ff481SSatish Balay   values  - the values inserted
2994c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3004c1ff481SSatish Balay             this happens because the input is columnoriented.
3014c1ff481SSatish Balay */
3024a2ae208SSatish Balay #undef __FUNCT__
3034a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
304c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
305a2d1c673SSatish Balay {
306dfbe8321SBarry Smith   PetscErrorCode ierr;
307c1ac3661SBarry Smith   PetscInt i;
308a2d1c673SSatish Balay 
3094c1ff481SSatish Balay   PetscFunctionBegin;
3104c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
3114c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
3128798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3134c1ff481SSatish Balay   }
3144c1ff481SSatish Balay   for (i=0; i<n; i++) {
3154c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3164c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3170ae3cd3bSBarry Smith     stash->array[stash->n] = values[i*stepval];
3184c1ff481SSatish Balay     stash->n++;
3194c1ff481SSatish Balay   }
3204c1ff481SSatish Balay   PetscFunctionReturn(0);
3214c1ff481SSatish Balay }
3224c1ff481SSatish Balay 
3234c1ff481SSatish Balay /*
3248798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3254c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3264c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3274c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3284c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3294c1ff481SSatish Balay 
3304c1ff481SSatish Balay   Input Parameters:
3314c1ff481SSatish Balay   stash  - the stash
3324c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3334c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3344c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3354c1ff481SSatish Balay            values. Each block is of size bs*bs.
3364c1ff481SSatish Balay   values - the values inserted
3374c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3384c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3394c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3404c1ff481SSatish Balay */
3414a2ae208SSatish Balay #undef __FUNCT__
3424a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
343c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3444c1ff481SSatish Balay {
345dfbe8321SBarry Smith   PetscErrorCode ierr;
346c1ac3661SBarry Smith   PetscInt i,j,k,bs2,bs=stash->bs;
347f15d580aSBarry Smith   const MatScalar *vals;
348f15d580aSBarry Smith   MatScalar       *array;
349a2d1c673SSatish Balay 
350a2d1c673SSatish Balay   PetscFunctionBegin;
351a2d1c673SSatish Balay   bs2 = bs*bs;
3524c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3538798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
354a2d1c673SSatish Balay   }
3554c1ff481SSatish Balay   for (i=0; i<n; i++) {
356a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
357a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
358a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
359a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
360a2d1c673SSatish Balay        funtion call */
361a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
362a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
363a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
3640ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
365a2d1c673SSatish Balay       array += 1;
366a2d1c673SSatish Balay       vals  += cmax*bs;
367a2d1c673SSatish Balay     }
3684c1ff481SSatish Balay     stash->n++;
3694c1ff481SSatish Balay   }
3704c1ff481SSatish Balay   PetscFunctionReturn(0);
3714c1ff481SSatish Balay }
3724c1ff481SSatish Balay 
3734c1ff481SSatish Balay /*
3748798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3754c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3764c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3774c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3784c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3794c1ff481SSatish Balay 
3804c1ff481SSatish Balay   Input Parameters:
3814c1ff481SSatish Balay   stash  - the stash
3824c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3834c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3844c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3854c1ff481SSatish Balay            values. Each block is of size bs*bs.
3864c1ff481SSatish Balay   values - the values inserted
3874c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3884c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3894c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3904c1ff481SSatish Balay */
3914a2ae208SSatish Balay #undef __FUNCT__
3924a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
393c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3944c1ff481SSatish Balay {
395dfbe8321SBarry Smith   PetscErrorCode ierr;
396c1ac3661SBarry Smith   PetscInt i,j,k,bs2,bs=stash->bs;
397f15d580aSBarry Smith   const MatScalar *vals;
398f15d580aSBarry Smith   MatScalar       *array;
3994c1ff481SSatish Balay 
4004c1ff481SSatish Balay   PetscFunctionBegin;
4014c1ff481SSatish Balay   bs2 = bs*bs;
4024c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
4038798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4044c1ff481SSatish Balay   }
4054c1ff481SSatish Balay   for (i=0; i<n; i++) {
4064c1ff481SSatish Balay     stash->idx[stash->n]   = row;
4074c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
4084c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
4094c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
4104c1ff481SSatish Balay      funtion call */
411a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
412a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
413a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
4140ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k] = vals[k];}
415a2d1c673SSatish Balay       array += bs;
416a2d1c673SSatish Balay       vals  += rmax*bs;
417a2d1c673SSatish Balay     }
418a2d1c673SSatish Balay     stash->n++;
4199417f4adSLois Curfman McInnes   }
4203a40ed3dSBarry Smith   PetscFunctionReturn(0);
4219417f4adSLois Curfman McInnes }
4224c1ff481SSatish Balay /*
4238798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4244c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4254c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4264c1ff481SSatish Balay   processors.
427bc5ccf88SSatish Balay 
4284c1ff481SSatish Balay   Input Parameters:
4294c1ff481SSatish Balay   stash  - the stash
4304c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4314c1ff481SSatish Balay            for each node.
4324c1ff481SSatish Balay 
4334c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4344c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4354c1ff481SSatish Balay   the proper global indices.
4364c1ff481SSatish Balay */
4374a2ae208SSatish Balay #undef __FUNCT__
4384a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
439c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
440bc5ccf88SSatish Balay {
441c1ac3661SBarry Smith   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
442*563fb871SSatish Balay   PetscInt       size=stash->size,*nprocs,*nlengths,nsends,nreceives;
4436849ba73SBarry Smith   PetscErrorCode ierr;
444*563fb871SSatish Balay   PetscInt       nmax,count,*sindices,**rindices,i,j,idx,lastidx;
445*563fb871SSatish Balay   MatScalar      **rvalues,*svalues;
446bc5ccf88SSatish Balay   MPI_Comm       comm = stash->comm;
447*563fb871SSatish Balay   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
448bc5ccf88SSatish Balay 
449bc5ccf88SSatish Balay   PetscFunctionBegin;
450bc5ccf88SSatish Balay 
4514c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
452bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
453c1ac3661SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
454c1ac3661SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
455c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
456a2d1c673SSatish Balay 
457*563fb871SSatish Balay   nlengths = nprocs+size;
4587357eb19SBarry Smith   j        = 0;
4597357eb19SBarry Smith   lastidx  = -1;
460bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
4617357eb19SBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
4627357eb19SBarry Smith     if (lastidx > (idx = stash->idx[i])) j = 0;
4637357eb19SBarry Smith     lastidx = idx;
4647357eb19SBarry Smith     for (; j<size; j++) {
4654c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
466*563fb871SSatish Balay         nlengths[j]++; owner[i] = j; break;
467bc5ccf88SSatish Balay       }
468bc5ccf88SSatish Balay     }
469bc5ccf88SSatish Balay   }
470*563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
471*563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
472*563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
473*563fb871SSatish Balay   }
474bc5ccf88SSatish Balay 
475*563fb871SSatish Balay   { int  *onodes,*olengths;
476*563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
477*563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
478*563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
479*563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
480*563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
481*563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
482*563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
483*563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
484*563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
485*563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
486*563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
487bc5ccf88SSatish Balay   }
488bc5ccf88SSatish Balay 
489bc5ccf88SSatish Balay   /* do sends:
490bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
491bc5ccf88SSatish Balay          the ith processor
492bc5ccf88SSatish Balay   */
493c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
494c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
495b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
496c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
497bc5ccf88SSatish Balay   starti   = startv + size;
498a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
499bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
500bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
501*563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
502*563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
503bc5ccf88SSatish Balay   }
504bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
505bc5ccf88SSatish Balay     j = owner[i];
506a2d1c673SSatish Balay     if (bs2 == 1) {
507bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
508a2d1c673SSatish Balay     } else {
509c1ac3661SBarry Smith       PetscInt       k;
5103eda8832SBarry Smith       MatScalar *buf1,*buf2;
5114c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
5124c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
5134c1ff481SSatish Balay       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
514a2d1c673SSatish Balay     }
515bc5ccf88SSatish Balay     sindices[starti[j]]               = stash->idx[i];
516*563fb871SSatish Balay     sindices[starti[j]+nlengths[j]]   = stash->idy[i];
517bc5ccf88SSatish Balay     startv[j]++;
518bc5ccf88SSatish Balay     starti[j]++;
519bc5ccf88SSatish Balay   }
520bc5ccf88SSatish Balay   startv[0] = 0;
521*563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
522bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
523*563fb871SSatish Balay     if (nprocs[i]) {
524*563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
525*563fb871SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
526bc5ccf88SSatish Balay     }
527bc5ccf88SSatish Balay   }
528606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
529606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
530a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
531a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
532a2d1c673SSatish Balay   stash->nprocs      = nprocs;
533a2d1c673SSatish Balay 
534*563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
535*563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
536*563fb871SSatish Balay 
537*563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
538*563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
539*563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
540*563fb871SSatish Balay   }
541*563fb871SSatish Balay   stash->recv_waits = recv_waits;
542*563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
543*563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
544*563fb871SSatish Balay 
545bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
546*563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
547bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
548bc5ccf88SSatish Balay   stash->rmax       = nmax;
549bc5ccf88SSatish Balay   PetscFunctionReturn(0);
550bc5ccf88SSatish Balay }
551bc5ccf88SSatish Balay 
552a2d1c673SSatish Balay /*
5538798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5548798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5554c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5564c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5574c1ff481SSatish Balay 
5584c1ff481SSatish Balay    Input Parameters:
5594c1ff481SSatish Balay    stash - the stash
5604c1ff481SSatish Balay 
5614c1ff481SSatish Balay    Output Parameters:
5624c1ff481SSatish Balay    nvals - the number of entries in the current message.
5634c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5644c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5654c1ff481SSatish Balay    vals  - the values
5664c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5674c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5684c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
569a2d1c673SSatish Balay */
5704a2ae208SSatish Balay #undef __FUNCT__
5714a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
572c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
573bc5ccf88SSatish Balay {
5746849ba73SBarry Smith   PetscErrorCode ierr;
575c1ac3661SBarry Smith   PetscMPIInt    i;
576*563fb871SSatish Balay   PetscInt       *flg_v,i1,i2,bs2;
577a2d1c673SSatish Balay   MPI_Status     recv_status;
578b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
579bc5ccf88SSatish Balay 
580bc5ccf88SSatish Balay   PetscFunctionBegin;
581bc5ccf88SSatish Balay 
582a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
583a2d1c673SSatish Balay   /* Return if no more messages to process */
584a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
585a2d1c673SSatish Balay 
586a2d1c673SSatish Balay   flg_v = stash->nprocs;
5874c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
588a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
589a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
590a2d1c673SSatish Balay   while (!match_found) {
591a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
592a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
593a2d1c673SSatish Balay     if (i % 2) {
5943eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
595c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
596a2d1c673SSatish Balay       *nvals = *nvals/bs2;
597*563fb871SSatish Balay     } else {
598*563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
599*563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
600*563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
601bc5ccf88SSatish Balay     }
602a2d1c673SSatish Balay 
603a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
604c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
605c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
606a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
607*563fb871SSatish Balay       *rows       = stash->rindices[i2];
608a2d1c673SSatish Balay       *cols       = *rows + *nvals;
609*563fb871SSatish Balay       *vals       = stash->rvalues[i1];
610a2d1c673SSatish Balay       *flg        = 1;
611a2d1c673SSatish Balay       stash->nprocessed ++;
61235d8aa7fSBarry Smith       match_found = PETSC_TRUE;
613bc5ccf88SSatish Balay     }
614bc5ccf88SSatish Balay   }
615bc5ccf88SSatish Balay   PetscFunctionReturn(0);
616bc5ccf88SSatish Balay }
617