xref: /petsc/src/mat/utils/matstash.c (revision 75cae7c1d616af54ee0c7389aaac575bbdf1327d)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
4*75cae7c1SHong Zhang #include "src/mat/utils/matstashspace.h"
5*75cae7c1SHong Zhang #undef MV
63eda8832SBarry Smith /*
70ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
80ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
93eda8832SBarry Smith */
10bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
114c1ff481SSatish Balay 
129417f4adSLois Curfman McInnes /*
138798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
144c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
154c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
169417f4adSLois Curfman McInnes 
174c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
184c1ff481SSatish Balay 
194c1ff481SSatish Balay   Input Parameters:
204c1ff481SSatish Balay   comm - communicator, required for scatters.
214c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
224c1ff481SSatish Balay 
234c1ff481SSatish Balay   Output Parameters:
244c1ff481SSatish Balay   stash    - the newly created stash
259417f4adSLois Curfman McInnes */
264a2ae208SSatish Balay #undef __FUNCT__
274a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
299417f4adSLois Curfman McInnes {
30dfbe8321SBarry Smith   PetscErrorCode ierr;
31c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
32f1af5d2fSBarry Smith   PetscTruth     flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
37752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41bc5ccf88SSatish Balay 
42434d7ff9SSatish Balay   nopt = stash->size;
43d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45434d7ff9SSatish Balay   if (flg) {
46434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
47434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
48434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
49f4ab19daSSatish Balay     else                          max = 0; /* Use default */
50434d7ff9SSatish Balay     stash->umax = max;
51434d7ff9SSatish Balay   } else {
52434d7ff9SSatish Balay     stash->umax = 0;
53434d7ff9SSatish Balay   }
54606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
554c1ff481SSatish Balay   if (bs <= 0) bs = 1;
56a2d1c673SSatish Balay 
574c1ff481SSatish Balay   stash->bs       = bs;
589417f4adSLois Curfman McInnes   stash->nmax     = 0;
59434d7ff9SSatish Balay   stash->oldnmax  = 0;
609417f4adSLois Curfman McInnes   stash->n        = 0;
614c1ff481SSatish Balay   stash->reallocs = -1;
62*75cae7c1SHong Zhang #ifdef MV
639417f4adSLois Curfman McInnes   stash->idx      = 0;
649417f4adSLois Curfman McInnes   stash->idy      = 0;
65bc5ccf88SSatish Balay   stash->array    = 0;
66*75cae7c1SHong Zhang #endif
67*75cae7c1SHong Zhang   stash->space_head = 0;
68*75cae7c1SHong Zhang   stash->space      = 0;
699417f4adSLois Curfman McInnes 
70bc5ccf88SSatish Balay   stash->send_waits  = 0;
71bc5ccf88SSatish Balay   stash->recv_waits  = 0;
72a2d1c673SSatish Balay   stash->send_status = 0;
73bc5ccf88SSatish Balay   stash->nsends      = 0;
74bc5ccf88SSatish Balay   stash->nrecvs      = 0;
75bc5ccf88SSatish Balay   stash->svalues     = 0;
76bc5ccf88SSatish Balay   stash->rvalues     = 0;
77563fb871SSatish Balay   stash->rindices    = 0;
78a2d1c673SSatish Balay   stash->nprocs      = 0;
79a2d1c673SSatish Balay   stash->nprocessed  = 0;
803a40ed3dSBarry Smith   PetscFunctionReturn(0);
819417f4adSLois Curfman McInnes }
829417f4adSLois Curfman McInnes 
834c1ff481SSatish Balay /*
848798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
854c1ff481SSatish Balay */
864a2ae208SSatish Balay #undef __FUNCT__
874a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
88dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
899417f4adSLois Curfman McInnes {
90dfbe8321SBarry Smith   PetscErrorCode ierr;
91a2d1c673SSatish Balay 
92bc5ccf88SSatish Balay   PetscFunctionBegin;
93*75cae7c1SHong Zhang #ifdef MV
94606d414cSSatish Balay   if (stash->array) {
95606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
96606d414cSSatish Balay     stash->array = 0;
97606d414cSSatish Balay   }
98*75cae7c1SHong Zhang #endif
99*75cae7c1SHong Zhang   if (stash->space_head){
100*75cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
101*75cae7c1SHong Zhang     stash->space_head = 0;
102*75cae7c1SHong Zhang   }
103bc5ccf88SSatish Balay   PetscFunctionReturn(0);
104bc5ccf88SSatish Balay }
105bc5ccf88SSatish Balay 
1064c1ff481SSatish Balay /*
1078798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
1084c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
1094c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
1104c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1114c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1124c1ff481SSatish Balay    so that the same value can be used the next time through.
1134c1ff481SSatish Balay */
1144a2ae208SSatish Balay #undef __FUNCT__
1154a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
116dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
117bc5ccf88SSatish Balay {
1186849ba73SBarry Smith   PetscErrorCode ierr;
1196849ba73SBarry Smith   int         nsends=stash->nsends,bs2,oldnmax;
120a2d1c673SSatish Balay   MPI_Status  *send_status;
121a2d1c673SSatish Balay 
1223a40ed3dSBarry Smith   PetscFunctionBegin;
123a2d1c673SSatish Balay   /* wait on sends */
124a2d1c673SSatish Balay   if (nsends) {
12582502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
126a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
127606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
128a2d1c673SSatish Balay   }
129a2d1c673SSatish Balay 
130c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
131434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
132434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
133b9b97703SBarry Smith   if (stash->n) {
13494b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1358a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
136434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
137b9b97703SBarry Smith   }
138434d7ff9SSatish Balay 
139d07ff455SSatish Balay   stash->nmax       = 0;
140d07ff455SSatish Balay   stash->n          = 0;
1414c1ff481SSatish Balay   stash->reallocs   = -1;
142a2d1c673SSatish Balay   stash->nprocessed = 0;
143*75cae7c1SHong Zhang #ifdef MV
144bc5ccf88SSatish Balay   if (stash->array) {
145606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
146bc5ccf88SSatish Balay     stash->array = 0;
147bc5ccf88SSatish Balay     stash->idx   = 0;
148bc5ccf88SSatish Balay     stash->idy   = 0;
149bc5ccf88SSatish Balay   }
150*75cae7c1SHong Zhang #endif
151*75cae7c1SHong Zhang   if (stash->space_head){
152*75cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
153*75cae7c1SHong Zhang     stash->space_head = 0;
154*75cae7c1SHong Zhang   }
155606d414cSSatish Balay   if (stash->send_waits) {
156606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
157606d414cSSatish Balay     stash->send_waits = 0;
158606d414cSSatish Balay   }
159606d414cSSatish Balay   if (stash->recv_waits) {
160606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
161606d414cSSatish Balay     stash->recv_waits = 0;
162606d414cSSatish Balay   }
163606d414cSSatish Balay   if (stash->svalues) {
164606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
165606d414cSSatish Balay     stash->svalues = 0;
166606d414cSSatish Balay   }
167606d414cSSatish Balay   if (stash->rvalues) {
168606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
169606d414cSSatish Balay     stash->rvalues = 0;
170606d414cSSatish Balay   }
171563fb871SSatish Balay   if (stash->rindices) {
172563fb871SSatish Balay     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
173563fb871SSatish Balay     stash->rindices = 0;
174563fb871SSatish Balay   }
175606d414cSSatish Balay   if (stash->nprocs) {
176b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
177606d414cSSatish Balay     stash->nprocs = 0;
178606d414cSSatish Balay   }
179bc5ccf88SSatish Balay 
1803a40ed3dSBarry Smith   PetscFunctionReturn(0);
1819417f4adSLois Curfman McInnes }
1829417f4adSLois Curfman McInnes 
1834c1ff481SSatish Balay /*
1848798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1854c1ff481SSatish Balay 
1864c1ff481SSatish Balay    Input Parameters:
1874c1ff481SSatish Balay    stash    - the stash
18894b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1894c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1904c1ff481SSatish Balay 
1914c1ff481SSatish Balay */
1924a2ae208SSatish Balay #undef __FUNCT__
1934a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
194c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
19597530c3fSBarry Smith {
196c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
19794b769a5SSatish Balay 
1983a40ed3dSBarry Smith   PetscFunctionBegin;
1991ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
2001ecfd215SBarry Smith   if (reallocs) {
201434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
202434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
2031ecfd215SBarry Smith   }
204bc5ccf88SSatish Balay   PetscFunctionReturn(0);
205bc5ccf88SSatish Balay }
2064c1ff481SSatish Balay 
2074c1ff481SSatish Balay 
2084c1ff481SSatish Balay /*
2098798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
2104c1ff481SSatish Balay 
2114c1ff481SSatish Balay    Input Parameters:
2124c1ff481SSatish Balay    stash  - the stash
2134c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2144c1ff481SSatish Balay             this value is used while allocating memory.
2154c1ff481SSatish Balay */
2164a2ae208SSatish Balay #undef __FUNCT__
2174a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
218c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
219bc5ccf88SSatish Balay {
220bc5ccf88SSatish Balay   PetscFunctionBegin;
221434d7ff9SSatish Balay   stash->umax = max;
2223a40ed3dSBarry Smith   PetscFunctionReturn(0);
22397530c3fSBarry Smith }
22497530c3fSBarry Smith 
2258798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2264c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2274c1ff481SSatish Balay    being inserted into the stash.
2284c1ff481SSatish Balay 
2294c1ff481SSatish Balay    Input Parameters:
2304c1ff481SSatish Balay    stash - the stash
2314c1ff481SSatish Balay    incr  - the minimum increase requested
2324c1ff481SSatish Balay 
2334c1ff481SSatish Balay    Notes:
2344c1ff481SSatish Balay    This routine doubles the currently used memory.
2354c1ff481SSatish Balay  */
2364a2ae208SSatish Balay #undef __FUNCT__
2374a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
238c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2399417f4adSLois Curfman McInnes {
2406849ba73SBarry Smith   PetscErrorCode ierr;
241*75cae7c1SHong Zhang   PetscInt       *n_idx,*n_idy,newnmax,bs2= stash->bs*stash->bs;
2423eda8832SBarry Smith   MatScalar      *n_array;
2439417f4adSLois Curfman McInnes 
2443a40ed3dSBarry Smith   PetscFunctionBegin;
2459417f4adSLois Curfman McInnes   /* allocate a larger stash */
246c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
247434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
248434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
249c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
250434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
251434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
252434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2534c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
254d07ff455SSatish Balay 
255*75cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
256*75cae7c1SHong Zhang   if (!stash->nmax) { /* new stash or resuing stash->oldnmax */
257*75cae7c1SHong Zhang     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr);
258*75cae7c1SHong Zhang     stash->space = stash->space_head;
259*75cae7c1SHong Zhang   } else {
260*75cae7c1SHong Zhang     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
261*75cae7c1SHong Zhang   }
262*75cae7c1SHong Zhang #ifdef MV
263*75cae7c1SHong Zhang   PetscMPIInt rank;
264*75cae7c1SHong Zhang   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
265*75cae7c1SHong Zhang   printf("[%d] MatStashExpand ends, incr %d, space %p, space->val %p\n",rank,incr,stash->space,(stash->space)->val);
266*75cae7c1SHong Zhang #endif
267*75cae7c1SHong Zhang #ifdef MV
268c1ac3661SBarry Smith   ierr  = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
269c1ac3661SBarry Smith   n_idx = (PetscInt*)(n_array + bs2*newnmax);
270c1ac3661SBarry Smith   n_idy = (PetscInt*)(n_idx + newnmax);
2713eda8832SBarry Smith   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
272c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
273c1ac3661SBarry Smith   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
274606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
275d07ff455SSatish Balay   stash->array   = n_array;
276d07ff455SSatish Balay   stash->idx     = n_idx;
277d07ff455SSatish Balay   stash->idy     = n_idy;
278*75cae7c1SHong Zhang #endif /* MV */
279bc5ccf88SSatish Balay   stash->reallocs++;
280*75cae7c1SHong Zhang   stash->nmax    = newnmax;
281bc5ccf88SSatish Balay   PetscFunctionReturn(0);
282bc5ccf88SSatish Balay }
283bc5ccf88SSatish Balay /*
2848798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2854c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2864c1ff481SSatish Balay   can be inserted with a single call to this function.
2874c1ff481SSatish Balay 
2884c1ff481SSatish Balay   Input Parameters:
2894c1ff481SSatish Balay   stash  - the stash
2904c1ff481SSatish Balay   row    - the global row correspoiding to the values
2914c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2924c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2934c1ff481SSatish Balay   values - the values inserted
294bc5ccf88SSatish Balay */
2954a2ae208SSatish Balay #undef __FUNCT__
2964a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
297c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
298bc5ccf88SSatish Balay {
299dfbe8321SBarry Smith   PetscErrorCode     ierr;
300*75cae7c1SHong Zhang   PetscInt           i,k;
301*75cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
302bc5ccf88SSatish Balay 
303bc5ccf88SSatish Balay   PetscFunctionBegin;
304*75cae7c1SHong Zhang #ifdef MV
305*75cae7c1SHong Zhang   PetscMPIInt rank;
306*75cae7c1SHong Zhang   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
307*75cae7c1SHong Zhang #endif
3084c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
309*75cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3108798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3119417f4adSLois Curfman McInnes   }
312*75cae7c1SHong Zhang   space = stash->space;
313*75cae7c1SHong Zhang #ifdef MV
314*75cae7c1SHong Zhang   if (rank == 1){
315*75cae7c1SHong Zhang     printf(" [%d] MatStashValuesRow, local_remaining %d, n %d\n",rank,space->local_remaining,n);
316*75cae7c1SHong Zhang   }
317*75cae7c1SHong Zhang   /* printf("space %p, stash %d values, local_used %d\n",space,n,space->local_used); */
318*75cae7c1SHong Zhang #endif
319*75cae7c1SHong Zhang   k = space->local_used;
3204c1ff481SSatish Balay   for (i=0; i<n; i++) {
321*75cae7c1SHong Zhang     space->idx[k] = row;
322*75cae7c1SHong Zhang     space->idy[k] = idxn[i];
323*75cae7c1SHong Zhang     space->val[k] = values[i];
324*75cae7c1SHong Zhang     k++;
325*75cae7c1SHong Zhang #ifdef MV
3269417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
327a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
3280ae3cd3bSBarry Smith     stash->array[stash->n] = values[i];
329*75cae7c1SHong Zhang #endif
330a2d1c673SSatish Balay     stash->n++;
3319417f4adSLois Curfman McInnes   }
332*75cae7c1SHong Zhang   /* stash->n               += n; */
333*75cae7c1SHong Zhang   space->local_used      += n;
334*75cae7c1SHong Zhang   space->local_remaining -= n;
335a2d1c673SSatish Balay   PetscFunctionReturn(0);
336a2d1c673SSatish Balay }
337*75cae7c1SHong Zhang 
3384c1ff481SSatish Balay /*
3398798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
3404c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
3414c1ff481SSatish Balay   can be inserted with a single call to this function.
342a2d1c673SSatish Balay 
3434c1ff481SSatish Balay   Input Parameters:
3444c1ff481SSatish Balay   stash   - the stash
3454c1ff481SSatish Balay   row     - the global row correspoiding to the values
3464c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
3474c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
3484c1ff481SSatish Balay   values  - the values inserted
3494c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3504c1ff481SSatish Balay             this happens because the input is columnoriented.
3514c1ff481SSatish Balay */
3524a2ae208SSatish Balay #undef __FUNCT__
3534a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
354c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
355a2d1c673SSatish Balay {
356dfbe8321SBarry Smith   PetscErrorCode     ierr;
357*75cae7c1SHong Zhang   PetscInt           i,k;
358*75cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
359a2d1c673SSatish Balay 
3604c1ff481SSatish Balay   PetscFunctionBegin;
3614c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
362*75cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3638798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3644c1ff481SSatish Balay   }
365*75cae7c1SHong Zhang   space = stash->space;
366*75cae7c1SHong Zhang   k = space->local_used;
3674c1ff481SSatish Balay   for (i=0; i<n; i++) {
368*75cae7c1SHong Zhang     space->idx[k] = row;
369*75cae7c1SHong Zhang     space->idy[k] = idxn[i];
370*75cae7c1SHong Zhang     space->val[k] = values[i*stepval];
371*75cae7c1SHong Zhang     k++;
372*75cae7c1SHong Zhang #ifdef MV
3734c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3744c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3750ae3cd3bSBarry Smith     stash->array[stash->n] = values[i*stepval];
376*75cae7c1SHong Zhang #endif
3774c1ff481SSatish Balay     stash->n++;
3784c1ff481SSatish Balay   }
379*75cae7c1SHong Zhang   /* stash->n               += n; */
380*75cae7c1SHong Zhang   space->local_used      += n;
381*75cae7c1SHong Zhang   space->local_remaining -= n;
3824c1ff481SSatish Balay   PetscFunctionReturn(0);
3834c1ff481SSatish Balay }
3844c1ff481SSatish Balay 
3854c1ff481SSatish Balay /*
3868798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3874c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3884c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3894c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3904c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3914c1ff481SSatish Balay 
3924c1ff481SSatish Balay   Input Parameters:
3934c1ff481SSatish Balay   stash  - the stash
3944c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3954c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3964c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3974c1ff481SSatish Balay            values. Each block is of size bs*bs.
3984c1ff481SSatish Balay   values - the values inserted
3994c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4004c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4014c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4024c1ff481SSatish Balay */
4034a2ae208SSatish Balay #undef __FUNCT__
4044a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
405c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4064c1ff481SSatish Balay {
407dfbe8321SBarry Smith   PetscErrorCode  ierr;
408*75cae7c1SHong Zhang   PetscInt        i,j,k,bs2,bs=stash->bs,l;
409f15d580aSBarry Smith   const MatScalar *vals;
410f15d580aSBarry Smith   MatScalar       *array;
411*75cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
412a2d1c673SSatish Balay 
413a2d1c673SSatish Balay   PetscFunctionBegin;
414*75cae7c1SHong Zhang   if (!space || space->local_remaining < n){
4158798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
416a2d1c673SSatish Balay   }
417*75cae7c1SHong Zhang   space = stash->space;
418*75cae7c1SHong Zhang   l     = space->local_used;
419*75cae7c1SHong Zhang   bs2   = bs*bs;
4204c1ff481SSatish Balay   for (i=0; i<n; i++) {
421*75cae7c1SHong Zhang     space->idx[l] = row;
422*75cae7c1SHong Zhang     space->idy[l] = idxn[i];
423*75cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
424*75cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
425*75cae7c1SHong Zhang        funtion call */
426*75cae7c1SHong Zhang     array = space->val + bs2*l;
427*75cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
428*75cae7c1SHong Zhang     for (j=0; j<bs; j++) {
429*75cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
430*75cae7c1SHong Zhang       array++;
431*75cae7c1SHong Zhang       vals  += cmax*bs;
432*75cae7c1SHong Zhang     }
433*75cae7c1SHong Zhang     l++;
434*75cae7c1SHong Zhang #ifdef MV
435a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
436a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
437a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
438a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
439a2d1c673SSatish Balay        funtion call */
440a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
441a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
442a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
4430ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
444a2d1c673SSatish Balay       array += 1;
445a2d1c673SSatish Balay       vals  += cmax*bs;
446a2d1c673SSatish Balay     }
447*75cae7c1SHong Zhang #endif
4484c1ff481SSatish Balay     stash->n++;
4494c1ff481SSatish Balay   }
450*75cae7c1SHong Zhang   space->local_used      += n;
451*75cae7c1SHong Zhang   space->local_remaining -= n;
4524c1ff481SSatish Balay   PetscFunctionReturn(0);
4534c1ff481SSatish Balay }
4544c1ff481SSatish Balay 
4554c1ff481SSatish Balay /*
4568798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
4574c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
4584c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
4594c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
4604c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
4614c1ff481SSatish Balay 
4624c1ff481SSatish Balay   Input Parameters:
4634c1ff481SSatish Balay   stash  - the stash
4644c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
4654c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
4664c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
4674c1ff481SSatish Balay            values. Each block is of size bs*bs.
4684c1ff481SSatish Balay   values - the values inserted
4694c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4704c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4714c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4724c1ff481SSatish Balay */
4734a2ae208SSatish Balay #undef __FUNCT__
4744a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
475c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4764c1ff481SSatish Balay {
477dfbe8321SBarry Smith   PetscErrorCode  ierr;
478*75cae7c1SHong Zhang   PetscInt        i,j,k,bs2,bs=stash->bs,l;
479f15d580aSBarry Smith   const MatScalar *vals;
480f15d580aSBarry Smith   MatScalar       *array;
481*75cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4824c1ff481SSatish Balay 
4834c1ff481SSatish Balay   PetscFunctionBegin;
484*75cae7c1SHong Zhang   if (!space || space->local_remaining < n){
4858798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4864c1ff481SSatish Balay   }
487*75cae7c1SHong Zhang   space = stash->space;
488*75cae7c1SHong Zhang   l     = space->local_used;
489*75cae7c1SHong Zhang   bs2   = bs*bs;
4904c1ff481SSatish Balay   for (i=0; i<n; i++) {
491*75cae7c1SHong Zhang     space->idx[l] = row;
492*75cae7c1SHong Zhang     space->idy[l] = idxn[i];
493*75cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
494*75cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
495*75cae7c1SHong Zhang      funtion call */
496*75cae7c1SHong Zhang     array = space->val + bs2*l;
497*75cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
498*75cae7c1SHong Zhang     for (j=0; j<bs; j++) {
499*75cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
500*75cae7c1SHong Zhang       array += bs;
501*75cae7c1SHong Zhang       vals  += rmax*bs;
502*75cae7c1SHong Zhang     }
503*75cae7c1SHong Zhang #ifdef MV
5044c1ff481SSatish Balay     stash->idx[stash->n]   = row;
5054c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
5064c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
5074c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
5084c1ff481SSatish Balay      funtion call */
509a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
510a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
511a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
5120ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k] = vals[k];}
513a2d1c673SSatish Balay       array += bs;
514a2d1c673SSatish Balay       vals  += rmax*bs;
515a2d1c673SSatish Balay     }
516*75cae7c1SHong Zhang #endif
517a2d1c673SSatish Balay     stash->n++;
5189417f4adSLois Curfman McInnes   }
519*75cae7c1SHong Zhang   space->local_used      += n;
520*75cae7c1SHong Zhang   space->local_remaining -= n;
5213a40ed3dSBarry Smith   PetscFunctionReturn(0);
5229417f4adSLois Curfman McInnes }
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: In the case of the blocked stash the 'owners' array has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
*/
5384a2ae208SSatish Balay #undef __FUNCT__
5394a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
540c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
541bc5ccf88SSatish Balay {
542c1ac3661SBarry Smith   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
543fe09c992SBarry Smith   PetscInt       size=stash->size,nsends;
5446849ba73SBarry Smith   PetscErrorCode ierr;
545*75cae7c1SHong Zhang   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
546563fb871SSatish Balay   MatScalar      **rvalues,*svalues;
547bc5ccf88SSatish Balay   MPI_Comm       comm = stash->comm;
548563fb871SSatish Balay   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
549fe09c992SBarry Smith   PetscMPIInt    *nprocs,*nlengths,nreceives;
550*75cae7c1SHong Zhang   PetscInt       *idx_ptr,*idy,n=stash->n;
551*75cae7c1SHong Zhang   MatScalar      *val;
552*75cae7c1SHong Zhang   PetscMatStashSpace space,space_next,space_head=stash->space_head;
553bc5ccf88SSatish Balay 
554bc5ccf88SSatish Balay   PetscFunctionBegin;
5554c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
556*75cae7c1SHong Zhang #ifdef MV
557*75cae7c1SHong Zhang   PetscMPIInt rank;
558*75cae7c1SHong Zhang   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
559*75cae7c1SHong Zhang #endif
560*75cae7c1SHong Zhang 
561*75cae7c1SHong Zhang #ifdef MV
562*75cae7c1SHong Zhang   /* Copy values of space into val, idx, idy, and destroy space */
563*75cae7c1SHong Zhang   ierr = PetscMalloc((n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&val);CHKERRQ(ierr);
564*75cae7c1SHong Zhang   idx_ptr = (PetscInt*)(val + bs2*n);
565*75cae7c1SHong Zhang   idy     = (PetscInt*)(idx_ptr + n);
566*75cae7c1SHong Zhang   ierr = PetscMatStashSpaceContiguous(bs2,&stash->space_head,val,idx_ptr,idy);CHKERRQ(ierr);
567*75cae7c1SHong Zhang   /* printf("[%d] Before and after SpaceContiguous, space->head %p/%p\n",rank,space_head,stash->space_head); */
568*75cae7c1SHong Zhang   ierr = PetscFree(val);CHKERRQ(ierr);
569*75cae7c1SHong Zhang #endif
570*75cae7c1SHong Zhang #ifdef MV
571*75cae7c1SHong Zhang   printf("[%d] compare array with val ...\n",rank);
572*75cae7c1SHong Zhang   PetscScalar *array,*valtmp;
573*75cae7c1SHong Zhang   for (i=0; i<stash->n; i++){
574*75cae7c1SHong Zhang     array = stash->array +i*bs2;
575*75cae7c1SHong Zhang     valtmp = val+i*bs2;
576*75cae7c1SHong Zhang     for (j=0; j<bs2; j++){
577*75cae7c1SHong Zhang       if (*array != *valtmp) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %g != val %g",i,*array,*valtmp);
578*75cae7c1SHong Zhang       array++; valtmp++;
579*75cae7c1SHong Zhang     }
580*75cae7c1SHong Zhang     if (stash->idx[i] != idx_ptr[i]) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %d != idx %d",i,stash->idx[i],idx_ptr[i]);
581*75cae7c1SHong Zhang     if (stash->idy[i] != idy[i]) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %d != idy %d",i,stash->idy[i],idy[i]);
582*75cae7c1SHong Zhang   }
583*75cae7c1SHong Zhang #endif
584*75cae7c1SHong Zhang 
585bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
586fe09c992SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
587fe09c992SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
588c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
589a2d1c673SSatish Balay 
590563fb871SSatish Balay   nlengths = nprocs+size;
591*75cae7c1SHong Zhang   i = j    = 0;
5927357eb19SBarry Smith   lastidx  = -1;
593*75cae7c1SHong Zhang   space    = space_head;
594*75cae7c1SHong Zhang   while (space != PETSC_NULL){
595*75cae7c1SHong Zhang     space_next = space->next;
596*75cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
597*75cae7c1SHong Zhang       idx=space->idx[l];
5987357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
599*75cae7c1SHong Zhang       if (lastidx > idx) j = 0; /* idx = stash->idx[i] */
6007357eb19SBarry Smith       lastidx = idx;
6017357eb19SBarry Smith       for (; j<size; j++) {
6024c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
603563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
604bc5ccf88SSatish Balay         }
605bc5ccf88SSatish Balay       }
606*75cae7c1SHong Zhang       i++;
607*75cae7c1SHong Zhang     }
608*75cae7c1SHong Zhang     space = space_next;
609bc5ccf88SSatish Balay   }
610563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
611563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
612563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
613563fb871SSatish Balay   }
614bc5ccf88SSatish Balay 
615563fb871SSatish Balay   { int  *onodes,*olengths;
616563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
617563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
618563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
619563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
620563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
621563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
622563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
623563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
624563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
625563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
626563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
627bc5ccf88SSatish Balay   }
628bc5ccf88SSatish Balay 
629bc5ccf88SSatish Balay   /* do sends:
630bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
631bc5ccf88SSatish Balay          the ith processor
632bc5ccf88SSatish Balay   */
633c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
634c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
635b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
636c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
637bc5ccf88SSatish Balay   starti   = startv + size;
638a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
639bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
640bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
641563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
642563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
643bc5ccf88SSatish Balay   }
644*75cae7c1SHong Zhang 
645*75cae7c1SHong Zhang   i     = 0;
646*75cae7c1SHong Zhang   space = space_head;
647*75cae7c1SHong Zhang   while (space != PETSC_NULL){
648*75cae7c1SHong Zhang     space_next = space->next;
649*75cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
650bc5ccf88SSatish Balay       j = owner[i];
651a2d1c673SSatish Balay       if (bs2 == 1) {
652*75cae7c1SHong Zhang         svalues[startv[j]] = space->val[l];       /* = stash->array[i]; */
653a2d1c673SSatish Balay       } else {
654c1ac3661SBarry Smith         PetscInt       k;
6553eda8832SBarry Smith         MatScalar *buf1,*buf2;
6564c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
657*75cae7c1SHong Zhang         buf2 = space->val + bs2*i;                      /* stash->array+bs2*i; */
6584c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
659a2d1c673SSatish Balay       }
660*75cae7c1SHong Zhang       sindices[starti[j]]             = space->idx[l]; /* stash->idx[i]; */
661*75cae7c1SHong Zhang       sindices[starti[j]+nlengths[j]] = space->idy[l]; /* stash->idy[i]; */
662bc5ccf88SSatish Balay       startv[j]++;
663bc5ccf88SSatish Balay       starti[j]++;
664*75cae7c1SHong Zhang       i++;
665*75cae7c1SHong Zhang     }
666*75cae7c1SHong Zhang     space = space_next;
667bc5ccf88SSatish Balay   }
668bc5ccf88SSatish Balay   startv[0] = 0;
669563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
670e5d0e772SSatish Balay 
671bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
672563fb871SSatish Balay     if (nprocs[i]) {
673563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
674563fb871SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
675bc5ccf88SSatish Balay     }
676b85c94c3SSatish Balay   }
6775bcf5ddbSSatish Balay #if defined(PETSC_USE_VERBOSE)
67809f3b4e5SSatish Balay   ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: No of messages: %d \n",nsends));CHKERRQ(ierr);
679e5d0e772SSatish Balay   for (i=0; i<size; i++) {
680e5d0e772SSatish Balay     if (nprocs[i]) {
68109f3b4e5SSatish Balay       ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
682e5d0e772SSatish Balay     }
683e5d0e772SSatish Balay   }
684e5d0e772SSatish Balay #endif
685606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
686606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
687a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
688a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
689a2d1c673SSatish Balay   stash->nprocs      = nprocs;
690a2d1c673SSatish Balay 
691563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
692563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
693563fb871SSatish Balay 
694563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
695563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
696563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
697563fb871SSatish Balay   }
698563fb871SSatish Balay   stash->recv_waits = recv_waits;
699563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
700563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
701563fb871SSatish Balay 
702bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
703563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
704bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
705bc5ccf88SSatish Balay   PetscFunctionReturn(0);
706bc5ccf88SSatish Balay }
707bc5ccf88SSatish Balay 
708a2d1c673SSatish Balay /*
7098798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
7108798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
7114c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
7124c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
7134c1ff481SSatish Balay 
7144c1ff481SSatish Balay    Input Parameters:
7154c1ff481SSatish Balay    stash - the stash
7164c1ff481SSatish Balay 
7174c1ff481SSatish Balay    Output Parameters:
7184c1ff481SSatish Balay    nvals - the number of entries in the current message.
7194c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
7204c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
7214c1ff481SSatish Balay    vals  - the values
7224c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
7234c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
7244c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
725a2d1c673SSatish Balay */
7264a2ae208SSatish Balay #undef __FUNCT__
7274a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
728c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
729bc5ccf88SSatish Balay {
7306849ba73SBarry Smith   PetscErrorCode ierr;
731fe09c992SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2;
732fe09c992SBarry Smith   PetscInt       bs2;
733a2d1c673SSatish Balay   MPI_Status     recv_status;
734b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
735bc5ccf88SSatish Balay 
736bc5ccf88SSatish Balay   PetscFunctionBegin;
737bc5ccf88SSatish Balay 
738a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
739a2d1c673SSatish Balay   /* Return if no more messages to process */
740a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
741a2d1c673SSatish Balay 
742a2d1c673SSatish Balay   flg_v = stash->nprocs;
7434c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
744a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
745a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
746a2d1c673SSatish Balay   while (!match_found) {
747a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
748a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
749a2d1c673SSatish Balay     if (i % 2) {
7503eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
751c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
752a2d1c673SSatish Balay       *nvals = *nvals/bs2;
753563fb871SSatish Balay     } else {
754563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
755563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
756563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
757bc5ccf88SSatish Balay     }
758a2d1c673SSatish Balay 
759a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
760c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
761c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
762a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
763563fb871SSatish Balay       *rows       = stash->rindices[i2];
764a2d1c673SSatish Balay       *cols       = *rows + *nvals;
765563fb871SSatish Balay       *vals       = stash->rvalues[i1];
766a2d1c673SSatish Balay       *flg        = 1;
767a2d1c673SSatish Balay       stash->nprocessed ++;
76835d8aa7fSBarry Smith       match_found = PETSC_TRUE;
769bc5ccf88SSatish Balay     }
770bc5ccf88SSatish Balay   }
771bc5ccf88SSatish Balay   PetscFunctionReturn(0);
772bc5ccf88SSatish Balay }
773