xref: /petsc/src/mat/utils/matstash.c (revision a2d1c673a0257ed9b5719df99a4d7488fb13e120)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*a2d1c673SSatish Balay static char vcid[] = "$Id: stash.c,v 1.21 1999/03/09 21:33:15 balay Exp balay $";
32d5177cdSBarry Smith #endif
42d5177cdSBarry Smith 
570f55243SBarry Smith #include "src/mat/matimpl.h"
69417f4adSLois Curfman McInnes 
7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
89417f4adSLois Curfman McInnes /*
99417f4adSLois Curfman McInnes    This stash is currently used for all the parallel matrix implementations.
10d5d45c9bSBarry Smith    The stash is where elements of a matrix destined to be stored on other
11d5d45c9bSBarry Smith    processors are kept until matrix assembly is done.
129417f4adSLois Curfman McInnes 
1390f02eecSBarry Smith    This is a simple minded stash. Simply add entry to end of stash.
149417f4adSLois Curfman McInnes */
159417f4adSLois Curfman McInnes 
165615d1e5SSatish Balay #undef __FUNC__
17bc5ccf88SSatish Balay #define __FUNC__ "StashCreate_Private"
18*a2d1c673SSatish Balay int StashCreate_Private(MPI_Comm comm,int bs_stash, int bs_mat, Stash *stash)
199417f4adSLois Curfman McInnes {
20bc5ccf88SSatish Balay   int ierr,flg,max=DEFAULT_STASH_SIZE;
21bc5ccf88SSatish Balay 
223a40ed3dSBarry Smith   PetscFunctionBegin;
23bc5ccf88SSatish Balay   /* Require 2 tags, get the second using PetscCommGetNewTag() */
24bc5ccf88SSatish Balay   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
25*a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
26bc5ccf88SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr);
27bc5ccf88SSatish Balay   ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
28*a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
29*a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
30bc5ccf88SSatish Balay 
31*a2d1c673SSatish Balay   if (bs_stash <= 0) bs_stash = 1;
32*a2d1c673SSatish Balay   if (bs_mat   <= 0) bs_mat   = 1;
33*a2d1c673SSatish Balay 
34*a2d1c673SSatish Balay   stash->bs_stash = bs_stash;
35*a2d1c673SSatish Balay   stash->bs_mat   = bs_mat;
369417f4adSLois Curfman McInnes   stash->nmax     = 0;
379417f4adSLois Curfman McInnes   stash->n        = 0;
38bc5ccf88SSatish Balay   stash->reallocs = 0;
399417f4adSLois Curfman McInnes   stash->idx      = 0;
409417f4adSLois Curfman McInnes   stash->idy      = 0;
41bc5ccf88SSatish Balay   stash->array    = 0;
429417f4adSLois Curfman McInnes 
43bc5ccf88SSatish Balay   stash->send_waits  = 0;
44bc5ccf88SSatish Balay   stash->recv_waits  = 0;
45*a2d1c673SSatish Balay   stash->send_status = 0;
46bc5ccf88SSatish Balay   stash->nsends      = 0;
47bc5ccf88SSatish Balay   stash->nrecvs      = 0;
48bc5ccf88SSatish Balay   stash->svalues     = 0;
49bc5ccf88SSatish Balay   stash->rvalues     = 0;
50bc5ccf88SSatish Balay   stash->rmax        = 0;
51*a2d1c673SSatish Balay   stash->nprocs      = 0;
52*a2d1c673SSatish Balay   stash->nprocessed  = 0;
533a40ed3dSBarry Smith   PetscFunctionReturn(0);
549417f4adSLois Curfman McInnes }
559417f4adSLois Curfman McInnes 
565615d1e5SSatish Balay #undef __FUNC__
57d4bb536fSBarry Smith #define __FUNC__ "StashDestroy_Private"
589417f4adSLois Curfman McInnes int StashDestroy_Private(Stash *stash)
599417f4adSLois Curfman McInnes {
60bc5ccf88SSatish Balay   int ierr;
61*a2d1c673SSatish Balay 
62bc5ccf88SSatish Balay   PetscFunctionBegin;
63bc5ccf88SSatish Balay   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
64bc5ccf88SSatish Balay   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
65bc5ccf88SSatish Balay   PetscFunctionReturn(0);
66bc5ccf88SSatish Balay }
67bc5ccf88SSatish Balay 
68bc5ccf88SSatish Balay #undef __FUNC__
69*a2d1c673SSatish Balay #define __FUNC__ "StashScatterEnd_Private"
70*a2d1c673SSatish Balay int StashScatterEnd_Private(Stash *stash)
71bc5ccf88SSatish Balay {
72*a2d1c673SSatish Balay   int         nsends=stash->nsends,ierr;
73*a2d1c673SSatish Balay   MPI_Status  *send_status;
74*a2d1c673SSatish Balay 
753a40ed3dSBarry Smith   PetscFunctionBegin;
76*a2d1c673SSatish Balay   /* wait on sends */
77*a2d1c673SSatish Balay   if (nsends) {
78*a2d1c673SSatish Balay     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
79*a2d1c673SSatish Balay     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
80*a2d1c673SSatish Balay     PetscFree(send_status);
81*a2d1c673SSatish Balay   }
82*a2d1c673SSatish Balay 
83d07ff455SSatish Balay   /* Now update nmaxold to be app 10% more than nmax, this way the
84d07ff455SSatish Balay      wastage of space is reduced the next time this stash is used */
85bc5ccf88SSatish Balay   stash->oldnmax    = (int)(stash->nmax * 1.1) + 5;
86d07ff455SSatish Balay   stash->nmax       = 0;
87d07ff455SSatish Balay   stash->n          = 0;
88bc5ccf88SSatish Balay   stash->reallocs   = 0;
89bc5ccf88SSatish Balay   stash->rmax       = 0;
90*a2d1c673SSatish Balay   stash->nprocessed = 0;
91bc5ccf88SSatish Balay 
92bc5ccf88SSatish Balay   if (stash->array) {
93bc5ccf88SSatish Balay     PetscFree(stash->array);
94bc5ccf88SSatish Balay     stash->array = 0;
95bc5ccf88SSatish Balay     stash->idx   = 0;
96bc5ccf88SSatish Balay     stash->idy   = 0;
97bc5ccf88SSatish Balay   }
98bc5ccf88SSatish Balay   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
99bc5ccf88SSatish Balay   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
100bc5ccf88SSatish Balay   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
101bc5ccf88SSatish Balay   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
102*a2d1c673SSatish Balay   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
103bc5ccf88SSatish Balay 
1043a40ed3dSBarry Smith   PetscFunctionReturn(0);
1059417f4adSLois Curfman McInnes }
1069417f4adSLois Curfman McInnes 
1075615d1e5SSatish Balay #undef __FUNC__
108d4bb536fSBarry Smith #define __FUNC__ "StashInfo_Private"
10997530c3fSBarry Smith int StashInfo_Private(Stash *stash)
11097530c3fSBarry Smith {
1113a40ed3dSBarry Smith   PetscFunctionBegin;
112bc5ccf88SSatish Balay   PLogInfo(0,"StashInfo_Private:Stash size %d, mallocs incured %d\n",stash->n,stash->reallocs);
113bc5ccf88SSatish Balay   PetscFunctionReturn(0);
114bc5ccf88SSatish Balay }
115bc5ccf88SSatish Balay #undef __FUNC__
116bc5ccf88SSatish Balay #define __FUNC__ "StashSetInitialSize_Private"
117bc5ccf88SSatish Balay int StashSetInitialSize_Private(Stash *stash,int max)
118bc5ccf88SSatish Balay {
119bc5ccf88SSatish Balay   PetscFunctionBegin;
120bc5ccf88SSatish Balay   stash->oldnmax = max;
121bc5ccf88SSatish Balay   stash->nmax    = 0;
1223a40ed3dSBarry Smith   PetscFunctionReturn(0);
12397530c3fSBarry Smith }
12497530c3fSBarry Smith 
1255615d1e5SSatish Balay #undef __FUNC__
126bc5ccf88SSatish Balay #define __FUNC__ "StashExpand_Private"
127bc5ccf88SSatish Balay static int StashExpand_Private(Stash *stash)
1289417f4adSLois Curfman McInnes {
129*a2d1c673SSatish Balay   int    *n_idx,*n_idy,newnmax,bs2;
130bc5ccf88SSatish Balay   Scalar *n_array;
1319417f4adSLois Curfman McInnes 
1323a40ed3dSBarry Smith   PetscFunctionBegin;
1339417f4adSLois Curfman McInnes   /* allocate a larger stash */
134d07ff455SSatish Balay   if (stash->nmax == 0) newnmax = stash->oldnmax;
135d07ff455SSatish Balay   else                  newnmax = stash->nmax*2;
136d07ff455SSatish Balay 
137*a2d1c673SSatish Balay   bs2     = stash->bs_stash*stash->bs_stash;
138*a2d1c673SSatish Balay   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
139*a2d1c673SSatish Balay   n_idx   = (int *) (n_array + bs2*newnmax);
140d07ff455SSatish Balay   n_idy   = (int *) (n_idx + newnmax);
141*a2d1c673SSatish Balay   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
142416022c9SBarry Smith   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
143416022c9SBarry Smith   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
1440452661fSBarry Smith   if (stash->array) PetscFree(stash->array);
145d07ff455SSatish Balay   stash->array   = n_array;
146d07ff455SSatish Balay   stash->idx     = n_idx;
147d07ff455SSatish Balay   stash->idy     = n_idy;
148d07ff455SSatish Balay   stash->nmax    = newnmax;
149d07ff455SSatish Balay   stash->oldnmax = newnmax;
150bc5ccf88SSatish Balay   stash->reallocs++;
151bc5ccf88SSatish Balay   PetscFunctionReturn(0);
152bc5ccf88SSatish Balay }
153bc5ccf88SSatish Balay 
154bc5ccf88SSatish Balay /*
155bc5ccf88SSatish Balay     Should do this properly. With a sorted array.
156bc5ccf88SSatish Balay */
157bc5ccf88SSatish Balay #undef __FUNC__
158bc5ccf88SSatish Balay #define __FUNC__ "StashValues_Private"
159*a2d1c673SSatish Balay int StashValues_Private(Stash *stash,int row,int n, int *idxn,Scalar *values)
160bc5ccf88SSatish Balay {
161*a2d1c673SSatish Balay   int    ierr,i;
162bc5ccf88SSatish Balay   Scalar val;
163bc5ccf88SSatish Balay 
164bc5ccf88SSatish Balay   PetscFunctionBegin;
165bc5ccf88SSatish Balay   for ( i=0; i<n; i++ ) {
166bc5ccf88SSatish Balay     if ( stash->n == stash->nmax ) {
167bc5ccf88SSatish Balay       ierr = StashExpand_Private(stash); CHKERRQ(ierr);
1689417f4adSLois Curfman McInnes     }
1699417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
170*a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
171*a2d1c673SSatish Balay     stash->array[stash->n] = values[i];
172*a2d1c673SSatish Balay     stash->n++;
1739417f4adSLois Curfman McInnes   }
174*a2d1c673SSatish Balay   PetscFunctionReturn(0);
175*a2d1c673SSatish Balay }
176*a2d1c673SSatish Balay 
177*a2d1c673SSatish Balay #undef __FUNC__
178*a2d1c673SSatish Balay #define __FUNC__ "StashValuesBlocked_Private"
179*a2d1c673SSatish Balay int StashValuesBlocked_Private(Stash *stash,int row,int n,int *idxn,Scalar *values,
180*a2d1c673SSatish Balay                                int rmax,int cmax,int roworiented,int idx)
181*a2d1c673SSatish Balay {
182*a2d1c673SSatish Balay   int    ierr,i,j,k,found,bs2,bs=stash->bs_stash;
183*a2d1c673SSatish Balay   Scalar *vals,*array;
184*a2d1c673SSatish Balay 
185*a2d1c673SSatish Balay   /* stepval gives the offset that one should use to access the next line of
186*a2d1c673SSatish Balay      a given block of values */
187*a2d1c673SSatish Balay 
188*a2d1c673SSatish Balay   PetscFunctionBegin;
189*a2d1c673SSatish Balay   bs2 = bs*bs;
190*a2d1c673SSatish Balay   for ( i=0; i<n; i++ ) {
191*a2d1c673SSatish Balay     if ( stash->n == stash->nmax ) {
192*a2d1c673SSatish Balay       ierr = StashExpand_Private(stash); CHKERRQ(ierr);
193*a2d1c673SSatish Balay     }
194*a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
195*a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
196*a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
197*a2d1c673SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
198*a2d1c673SSatish Balay      funtion call */
199*a2d1c673SSatish Balay     if (roworiented) {
200*a2d1c673SSatish Balay       array = stash->array + bs2*stash->n;
201*a2d1c673SSatish Balay       vals  = values + idx*bs2*n + bs*i;
202*a2d1c673SSatish Balay       for ( j=0; j<bs; j++ ) {
203*a2d1c673SSatish Balay         for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
204*a2d1c673SSatish Balay         array += 1;
205*a2d1c673SSatish Balay         vals  += cmax*bs;
206*a2d1c673SSatish Balay       }
207*a2d1c673SSatish Balay     } else {
208*a2d1c673SSatish Balay       array = stash->array + bs2*stash->n;
209*a2d1c673SSatish Balay       vals  = values + idx*bs + bs2*rmax*i;
210*a2d1c673SSatish Balay       for ( j=0; j<bs; j++ ) {
211*a2d1c673SSatish Balay         for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
212*a2d1c673SSatish Balay         array += bs;
213*a2d1c673SSatish Balay         vals  += rmax*bs;
214*a2d1c673SSatish Balay       }
215*a2d1c673SSatish Balay     }
216*a2d1c673SSatish Balay     stash->n++;
2179417f4adSLois Curfman McInnes   }
2183a40ed3dSBarry Smith   PetscFunctionReturn(0);
2199417f4adSLois Curfman McInnes }
220bc5ccf88SSatish Balay 
221bc5ccf88SSatish Balay #undef __FUNC__
222bc5ccf88SSatish Balay #define __FUNC__ "StashScatterBegin_Private"
223bc5ccf88SSatish Balay int StashScatterBegin_Private(Stash *stash,int *owners)
224bc5ccf88SSatish Balay {
225*a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
226*a2d1c673SSatish Balay   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
227*a2d1c673SSatish Balay   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx,mult;
228*a2d1c673SSatish Balay   Scalar      *rvalues,*svalues;
229bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
230bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
231bc5ccf88SSatish Balay 
232bc5ccf88SSatish Balay   PetscFunctionBegin;
233bc5ccf88SSatish Balay 
234*a2d1c673SSatish Balay   bs2 = stash->bs_stash*stash->bs_stash;
235bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
236bc5ccf88SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
237bc5ccf88SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
238bc5ccf88SSatish Balay   owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
239*a2d1c673SSatish Balay 
240*a2d1c673SSatish Balay   /* if blockstash, then the the owners and row,col indices
241*a2d1c673SSatish Balay    correspond to reduced indices */
242*a2d1c673SSatish Balay   if (stash->bs_stash == 1) mult = stash->bs_mat;
243*a2d1c673SSatish Balay   else                      mult = 1;
244*a2d1c673SSatish Balay 
245bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
246bc5ccf88SSatish Balay     idx = stash->idx[i];
247bc5ccf88SSatish Balay     for ( j=0; j<size; j++ ) {
248*a2d1c673SSatish Balay       if (idx >= mult*owners[j] && idx < mult*owners[j+1]) {
249bc5ccf88SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
250bc5ccf88SSatish Balay       }
251bc5ccf88SSatish Balay     }
252bc5ccf88SSatish Balay   }
253bc5ccf88SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
254bc5ccf88SSatish Balay 
255bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
256bc5ccf88SSatish Balay   work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
257bc5ccf88SSatish Balay   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
258bc5ccf88SSatish Balay   nreceives = work[rank];
259bc5ccf88SSatish Balay   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
260bc5ccf88SSatish Balay   nmax = work[rank];
261bc5ccf88SSatish Balay   PetscFree(work);
262bc5ccf88SSatish Balay   /* post receives:
263bc5ccf88SSatish Balay      since we don't know how long each individual message is we
264bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
265bc5ccf88SSatish Balay      this is a lot of wasted space.
266bc5ccf88SSatish Balay   */
267*a2d1c673SSatish Balay   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
268*a2d1c673SSatish Balay   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
269*a2d1c673SSatish Balay   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
270bc5ccf88SSatish Balay   for ( i=0,count=0; i<nreceives; i++ ) {
271*a2d1c673SSatish Balay     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
272bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
273bc5ccf88SSatish Balay     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
274bc5ccf88SSatish Balay                      recv_waits+count++); CHKERRQ(ierr);
275bc5ccf88SSatish Balay   }
276bc5ccf88SSatish Balay 
277bc5ccf88SSatish Balay   /* do sends:
278bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
279bc5ccf88SSatish Balay          the ith processor
280bc5ccf88SSatish Balay   */
281*a2d1c673SSatish Balay   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
282*a2d1c673SSatish Balay   sindices   = (int *) (svalues + bs2*stash->n);
283bc5ccf88SSatish Balay   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
284bc5ccf88SSatish Balay   CHKPTRQ(send_waits);
285bc5ccf88SSatish Balay   startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
286bc5ccf88SSatish Balay   starti     = startv + size;
287*a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
288bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
289bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) {
290bc5ccf88SSatish Balay     startv[i] = startv[i-1] + nprocs[i-1];
291bc5ccf88SSatish Balay     starti[i] = starti[i-1] + nprocs[i-1]*2;
292bc5ccf88SSatish Balay   }
293bc5ccf88SSatish Balay   for ( i=0; i<stash->n; i++ ) {
294bc5ccf88SSatish Balay     j = owner[i];
295*a2d1c673SSatish Balay     if (bs2 == 1) {
296bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
297*a2d1c673SSatish Balay     } else {
298*a2d1c673SSatish Balay       PetscMemcpy(svalues+bs2*startv[j],stash->array+bs2*i,bs2*sizeof(Scalar));
299*a2d1c673SSatish Balay     }
300bc5ccf88SSatish Balay     sindices[starti[j]]             = stash->idx[i];
301bc5ccf88SSatish Balay     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
302bc5ccf88SSatish Balay     startv[j]++;
303bc5ccf88SSatish Balay     starti[j]++;
304bc5ccf88SSatish Balay   }
305bc5ccf88SSatish Balay   startv[0] = 0;
306bc5ccf88SSatish Balay   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
307bc5ccf88SSatish Balay   for ( i=0,count=0; i<size; i++ ) {
308bc5ccf88SSatish Balay     if (procs[i]) {
309*a2d1c673SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
310bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
311bc5ccf88SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
312bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
313bc5ccf88SSatish Balay     }
314bc5ccf88SSatish Balay   }
315bc5ccf88SSatish Balay   PetscFree(owner);
316bc5ccf88SSatish Balay   PetscFree(startv);
317*a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
318*a2d1c673SSatish Balay   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
319*a2d1c673SSatish Balay   stash->nprocs      = nprocs;
320*a2d1c673SSatish Balay 
321bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
322bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
323bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
324bc5ccf88SSatish Balay   stash->rmax       = nmax;
325bc5ccf88SSatish Balay   PetscFunctionReturn(0);
326bc5ccf88SSatish Balay }
327bc5ccf88SSatish Balay 
328*a2d1c673SSatish Balay /*
329*a2d1c673SSatish Balay    This function waits on the receives posted in the function
330*a2d1c673SSatish Balay    StashScatterBegin_Private() and returns one message at a time to
331*a2d1c673SSatish Balay    the calling function. If no messages are left, it indicates this by
332*a2d1c673SSatish Balay    setting flg = 0, else it sets flg = 1.
333*a2d1c673SSatish Balay */
334bc5ccf88SSatish Balay #undef __FUNC__
335*a2d1c673SSatish Balay #define __FUNC__ "StashScatterGetMesg_Private"
336*a2d1c673SSatish Balay int StashScatterGetMesg_Private(Stash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
337bc5ccf88SSatish Balay {
338*a2d1c673SSatish Balay   int         i,ierr,size=stash->size,*flg_v,*flg_i;
339*a2d1c673SSatish Balay   int         i1,i2,*rindices,match_found=0,bs2;
340*a2d1c673SSatish Balay   MPI_Status  recv_status;
341bc5ccf88SSatish Balay 
342bc5ccf88SSatish Balay   PetscFunctionBegin;
343bc5ccf88SSatish Balay 
344*a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
345*a2d1c673SSatish Balay   /* Return if no more messages to process */
346*a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
347*a2d1c673SSatish Balay 
348*a2d1c673SSatish Balay   flg_v = stash->nprocs;
349*a2d1c673SSatish Balay   flg_i = flg_v + size;
350*a2d1c673SSatish Balay   bs2   = stash->bs_stash*stash->bs_stash;
351*a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
352*a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
353*a2d1c673SSatish Balay   while (!match_found) {
354*a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
355*a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
356*a2d1c673SSatish Balay     if (i % 2) {
357*a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
358*a2d1c673SSatish Balay       flg_i[recv_status.MPI_SOURCE] = i/2;
359*a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
360*a2d1c673SSatish Balay     } else {
361*a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
362*a2d1c673SSatish Balay       flg_v[recv_status.MPI_SOURCE] = i/2;
363*a2d1c673SSatish Balay       *nvals = *nvals/bs2;
364bc5ccf88SSatish Balay     }
365*a2d1c673SSatish Balay 
366*a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
367*a2d1c673SSatish Balay     i1 = flg_v[recv_status.MPI_SOURCE];
368*a2d1c673SSatish Balay     i2 = flg_i[recv_status.MPI_SOURCE];
369*a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
370*a2d1c673SSatish Balay       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
371*a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
372*a2d1c673SSatish Balay       *cols       = *rows + *nvals;
373*a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
374*a2d1c673SSatish Balay       *flg        = 1;
375*a2d1c673SSatish Balay       stash->nprocessed ++;
376*a2d1c673SSatish Balay       match_found = 1;
377bc5ccf88SSatish Balay     }
378bc5ccf88SSatish Balay   }
379bc5ccf88SSatish Balay   PetscFunctionReturn(0);
380bc5ccf88SSatish Balay }
381