1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*a2d1c673SSatish Balay static char vcid[] = "$Id: stash.c,v 1.21 1999/03/09 21:33:15 balay Exp balay $"; 32d5177cdSBarry Smith #endif 42d5177cdSBarry Smith 570f55243SBarry Smith #include "src/mat/matimpl.h" 69417f4adSLois Curfman McInnes 7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 89417f4adSLois Curfman McInnes /* 99417f4adSLois Curfman McInnes This stash is currently used for all the parallel matrix implementations. 10d5d45c9bSBarry Smith The stash is where elements of a matrix destined to be stored on other 11d5d45c9bSBarry Smith processors are kept until matrix assembly is done. 129417f4adSLois Curfman McInnes 1390f02eecSBarry Smith This is a simple minded stash. Simply add entry to end of stash. 149417f4adSLois Curfman McInnes */ 159417f4adSLois Curfman McInnes 165615d1e5SSatish Balay #undef __FUNC__ 17bc5ccf88SSatish Balay #define __FUNC__ "StashCreate_Private" 18*a2d1c673SSatish Balay int StashCreate_Private(MPI_Comm comm,int bs_stash, int bs_mat, Stash *stash) 199417f4adSLois Curfman McInnes { 20bc5ccf88SSatish Balay int ierr,flg,max=DEFAULT_STASH_SIZE; 21bc5ccf88SSatish Balay 223a40ed3dSBarry Smith PetscFunctionBegin; 23bc5ccf88SSatish Balay /* Require 2 tags, get the second using PetscCommGetNewTag() */ 24bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 25*a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 26bc5ccf88SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr); 27bc5ccf88SSatish Balay ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 28*a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 29*a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 30bc5ccf88SSatish Balay 31*a2d1c673SSatish Balay if (bs_stash <= 0) bs_stash = 1; 32*a2d1c673SSatish Balay if (bs_mat <= 0) bs_mat = 1; 33*a2d1c673SSatish Balay 34*a2d1c673SSatish Balay stash->bs_stash = bs_stash; 35*a2d1c673SSatish Balay stash->bs_mat = bs_mat; 369417f4adSLois Curfman McInnes stash->nmax = 0; 379417f4adSLois Curfman McInnes stash->n = 0; 38bc5ccf88SSatish Balay stash->reallocs = 0; 399417f4adSLois Curfman McInnes stash->idx = 0; 409417f4adSLois Curfman McInnes stash->idy = 0; 41bc5ccf88SSatish Balay stash->array = 0; 429417f4adSLois Curfman McInnes 43bc5ccf88SSatish Balay stash->send_waits = 0; 44bc5ccf88SSatish Balay stash->recv_waits = 0; 45*a2d1c673SSatish Balay stash->send_status = 0; 46bc5ccf88SSatish Balay stash->nsends = 0; 47bc5ccf88SSatish Balay stash->nrecvs = 0; 48bc5ccf88SSatish Balay stash->svalues = 0; 49bc5ccf88SSatish Balay stash->rvalues = 0; 50bc5ccf88SSatish Balay stash->rmax = 0; 51*a2d1c673SSatish Balay stash->nprocs = 0; 52*a2d1c673SSatish Balay stash->nprocessed = 0; 533a40ed3dSBarry Smith PetscFunctionReturn(0); 549417f4adSLois Curfman McInnes } 559417f4adSLois Curfman McInnes 565615d1e5SSatish Balay #undef __FUNC__ 57d4bb536fSBarry Smith #define __FUNC__ "StashDestroy_Private" 589417f4adSLois Curfman McInnes int StashDestroy_Private(Stash *stash) 599417f4adSLois Curfman McInnes { 60bc5ccf88SSatish Balay int ierr; 61*a2d1c673SSatish Balay 62bc5ccf88SSatish Balay PetscFunctionBegin; 63bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 64bc5ccf88SSatish Balay if (stash->array) {PetscFree(stash->array); stash->array = 0;} 65bc5ccf88SSatish Balay PetscFunctionReturn(0); 66bc5ccf88SSatish Balay } 67bc5ccf88SSatish Balay 68bc5ccf88SSatish Balay #undef __FUNC__ 69*a2d1c673SSatish Balay #define __FUNC__ "StashScatterEnd_Private" 70*a2d1c673SSatish Balay int StashScatterEnd_Private(Stash *stash) 71bc5ccf88SSatish Balay { 72*a2d1c673SSatish Balay int nsends=stash->nsends,ierr; 73*a2d1c673SSatish Balay MPI_Status *send_status; 74*a2d1c673SSatish Balay 753a40ed3dSBarry Smith PetscFunctionBegin; 76*a2d1c673SSatish Balay /* wait on sends */ 77*a2d1c673SSatish Balay if (nsends) { 78*a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 79*a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 80*a2d1c673SSatish Balay PetscFree(send_status); 81*a2d1c673SSatish Balay } 82*a2d1c673SSatish Balay 83d07ff455SSatish Balay /* Now update nmaxold to be app 10% more than nmax, this way the 84d07ff455SSatish Balay wastage of space is reduced the next time this stash is used */ 85bc5ccf88SSatish Balay stash->oldnmax = (int)(stash->nmax * 1.1) + 5; 86d07ff455SSatish Balay stash->nmax = 0; 87d07ff455SSatish Balay stash->n = 0; 88bc5ccf88SSatish Balay stash->reallocs = 0; 89bc5ccf88SSatish Balay stash->rmax = 0; 90*a2d1c673SSatish Balay stash->nprocessed = 0; 91bc5ccf88SSatish Balay 92bc5ccf88SSatish Balay if (stash->array) { 93bc5ccf88SSatish Balay PetscFree(stash->array); 94bc5ccf88SSatish Balay stash->array = 0; 95bc5ccf88SSatish Balay stash->idx = 0; 96bc5ccf88SSatish Balay stash->idy = 0; 97bc5ccf88SSatish Balay } 98bc5ccf88SSatish Balay if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 99bc5ccf88SSatish Balay if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 100bc5ccf88SSatish Balay if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 101bc5ccf88SSatish Balay if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 102*a2d1c673SSatish Balay if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 103bc5ccf88SSatish Balay 1043a40ed3dSBarry Smith PetscFunctionReturn(0); 1059417f4adSLois Curfman McInnes } 1069417f4adSLois Curfman McInnes 1075615d1e5SSatish Balay #undef __FUNC__ 108d4bb536fSBarry Smith #define __FUNC__ "StashInfo_Private" 10997530c3fSBarry Smith int StashInfo_Private(Stash *stash) 11097530c3fSBarry Smith { 1113a40ed3dSBarry Smith PetscFunctionBegin; 112bc5ccf88SSatish Balay PLogInfo(0,"StashInfo_Private:Stash size %d, mallocs incured %d\n",stash->n,stash->reallocs); 113bc5ccf88SSatish Balay PetscFunctionReturn(0); 114bc5ccf88SSatish Balay } 115bc5ccf88SSatish Balay #undef __FUNC__ 116bc5ccf88SSatish Balay #define __FUNC__ "StashSetInitialSize_Private" 117bc5ccf88SSatish Balay int StashSetInitialSize_Private(Stash *stash,int max) 118bc5ccf88SSatish Balay { 119bc5ccf88SSatish Balay PetscFunctionBegin; 120bc5ccf88SSatish Balay stash->oldnmax = max; 121bc5ccf88SSatish Balay stash->nmax = 0; 1223a40ed3dSBarry Smith PetscFunctionReturn(0); 12397530c3fSBarry Smith } 12497530c3fSBarry Smith 1255615d1e5SSatish Balay #undef __FUNC__ 126bc5ccf88SSatish Balay #define __FUNC__ "StashExpand_Private" 127bc5ccf88SSatish Balay static int StashExpand_Private(Stash *stash) 1289417f4adSLois Curfman McInnes { 129*a2d1c673SSatish Balay int *n_idx,*n_idy,newnmax,bs2; 130bc5ccf88SSatish Balay Scalar *n_array; 1319417f4adSLois Curfman McInnes 1323a40ed3dSBarry Smith PetscFunctionBegin; 1339417f4adSLois Curfman McInnes /* allocate a larger stash */ 134d07ff455SSatish Balay if (stash->nmax == 0) newnmax = stash->oldnmax; 135d07ff455SSatish Balay else newnmax = stash->nmax*2; 136d07ff455SSatish Balay 137*a2d1c673SSatish Balay bs2 = stash->bs_stash*stash->bs_stash; 138*a2d1c673SSatish Balay n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 139*a2d1c673SSatish Balay n_idx = (int *) (n_array + bs2*newnmax); 140d07ff455SSatish Balay n_idy = (int *) (n_idx + newnmax); 141*a2d1c673SSatish Balay PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 142416022c9SBarry Smith PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 143416022c9SBarry Smith PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 1440452661fSBarry Smith if (stash->array) PetscFree(stash->array); 145d07ff455SSatish Balay stash->array = n_array; 146d07ff455SSatish Balay stash->idx = n_idx; 147d07ff455SSatish Balay stash->idy = n_idy; 148d07ff455SSatish Balay stash->nmax = newnmax; 149d07ff455SSatish Balay stash->oldnmax = newnmax; 150bc5ccf88SSatish Balay stash->reallocs++; 151bc5ccf88SSatish Balay PetscFunctionReturn(0); 152bc5ccf88SSatish Balay } 153bc5ccf88SSatish Balay 154bc5ccf88SSatish Balay /* 155bc5ccf88SSatish Balay Should do this properly. With a sorted array. 156bc5ccf88SSatish Balay */ 157bc5ccf88SSatish Balay #undef __FUNC__ 158bc5ccf88SSatish Balay #define __FUNC__ "StashValues_Private" 159*a2d1c673SSatish Balay int StashValues_Private(Stash *stash,int row,int n, int *idxn,Scalar *values) 160bc5ccf88SSatish Balay { 161*a2d1c673SSatish Balay int ierr,i; 162bc5ccf88SSatish Balay Scalar val; 163bc5ccf88SSatish Balay 164bc5ccf88SSatish Balay PetscFunctionBegin; 165bc5ccf88SSatish Balay for ( i=0; i<n; i++ ) { 166bc5ccf88SSatish Balay if ( stash->n == stash->nmax ) { 167bc5ccf88SSatish Balay ierr = StashExpand_Private(stash); CHKERRQ(ierr); 1689417f4adSLois Curfman McInnes } 1699417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 170*a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 171*a2d1c673SSatish Balay stash->array[stash->n] = values[i]; 172*a2d1c673SSatish Balay stash->n++; 1739417f4adSLois Curfman McInnes } 174*a2d1c673SSatish Balay PetscFunctionReturn(0); 175*a2d1c673SSatish Balay } 176*a2d1c673SSatish Balay 177*a2d1c673SSatish Balay #undef __FUNC__ 178*a2d1c673SSatish Balay #define __FUNC__ "StashValuesBlocked_Private" 179*a2d1c673SSatish Balay int StashValuesBlocked_Private(Stash *stash,int row,int n,int *idxn,Scalar *values, 180*a2d1c673SSatish Balay int rmax,int cmax,int roworiented,int idx) 181*a2d1c673SSatish Balay { 182*a2d1c673SSatish Balay int ierr,i,j,k,found,bs2,bs=stash->bs_stash; 183*a2d1c673SSatish Balay Scalar *vals,*array; 184*a2d1c673SSatish Balay 185*a2d1c673SSatish Balay /* stepval gives the offset that one should use to access the next line of 186*a2d1c673SSatish Balay a given block of values */ 187*a2d1c673SSatish Balay 188*a2d1c673SSatish Balay PetscFunctionBegin; 189*a2d1c673SSatish Balay bs2 = bs*bs; 190*a2d1c673SSatish Balay for ( i=0; i<n; i++ ) { 191*a2d1c673SSatish Balay if ( stash->n == stash->nmax ) { 192*a2d1c673SSatish Balay ierr = StashExpand_Private(stash); CHKERRQ(ierr); 193*a2d1c673SSatish Balay } 194*a2d1c673SSatish Balay stash->idx[stash->n] = row; 195*a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 196*a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 197*a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 198*a2d1c673SSatish Balay funtion call */ 199*a2d1c673SSatish Balay if (roworiented) { 200*a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 201*a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 202*a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 203*a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 204*a2d1c673SSatish Balay array += 1; 205*a2d1c673SSatish Balay vals += cmax*bs; 206*a2d1c673SSatish Balay } 207*a2d1c673SSatish Balay } else { 208*a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 209*a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 210*a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 211*a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 212*a2d1c673SSatish Balay array += bs; 213*a2d1c673SSatish Balay vals += rmax*bs; 214*a2d1c673SSatish Balay } 215*a2d1c673SSatish Balay } 216*a2d1c673SSatish Balay stash->n++; 2179417f4adSLois Curfman McInnes } 2183a40ed3dSBarry Smith PetscFunctionReturn(0); 2199417f4adSLois Curfman McInnes } 220bc5ccf88SSatish Balay 221bc5ccf88SSatish Balay #undef __FUNC__ 222bc5ccf88SSatish Balay #define __FUNC__ "StashScatterBegin_Private" 223bc5ccf88SSatish Balay int StashScatterBegin_Private(Stash *stash,int *owners) 224bc5ccf88SSatish Balay { 225*a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 226*a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 227*a2d1c673SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx,mult; 228*a2d1c673SSatish Balay Scalar *rvalues,*svalues; 229bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 230bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 231bc5ccf88SSatish Balay 232bc5ccf88SSatish Balay PetscFunctionBegin; 233bc5ccf88SSatish Balay 234*a2d1c673SSatish Balay bs2 = stash->bs_stash*stash->bs_stash; 235bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 236bc5ccf88SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 237bc5ccf88SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 238bc5ccf88SSatish Balay owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 239*a2d1c673SSatish Balay 240*a2d1c673SSatish Balay /* if blockstash, then the the owners and row,col indices 241*a2d1c673SSatish Balay correspond to reduced indices */ 242*a2d1c673SSatish Balay if (stash->bs_stash == 1) mult = stash->bs_mat; 243*a2d1c673SSatish Balay else mult = 1; 244*a2d1c673SSatish Balay 245bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 246bc5ccf88SSatish Balay idx = stash->idx[i]; 247bc5ccf88SSatish Balay for ( j=0; j<size; j++ ) { 248*a2d1c673SSatish Balay if (idx >= mult*owners[j] && idx < mult*owners[j+1]) { 249bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 250bc5ccf88SSatish Balay } 251bc5ccf88SSatish Balay } 252bc5ccf88SSatish Balay } 253bc5ccf88SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 254bc5ccf88SSatish Balay 255bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 256bc5ccf88SSatish Balay work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 257bc5ccf88SSatish Balay ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 258bc5ccf88SSatish Balay nreceives = work[rank]; 259bc5ccf88SSatish Balay ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 260bc5ccf88SSatish Balay nmax = work[rank]; 261bc5ccf88SSatish Balay PetscFree(work); 262bc5ccf88SSatish Balay /* post receives: 263bc5ccf88SSatish Balay since we don't know how long each individual message is we 264bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 265bc5ccf88SSatish Balay this is a lot of wasted space. 266bc5ccf88SSatish Balay */ 267*a2d1c673SSatish Balay rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 268*a2d1c673SSatish Balay rindices = (int *) (rvalues + bs2*nreceives*nmax); 269*a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 270bc5ccf88SSatish Balay for ( i=0,count=0; i<nreceives; i++ ) { 271*a2d1c673SSatish Balay ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 272bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 273bc5ccf88SSatish Balay ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 274bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 275bc5ccf88SSatish Balay } 276bc5ccf88SSatish Balay 277bc5ccf88SSatish Balay /* do sends: 278bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 279bc5ccf88SSatish Balay the ith processor 280bc5ccf88SSatish Balay */ 281*a2d1c673SSatish Balay svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 282*a2d1c673SSatish Balay sindices = (int *) (svalues + bs2*stash->n); 283bc5ccf88SSatish Balay send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 284bc5ccf88SSatish Balay CHKPTRQ(send_waits); 285bc5ccf88SSatish Balay startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 286bc5ccf88SSatish Balay starti = startv + size; 287*a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 288bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 289bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { 290bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 291bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 292bc5ccf88SSatish Balay } 293bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 294bc5ccf88SSatish Balay j = owner[i]; 295*a2d1c673SSatish Balay if (bs2 == 1) { 296bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 297*a2d1c673SSatish Balay } else { 298*a2d1c673SSatish Balay PetscMemcpy(svalues+bs2*startv[j],stash->array+bs2*i,bs2*sizeof(Scalar)); 299*a2d1c673SSatish Balay } 300bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 301bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 302bc5ccf88SSatish Balay startv[j]++; 303bc5ccf88SSatish Balay starti[j]++; 304bc5ccf88SSatish Balay } 305bc5ccf88SSatish Balay startv[0] = 0; 306bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 307bc5ccf88SSatish Balay for ( i=0,count=0; i<size; i++ ) { 308bc5ccf88SSatish Balay if (procs[i]) { 309*a2d1c673SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 310bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 311bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 312bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 313bc5ccf88SSatish Balay } 314bc5ccf88SSatish Balay } 315bc5ccf88SSatish Balay PetscFree(owner); 316bc5ccf88SSatish Balay PetscFree(startv); 317*a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 318*a2d1c673SSatish Balay for (i=0; i<2*size; i++ ) nprocs[i] = -1; 319*a2d1c673SSatish Balay stash->nprocs = nprocs; 320*a2d1c673SSatish Balay 321bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 322bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 323bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 324bc5ccf88SSatish Balay stash->rmax = nmax; 325bc5ccf88SSatish Balay PetscFunctionReturn(0); 326bc5ccf88SSatish Balay } 327bc5ccf88SSatish Balay 328*a2d1c673SSatish Balay /* 329*a2d1c673SSatish Balay This function waits on the receives posted in the function 330*a2d1c673SSatish Balay StashScatterBegin_Private() and returns one message at a time to 331*a2d1c673SSatish Balay the calling function. If no messages are left, it indicates this by 332*a2d1c673SSatish Balay setting flg = 0, else it sets flg = 1. 333*a2d1c673SSatish Balay */ 334bc5ccf88SSatish Balay #undef __FUNC__ 335*a2d1c673SSatish Balay #define __FUNC__ "StashScatterGetMesg_Private" 336*a2d1c673SSatish Balay int StashScatterGetMesg_Private(Stash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 337bc5ccf88SSatish Balay { 338*a2d1c673SSatish Balay int i,ierr,size=stash->size,*flg_v,*flg_i; 339*a2d1c673SSatish Balay int i1,i2,*rindices,match_found=0,bs2; 340*a2d1c673SSatish Balay MPI_Status recv_status; 341bc5ccf88SSatish Balay 342bc5ccf88SSatish Balay PetscFunctionBegin; 343bc5ccf88SSatish Balay 344*a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 345*a2d1c673SSatish Balay /* Return if no more messages to process */ 346*a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 347*a2d1c673SSatish Balay 348*a2d1c673SSatish Balay flg_v = stash->nprocs; 349*a2d1c673SSatish Balay flg_i = flg_v + size; 350*a2d1c673SSatish Balay bs2 = stash->bs_stash*stash->bs_stash; 351*a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 352*a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 353*a2d1c673SSatish Balay while (!match_found) { 354*a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 355*a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 356*a2d1c673SSatish Balay if (i % 2) { 357*a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 358*a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 359*a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 360*a2d1c673SSatish Balay } else { 361*a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 362*a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 363*a2d1c673SSatish Balay *nvals = *nvals/bs2; 364bc5ccf88SSatish Balay } 365*a2d1c673SSatish Balay 366*a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 367*a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 368*a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 369*a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 370*a2d1c673SSatish Balay rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 371*a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 372*a2d1c673SSatish Balay *cols = *rows + *nvals; 373*a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 374*a2d1c673SSatish Balay *flg = 1; 375*a2d1c673SSatish Balay stash->nprocessed ++; 376*a2d1c673SSatish Balay match_found = 1; 377bc5ccf88SSatish Balay } 378bc5ccf88SSatish Balay } 379bc5ccf88SSatish Balay PetscFunctionReturn(0); 380bc5ccf88SSatish Balay } 381