1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*4c1ff481SSatish Balay static char vcid[] = "$Id: stash.c,v 1.24 1999/03/11 23:21:51 balay Exp balay $"; 32d5177cdSBarry Smith #endif 42d5177cdSBarry Smith 570f55243SBarry Smith #include "src/mat/matimpl.h" 69417f4adSLois Curfman McInnes 7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 8*4c1ff481SSatish Balay 99417f4adSLois Curfman McInnes /* 10*4c1ff481SSatish Balay StashCreate_Private - Creates a stash ,currently used for all the parallel 11*4c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 12*4c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 139417f4adSLois Curfman McInnes 14*4c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 15*4c1ff481SSatish Balay 16*4c1ff481SSatish Balay Input Parameters: 17*4c1ff481SSatish Balay comm - communicator, required for scatters. 18*4c1ff481SSatish Balay bs - stash block size. 
used when stashing blocks of values 19*4c1ff481SSatish Balay 20*4c1ff481SSatish Balay Output Parameters: 21*4c1ff481SSatish Balay stash - the newly created stash 229417f4adSLois Curfman McInnes */ 235615d1e5SSatish Balay #undef __FUNC__ 24bc5ccf88SSatish Balay #define __FUNC__ "StashCreate_Private" 25*4c1ff481SSatish Balay int StashCreate_Private(MPI_Comm comm,int bs, Stash *stash) 269417f4adSLois Curfman McInnes { 27*4c1ff481SSatish Balay int ierr,flg,max=DEFAULT_STASH_SIZE/(bs*bs); 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 33bc5ccf88SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr); 34bc5ccf88SSatish Balay ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 35a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 36a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 37bc5ccf88SSatish Balay 38*4c1ff481SSatish Balay if (bs <= 0) bs = 1; 39a2d1c673SSatish Balay 40*4c1ff481SSatish Balay stash->bs = bs; 419417f4adSLois Curfman McInnes stash->nmax = 0; 429417f4adSLois Curfman McInnes stash->n = 0; 43*4c1ff481SSatish Balay stash->reallocs = -1; 449417f4adSLois Curfman McInnes stash->idx = 0; 459417f4adSLois Curfman McInnes stash->idy = 0; 46bc5ccf88SSatish Balay stash->array = 0; 479417f4adSLois Curfman McInnes 48bc5ccf88SSatish Balay stash->send_waits = 0; 49bc5ccf88SSatish Balay stash->recv_waits = 0; 50a2d1c673SSatish Balay stash->send_status = 0; 51bc5ccf88SSatish Balay stash->nsends = 0; 52bc5ccf88SSatish Balay stash->nrecvs = 0; 53bc5ccf88SSatish Balay stash->svalues = 0; 54bc5ccf88SSatish Balay stash->rvalues = 0; 55bc5ccf88SSatish Balay 
stash->rmax = 0; 56a2d1c673SSatish Balay stash->nprocs = 0; 57a2d1c673SSatish Balay stash->nprocessed = 0; 583a40ed3dSBarry Smith PetscFunctionReturn(0); 599417f4adSLois Curfman McInnes } 609417f4adSLois Curfman McInnes 61*4c1ff481SSatish Balay /* 62*4c1ff481SSatish Balay StashDestroy_Private - Destroy the stash 63*4c1ff481SSatish Balay */ 645615d1e5SSatish Balay #undef __FUNC__ 65d4bb536fSBarry Smith #define __FUNC__ "StashDestroy_Private" 669417f4adSLois Curfman McInnes int StashDestroy_Private(Stash *stash) 679417f4adSLois Curfman McInnes { 68bc5ccf88SSatish Balay int ierr; 69a2d1c673SSatish Balay 70bc5ccf88SSatish Balay PetscFunctionBegin; 71bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 72bc5ccf88SSatish Balay if (stash->array) {PetscFree(stash->array); stash->array = 0;} 73bc5ccf88SSatish Balay PetscFunctionReturn(0); 74bc5ccf88SSatish Balay } 75bc5ccf88SSatish Balay 76*4c1ff481SSatish Balay /* 77*4c1ff481SSatish Balay StashScatterEnd_Private - This is called as the fial stage of 78*4c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 79*4c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 80*4c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 81*4c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 82*4c1ff481SSatish Balay so that the same value can be used the next time through. 
83*4c1ff481SSatish Balay */ 84bc5ccf88SSatish Balay #undef __FUNC__ 85a2d1c673SSatish Balay #define __FUNC__ "StashScatterEnd_Private" 86a2d1c673SSatish Balay int StashScatterEnd_Private(Stash *stash) 87bc5ccf88SSatish Balay { 88a2d1c673SSatish Balay int nsends=stash->nsends,ierr; 89a2d1c673SSatish Balay MPI_Status *send_status; 90a2d1c673SSatish Balay 913a40ed3dSBarry Smith PetscFunctionBegin; 92a2d1c673SSatish Balay /* wait on sends */ 93a2d1c673SSatish Balay if (nsends) { 94a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 95a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 96a2d1c673SSatish Balay PetscFree(send_status); 97a2d1c673SSatish Balay } 98a2d1c673SSatish Balay 99d07ff455SSatish Balay /* Now update nmaxold to be app 10% more than nmax, this way the 100d07ff455SSatish Balay wastage of space is reduced the next time this stash is used */ 101bc5ccf88SSatish Balay stash->oldnmax = (int)(stash->nmax * 1.1) + 5; 102d07ff455SSatish Balay stash->nmax = 0; 103d07ff455SSatish Balay stash->n = 0; 104*4c1ff481SSatish Balay stash->reallocs = -1; 105bc5ccf88SSatish Balay stash->rmax = 0; 106a2d1c673SSatish Balay stash->nprocessed = 0; 107bc5ccf88SSatish Balay 108bc5ccf88SSatish Balay if (stash->array) { 109bc5ccf88SSatish Balay PetscFree(stash->array); 110bc5ccf88SSatish Balay stash->array = 0; 111bc5ccf88SSatish Balay stash->idx = 0; 112bc5ccf88SSatish Balay stash->idy = 0; 113bc5ccf88SSatish Balay } 114bc5ccf88SSatish Balay if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 115bc5ccf88SSatish Balay if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 116bc5ccf88SSatish Balay if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 117bc5ccf88SSatish Balay if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 118a2d1c673SSatish Balay if (stash->nprocs) {PetscFree(stash->nprocs); 
stash->nprocs = 0;} 119bc5ccf88SSatish Balay 1203a40ed3dSBarry Smith PetscFunctionReturn(0); 1219417f4adSLois Curfman McInnes } 1229417f4adSLois Curfman McInnes 123*4c1ff481SSatish Balay /* 124*4c1ff481SSatish Balay StashGetInfo_Private - Gets the relavant statistics of the stash 125*4c1ff481SSatish Balay 126*4c1ff481SSatish Balay Input Parameters: 127*4c1ff481SSatish Balay stash - the stash 128*4c1ff481SSatish Balay nstash - the size of the stash 129*4c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 130*4c1ff481SSatish Balay 131*4c1ff481SSatish Balay */ 1325615d1e5SSatish Balay #undef __FUNC__ 133*4c1ff481SSatish Balay #define __FUNC__ "StashGetInfo_Private" 134*4c1ff481SSatish Balay int StashGetInfo_Private(Stash *stash,int *nstash, int *reallocs) 13597530c3fSBarry Smith { 1363a40ed3dSBarry Smith PetscFunctionBegin; 137*4c1ff481SSatish Balay *nstash = stash->n; 138*4c1ff481SSatish Balay *reallocs = stash->reallocs; 139bc5ccf88SSatish Balay PetscFunctionReturn(0); 140bc5ccf88SSatish Balay } 141*4c1ff481SSatish Balay 142*4c1ff481SSatish Balay 143*4c1ff481SSatish Balay /* 144*4c1ff481SSatish Balay StashSetInitialSize_Private - Sets the initial size of the stash 145*4c1ff481SSatish Balay 146*4c1ff481SSatish Balay Input Parameters: 147*4c1ff481SSatish Balay stash - the stash 148*4c1ff481SSatish Balay max - the value that is used as the max size of the stash. 149*4c1ff481SSatish Balay this value is used while allocating memory. 
150*4c1ff481SSatish Balay */ 151bc5ccf88SSatish Balay #undef __FUNC__ 152bc5ccf88SSatish Balay #define __FUNC__ "StashSetInitialSize_Private" 153bc5ccf88SSatish Balay int StashSetInitialSize_Private(Stash *stash,int max) 154bc5ccf88SSatish Balay { 155bc5ccf88SSatish Balay PetscFunctionBegin; 156bc5ccf88SSatish Balay stash->oldnmax = max; 157bc5ccf88SSatish Balay stash->nmax = 0; 1583a40ed3dSBarry Smith PetscFunctionReturn(0); 15997530c3fSBarry Smith } 16097530c3fSBarry Smith 161*4c1ff481SSatish Balay /* StashExpand_Private - Expand the stash. This function is called 162*4c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 163*4c1ff481SSatish Balay being inserted into the stash. 164*4c1ff481SSatish Balay 165*4c1ff481SSatish Balay Input Parameters: 166*4c1ff481SSatish Balay stash - the stash 167*4c1ff481SSatish Balay incr - the minimum increase requested 168*4c1ff481SSatish Balay 169*4c1ff481SSatish Balay Notes: 170*4c1ff481SSatish Balay This routine doubles the currently used memory. 
171*4c1ff481SSatish Balay */ 1725615d1e5SSatish Balay #undef __FUNC__ 173bc5ccf88SSatish Balay #define __FUNC__ "StashExpand_Private" 174*4c1ff481SSatish Balay static int StashExpand_Private(Stash *stash,int incr) 1759417f4adSLois Curfman McInnes { 176a2d1c673SSatish Balay int *n_idx,*n_idy,newnmax,bs2; 177bc5ccf88SSatish Balay Scalar *n_array; 1789417f4adSLois Curfman McInnes 1793a40ed3dSBarry Smith PetscFunctionBegin; 1809417f4adSLois Curfman McInnes /* allocate a larger stash */ 181d07ff455SSatish Balay if (stash->nmax == 0) newnmax = stash->oldnmax; 182d07ff455SSatish Balay else newnmax = stash->nmax*2; 183*4c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 184d07ff455SSatish Balay 185*4c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 186a2d1c673SSatish Balay n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 187a2d1c673SSatish Balay n_idx = (int *) (n_array + bs2*newnmax); 188d07ff455SSatish Balay n_idy = (int *) (n_idx + newnmax); 189a2d1c673SSatish Balay PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 190416022c9SBarry Smith PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 191416022c9SBarry Smith PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 1920452661fSBarry Smith if (stash->array) PetscFree(stash->array); 193d07ff455SSatish Balay stash->array = n_array; 194d07ff455SSatish Balay stash->idx = n_idx; 195d07ff455SSatish Balay stash->idy = n_idy; 196d07ff455SSatish Balay stash->nmax = newnmax; 197d07ff455SSatish Balay stash->oldnmax = newnmax; 198bc5ccf88SSatish Balay stash->reallocs++; 199bc5ccf88SSatish Balay PetscFunctionReturn(0); 200bc5ccf88SSatish Balay } 201bc5ccf88SSatish Balay /* 202*4c1ff481SSatish Balay StashValuesRoworiented_Private - inserts values into the stash. This function 203*4c1ff481SSatish Balay expects the values to be roworiented. 
Multiple columns belong to the same row 204*4c1ff481SSatish Balay can be inserted with a single call to this function. 205*4c1ff481SSatish Balay 206*4c1ff481SSatish Balay Input Parameters: 207*4c1ff481SSatish Balay stash - the stash 208*4c1ff481SSatish Balay row - the global row correspoiding to the values 209*4c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 210*4c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 211*4c1ff481SSatish Balay values - the values inserted 212bc5ccf88SSatish Balay */ 213bc5ccf88SSatish Balay #undef __FUNC__ 214*4c1ff481SSatish Balay #define __FUNC__ "StashValuesRoworiented_Private" 215*4c1ff481SSatish Balay int StashValuesRoworiented_Private(Stash *stash,int row,int n, int *idxn,Scalar *values) 216bc5ccf88SSatish Balay { 217a2d1c673SSatish Balay int ierr,i; 218bc5ccf88SSatish Balay 219bc5ccf88SSatish Balay PetscFunctionBegin; 220*4c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 221*4c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 222*4c1ff481SSatish Balay ierr = StashExpand_Private(stash,n); CHKERRQ(ierr); 2239417f4adSLois Curfman McInnes } 224*4c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2259417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 226a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 227a2d1c673SSatish Balay stash->array[stash->n] = values[i]; 228a2d1c673SSatish Balay stash->n++; 2299417f4adSLois Curfman McInnes } 230a2d1c673SSatish Balay PetscFunctionReturn(0); 231a2d1c673SSatish Balay } 232*4c1ff481SSatish Balay /* 233*4c1ff481SSatish Balay StashValuesColumnoriented_Private - inserts values into the stash. This function 234*4c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 235*4c1ff481SSatish Balay can be inserted with a single call to this function. 
236a2d1c673SSatish Balay 237*4c1ff481SSatish Balay Input Parameters: 238*4c1ff481SSatish Balay stash - the stash 239*4c1ff481SSatish Balay row - the global row correspoiding to the values 240*4c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 241*4c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 242*4c1ff481SSatish Balay values - the values inserted 243*4c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 244*4c1ff481SSatish Balay this happens because the input is columnoriented. 245*4c1ff481SSatish Balay */ 246a2d1c673SSatish Balay #undef __FUNC__ 247*4c1ff481SSatish Balay #define __FUNC__ "StashValuesColumnoriented_Private" 248*4c1ff481SSatish Balay int StashValuesColumnoriented_Private(Stash *stash,int row,int n, int *idxn, 249*4c1ff481SSatish Balay Scalar *values,int stepval) 250a2d1c673SSatish Balay { 251*4c1ff481SSatish Balay int ierr,i; 252a2d1c673SSatish Balay 253*4c1ff481SSatish Balay PetscFunctionBegin; 254*4c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 255*4c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 256*4c1ff481SSatish Balay ierr = StashExpand_Private(stash,n); CHKERRQ(ierr); 257*4c1ff481SSatish Balay } 258*4c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 259*4c1ff481SSatish Balay stash->idx[stash->n] = row; 260*4c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 261*4c1ff481SSatish Balay stash->array[stash->n] = values[i*stepval]; 262*4c1ff481SSatish Balay stash->n++; 263*4c1ff481SSatish Balay } 264*4c1ff481SSatish Balay PetscFunctionReturn(0); 265*4c1ff481SSatish Balay } 266*4c1ff481SSatish Balay 267*4c1ff481SSatish Balay /* 268*4c1ff481SSatish Balay StashValuesRoworientedBlocked_Private - inserts blocks of values into the stash. 269*4c1ff481SSatish Balay This function expects the values to be roworiented. 
Multiple columns belonging
   to the same block-row can be inserted with a single call to this function.
   This function extracts the sub-block of values based on the dimensions of
   the original input block, and the row,col values corresponding to the blocks.

   Input Parameters:
   stash  - the stash
   row    - the global block-row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global block-column indices corresponding to each of the blocks of
            values. Each block is of size bs*bs.
   values - the values inserted
   rmax   - the number of block-rows in the original block.
   cmax   - the number of block-columns in the original block.
   idx    - the index of the current block-row in the original block.
284*4c1ff481SSatish Balay */ 285*4c1ff481SSatish Balay #undef __FUNC__ 286*4c1ff481SSatish Balay #define __FUNC__ "StashValuesRoworientedBlocked_Private" 287*4c1ff481SSatish Balay int StashValuesRoworientedBlocked_Private(Stash *stash,int row,int n,int *idxn,Scalar *values, 288*4c1ff481SSatish Balay int rmax,int cmax,int idx) 289*4c1ff481SSatish Balay { 290*4c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 291*4c1ff481SSatish Balay Scalar *vals,*array; 292a2d1c673SSatish Balay 293a2d1c673SSatish Balay PetscFunctionBegin; 294a2d1c673SSatish Balay bs2 = bs*bs; 295*4c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 296*4c1ff481SSatish Balay ierr = StashExpand_Private(stash,n); CHKERRQ(ierr); 297a2d1c673SSatish Balay } 298*4c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 299a2d1c673SSatish Balay stash->idx[stash->n] = row; 300a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 301a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 302a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 303a2d1c673SSatish Balay funtion call */ 304a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 305a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 306a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 307a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 308a2d1c673SSatish Balay array += 1; 309a2d1c673SSatish Balay vals += cmax*bs; 310a2d1c673SSatish Balay } 311*4c1ff481SSatish Balay stash->n++; 312*4c1ff481SSatish Balay } 313*4c1ff481SSatish Balay PetscFunctionReturn(0); 314*4c1ff481SSatish Balay } 315*4c1ff481SSatish Balay 316*4c1ff481SSatish Balay /* 317*4c1ff481SSatish Balay StashValuesColumnorientedBlocked_Private - inserts blocks of values into the stash. 318*4c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 319*4c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 
320*4c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 321*4c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 322*4c1ff481SSatish Balay 323*4c1ff481SSatish Balay Input Parameters: 324*4c1ff481SSatish Balay stash - the stash 325*4c1ff481SSatish Balay row - the global block-row correspoiding to the values 326*4c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 327*4c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 328*4c1ff481SSatish Balay values. Each block is of size bs*bs. 329*4c1ff481SSatish Balay values - the values inserted 330*4c1ff481SSatish Balay rmax - the number of block-rows in the original block. 331*4c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 332*4c1ff481SSatish Balay idx - the index of the current block-row in the original block. 333*4c1ff481SSatish Balay */ 334*4c1ff481SSatish Balay #undef __FUNC__ 335*4c1ff481SSatish Balay #define __FUNC__ "StashValuesColumnorientedBlocked_Private" 336*4c1ff481SSatish Balay int StashValuesColumnorientedBlocked_Private(Stash *stash,int row,int n,int *idxn, 337*4c1ff481SSatish Balay Scalar *values,int rmax,int cmax,int idx) 338*4c1ff481SSatish Balay { 339*4c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 340*4c1ff481SSatish Balay Scalar *vals,*array; 341*4c1ff481SSatish Balay 342*4c1ff481SSatish Balay PetscFunctionBegin; 343*4c1ff481SSatish Balay bs2 = bs*bs; 344*4c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 345*4c1ff481SSatish Balay ierr = StashExpand_Private(stash,n); CHKERRQ(ierr); 346*4c1ff481SSatish Balay } 347*4c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 348*4c1ff481SSatish Balay stash->idx[stash->n] = row; 349*4c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 350*4c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 
351*4c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 352*4c1ff481SSatish Balay funtion call */ 353a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 354a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 355a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 356a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 357a2d1c673SSatish Balay array += bs; 358a2d1c673SSatish Balay vals += rmax*bs; 359a2d1c673SSatish Balay } 360a2d1c673SSatish Balay stash->n++; 3619417f4adSLois Curfman McInnes } 3623a40ed3dSBarry Smith PetscFunctionReturn(0); 3639417f4adSLois Curfman McInnes } 364*4c1ff481SSatish Balay /* 365*4c1ff481SSatish Balay StashScatterBegin_Private - Initiates the transfer of values to the 366*4c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 367*4c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 368*4c1ff481SSatish Balay processors. 369bc5ccf88SSatish Balay 370*4c1ff481SSatish Balay Input Parameters: 371*4c1ff481SSatish Balay stash - the stash 372*4c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 373*4c1ff481SSatish Balay for each node. 374*4c1ff481SSatish Balay 375*4c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 376*4c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 377*4c1ff481SSatish Balay the proper global indices. 
378*4c1ff481SSatish Balay */ 379bc5ccf88SSatish Balay #undef __FUNC__ 380bc5ccf88SSatish Balay #define __FUNC__ "StashScatterBegin_Private" 381bc5ccf88SSatish Balay int StashScatterBegin_Private(Stash *stash,int *owners) 382bc5ccf88SSatish Balay { 383a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 384a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 385*4c1ff481SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 386a2d1c673SSatish Balay Scalar *rvalues,*svalues; 387bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 388bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 389bc5ccf88SSatish Balay 390bc5ccf88SSatish Balay PetscFunctionBegin; 391bc5ccf88SSatish Balay 392*4c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 393bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 394bc5ccf88SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 395bc5ccf88SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 396bc5ccf88SSatish Balay owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 397a2d1c673SSatish Balay 398bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 399bc5ccf88SSatish Balay idx = stash->idx[i]; 400bc5ccf88SSatish Balay for ( j=0; j<size; j++ ) { 401*4c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 402bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 403bc5ccf88SSatish Balay } 404bc5ccf88SSatish Balay } 405bc5ccf88SSatish Balay } 406bc5ccf88SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 407bc5ccf88SSatish Balay 408bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 409bc5ccf88SSatish Balay work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 410bc5ccf88SSatish Balay ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 
411bc5ccf88SSatish Balay nreceives = work[rank]; 412bc5ccf88SSatish Balay ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 413bc5ccf88SSatish Balay nmax = work[rank]; 414bc5ccf88SSatish Balay PetscFree(work); 415bc5ccf88SSatish Balay /* post receives: 416bc5ccf88SSatish Balay since we don't know how long each individual message is we 417bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 418bc5ccf88SSatish Balay this is a lot of wasted space. 419bc5ccf88SSatish Balay */ 420a2d1c673SSatish Balay rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 421a2d1c673SSatish Balay rindices = (int *) (rvalues + bs2*nreceives*nmax); 422a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 423bc5ccf88SSatish Balay for ( i=0,count=0; i<nreceives; i++ ) { 424a2d1c673SSatish Balay ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 425bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 426bc5ccf88SSatish Balay ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 427bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 428bc5ccf88SSatish Balay } 429bc5ccf88SSatish Balay 430bc5ccf88SSatish Balay /* do sends: 431bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 432bc5ccf88SSatish Balay the ith processor 433bc5ccf88SSatish Balay */ 434a2d1c673SSatish Balay svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 435a2d1c673SSatish Balay sindices = (int *) (svalues + bs2*stash->n); 436bc5ccf88SSatish Balay send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 437bc5ccf88SSatish Balay CHKPTRQ(send_waits); 438bc5ccf88SSatish Balay startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 439bc5ccf88SSatish Balay starti = startv + 
size; 440a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 441bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 442bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { 443bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 444bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 445bc5ccf88SSatish Balay } 446bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 447bc5ccf88SSatish Balay j = owner[i]; 448a2d1c673SSatish Balay if (bs2 == 1) { 449bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 450a2d1c673SSatish Balay } else { 451*4c1ff481SSatish Balay int k; 452*4c1ff481SSatish Balay Scalar *buf1,*buf2; 453*4c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 454*4c1ff481SSatish Balay buf2 = stash->array+bs2*i; 455*4c1ff481SSatish Balay for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 456a2d1c673SSatish Balay } 457bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 458bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 459bc5ccf88SSatish Balay startv[j]++; 460bc5ccf88SSatish Balay starti[j]++; 461bc5ccf88SSatish Balay } 462bc5ccf88SSatish Balay startv[0] = 0; 463bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 464bc5ccf88SSatish Balay for ( i=0,count=0; i<size; i++ ) { 465bc5ccf88SSatish Balay if (procs[i]) { 466a2d1c673SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 467bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 468bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 469bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 470bc5ccf88SSatish Balay } 471bc5ccf88SSatish Balay } 472bc5ccf88SSatish Balay PetscFree(owner); 473bc5ccf88SSatish Balay PetscFree(startv); 474a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 475a2d1c673SSatish Balay for (i=0; i<2*size; i++ ) nprocs[i] = -1; 476a2d1c673SSatish Balay 
stash->nprocs = nprocs; 477a2d1c673SSatish Balay 478bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 479bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 480bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 481bc5ccf88SSatish Balay stash->rmax = nmax; 482bc5ccf88SSatish Balay PetscFunctionReturn(0); 483bc5ccf88SSatish Balay } 484bc5ccf88SSatish Balay 485a2d1c673SSatish Balay /* 486*4c1ff481SSatish Balay StashScatterGetMesg_Private - This function waits on the receives posted 487*4c1ff481SSatish Balay in the function StashScatterBegin_Private() and returns one message at 488*4c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 489*4c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 490*4c1ff481SSatish Balay 491*4c1ff481SSatish Balay Input Parameters: 492*4c1ff481SSatish Balay stash - the stash 493*4c1ff481SSatish Balay 494*4c1ff481SSatish Balay Output Parameters: 495*4c1ff481SSatish Balay nvals - the number of entries in the current message. 496*4c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 497*4c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 498*4c1ff481SSatish Balay vals - the values 499*4c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 500*4c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 501*4c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 
502a2d1c673SSatish Balay */ 503bc5ccf88SSatish Balay #undef __FUNC__ 504a2d1c673SSatish Balay #define __FUNC__ "StashScatterGetMesg_Private" 505a2d1c673SSatish Balay int StashScatterGetMesg_Private(Stash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 506bc5ccf88SSatish Balay { 507a2d1c673SSatish Balay int i,ierr,size=stash->size,*flg_v,*flg_i; 508a2d1c673SSatish Balay int i1,i2,*rindices,match_found=0,bs2; 509a2d1c673SSatish Balay MPI_Status recv_status; 510bc5ccf88SSatish Balay 511bc5ccf88SSatish Balay PetscFunctionBegin; 512bc5ccf88SSatish Balay 513a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 514a2d1c673SSatish Balay /* Return if no more messages to process */ 515a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 516a2d1c673SSatish Balay 517a2d1c673SSatish Balay flg_v = stash->nprocs; 518a2d1c673SSatish Balay flg_i = flg_v + size; 519*4c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 520a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 521a2d1c673SSatish Balay the calling function. 
Until then keep receiving messages */ 522a2d1c673SSatish Balay while (!match_found) { 523a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 524a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 525a2d1c673SSatish Balay if (i % 2) { 526a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 527a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 528a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 529a2d1c673SSatish Balay } else { 530a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 531a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 532a2d1c673SSatish Balay *nvals = *nvals/bs2; 533bc5ccf88SSatish Balay } 534a2d1c673SSatish Balay 535a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 536a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 537a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 538a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 539a2d1c673SSatish Balay rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 540a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 541a2d1c673SSatish Balay *cols = *rows + *nvals; 542a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 543a2d1c673SSatish Balay *flg = 1; 544a2d1c673SSatish Balay stash->nprocessed ++; 545a2d1c673SSatish Balay match_found = 1; 546bc5ccf88SSatish Balay } 547bc5ccf88SSatish Balay } 548bc5ccf88SSatish Balay PetscFunctionReturn(0); 549bc5ccf88SSatish Balay } 550