1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*8798bf22SSatish Balay static char vcid[] = "$Id: stash.c,v 1.26 1999/03/17 21:14:34 balay Exp balay $"; 32d5177cdSBarry Smith #endif 42d5177cdSBarry Smith 570f55243SBarry Smith #include "src/mat/matimpl.h" 69417f4adSLois Curfman McInnes 7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 84c1ff481SSatish Balay 99417f4adSLois Curfman McInnes /* 10*8798bf22SSatish Balay MatStashCreate_Private - Creates a stash ,currently used for all the parallel 114c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 124c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 139417f4adSLois Curfman McInnes 144c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 154c1ff481SSatish Balay 164c1ff481SSatish Balay Input Parameters: 174c1ff481SSatish Balay comm - communicator, required for scatters. 184c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 194c1ff481SSatish Balay 204c1ff481SSatish Balay Output Parameters: 214c1ff481SSatish Balay stash - the newly created stash 229417f4adSLois Curfman McInnes */ 235615d1e5SSatish Balay #undef __FUNC__ 24*8798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private" 25*8798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 269417f4adSLois Curfman McInnes { 274c1ff481SSatish Balay int ierr,flg,max=DEFAULT_STASH_SIZE/(bs*bs); 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 33*8798bf22SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr); 34*8798bf22SSatish Balay ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 35a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 36a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 37bc5ccf88SSatish Balay 384c1ff481SSatish Balay if (bs <= 0) bs = 1; 39a2d1c673SSatish Balay 404c1ff481SSatish Balay stash->bs = bs; 419417f4adSLois Curfman McInnes stash->nmax = 0; 429417f4adSLois Curfman McInnes stash->n = 0; 434c1ff481SSatish Balay stash->reallocs = -1; 449417f4adSLois Curfman McInnes stash->idx = 0; 459417f4adSLois Curfman McInnes stash->idy = 0; 46bc5ccf88SSatish Balay stash->array = 0; 479417f4adSLois Curfman McInnes 48bc5ccf88SSatish Balay stash->send_waits = 0; 49bc5ccf88SSatish Balay stash->recv_waits = 0; 50a2d1c673SSatish Balay stash->send_status = 0; 51bc5ccf88SSatish Balay stash->nsends = 0; 52bc5ccf88SSatish Balay stash->nrecvs = 0; 53bc5ccf88SSatish Balay stash->svalues = 0; 54bc5ccf88SSatish Balay stash->rvalues = 0; 55bc5ccf88SSatish Balay stash->rmax = 0; 56a2d1c673SSatish Balay stash->nprocs = 0; 57a2d1c673SSatish Balay stash->nprocessed = 0; 583a40ed3dSBarry Smith PetscFunctionReturn(0); 599417f4adSLois Curfman McInnes } 609417f4adSLois Curfman McInnes 614c1ff481SSatish Balay /* 62*8798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 634c1ff481SSatish Balay */ 645615d1e5SSatish Balay #undef __FUNC__ 65*8798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private" 66*8798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash) 679417f4adSLois Curfman McInnes { 68bc5ccf88SSatish Balay int ierr; 69a2d1c673SSatish Balay 70bc5ccf88SSatish Balay PetscFunctionBegin; 71bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 72bc5ccf88SSatish Balay if (stash->array) {PetscFree(stash->array); stash->array = 0;} 73bc5ccf88SSatish Balay PetscFunctionReturn(0); 74bc5ccf88SSatish Balay } 75bc5ccf88SSatish Balay 764c1ff481SSatish Balay /* 77*8798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 784c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 794c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 804c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 814c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 824c1ff481SSatish Balay so that the same value can be used the next time through. 834c1ff481SSatish Balay */ 84bc5ccf88SSatish Balay #undef __FUNC__ 85*8798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private" 86*8798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash) 87bc5ccf88SSatish Balay { 88a2d1c673SSatish Balay int nsends=stash->nsends,ierr; 89a2d1c673SSatish Balay MPI_Status *send_status; 90a2d1c673SSatish Balay 913a40ed3dSBarry Smith PetscFunctionBegin; 92a2d1c673SSatish Balay /* wait on sends */ 93a2d1c673SSatish Balay if (nsends) { 94a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 95a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 96a2d1c673SSatish Balay PetscFree(send_status); 97a2d1c673SSatish Balay } 98a2d1c673SSatish Balay 99c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 100d07ff455SSatish Balay wastage of space is reduced the next time this stash is used */ 101c0c58ca7SSatish Balay stash->oldnmax = (int)(stash->n * 1.1) + 5; 102d07ff455SSatish Balay stash->nmax = 0; 103d07ff455SSatish Balay stash->n = 0; 1044c1ff481SSatish Balay stash->reallocs = -1; 105bc5ccf88SSatish Balay stash->rmax = 0; 106a2d1c673SSatish Balay stash->nprocessed = 0; 107bc5ccf88SSatish Balay 108bc5ccf88SSatish Balay if (stash->array) { 109bc5ccf88SSatish Balay PetscFree(stash->array); 110bc5ccf88SSatish Balay stash->array = 0; 111bc5ccf88SSatish Balay stash->idx = 0; 112bc5ccf88SSatish Balay stash->idy = 0; 113bc5ccf88SSatish Balay } 114bc5ccf88SSatish Balay if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 115bc5ccf88SSatish Balay if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 116bc5ccf88SSatish Balay if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 117bc5ccf88SSatish Balay if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 118a2d1c673SSatish Balay if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 119bc5ccf88SSatish Balay 1203a40ed3dSBarry Smith PetscFunctionReturn(0); 1219417f4adSLois Curfman McInnes } 1229417f4adSLois Curfman McInnes 1234c1ff481SSatish Balay /* 124*8798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1254c1ff481SSatish Balay 1264c1ff481SSatish Balay Input Parameters: 1274c1ff481SSatish Balay stash - the stash 1284c1ff481SSatish Balay nstash - the size of the stash 1294c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1304c1ff481SSatish Balay 1314c1ff481SSatish Balay */ 1325615d1e5SSatish Balay #undef __FUNC__ 133*8798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private" 134*8798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 13597530c3fSBarry Smith { 1363a40ed3dSBarry Smith PetscFunctionBegin; 1374c1ff481SSatish Balay *nstash = stash->n; 1384c1ff481SSatish Balay *reallocs = stash->reallocs; 139bc5ccf88SSatish Balay PetscFunctionReturn(0); 140bc5ccf88SSatish Balay } 1414c1ff481SSatish Balay 1424c1ff481SSatish Balay 1434c1ff481SSatish Balay /* 144*8798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1454c1ff481SSatish Balay 1464c1ff481SSatish Balay Input Parameters: 1474c1ff481SSatish Balay stash - the stash 1484c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1494c1ff481SSatish Balay this value is used while allocating memory. 1504c1ff481SSatish Balay */ 151bc5ccf88SSatish Balay #undef __FUNC__ 152*8798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private" 153*8798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max) 154bc5ccf88SSatish Balay { 155bc5ccf88SSatish Balay PetscFunctionBegin; 156bc5ccf88SSatish Balay stash->oldnmax = max; 157bc5ccf88SSatish Balay stash->nmax = 0; 1583a40ed3dSBarry Smith PetscFunctionReturn(0); 15997530c3fSBarry Smith } 16097530c3fSBarry Smith 161*8798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1624c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1634c1ff481SSatish Balay being inserted into the stash. 1644c1ff481SSatish Balay 1654c1ff481SSatish Balay Input Parameters: 1664c1ff481SSatish Balay stash - the stash 1674c1ff481SSatish Balay incr - the minimum increase requested 1684c1ff481SSatish Balay 1694c1ff481SSatish Balay Notes: 1704c1ff481SSatish Balay This routine doubles the currently used memory. 1714c1ff481SSatish Balay */ 1725615d1e5SSatish Balay #undef __FUNC__ 173*8798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private" 174*8798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr) 1759417f4adSLois Curfman McInnes { 176a2d1c673SSatish Balay int *n_idx,*n_idy,newnmax,bs2; 177bc5ccf88SSatish Balay Scalar *n_array; 1789417f4adSLois Curfman McInnes 1793a40ed3dSBarry Smith PetscFunctionBegin; 1809417f4adSLois Curfman McInnes /* allocate a larger stash */ 181d07ff455SSatish Balay if (stash->nmax == 0) newnmax = stash->oldnmax; 182d07ff455SSatish Balay else newnmax = stash->nmax*2; 1834c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 184d07ff455SSatish Balay 1854c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 186a2d1c673SSatish Balay n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 187a2d1c673SSatish Balay n_idx = (int *) (n_array + bs2*newnmax); 188d07ff455SSatish Balay n_idy = (int *) (n_idx + newnmax); 189a2d1c673SSatish Balay PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 190416022c9SBarry Smith PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 191416022c9SBarry Smith PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 1920452661fSBarry Smith if (stash->array) PetscFree(stash->array); 193d07ff455SSatish Balay stash->array = n_array; 194d07ff455SSatish Balay stash->idx = n_idx; 195d07ff455SSatish Balay stash->idy = n_idy; 196d07ff455SSatish Balay stash->nmax = newnmax; 197d07ff455SSatish Balay stash->oldnmax = newnmax; 198bc5ccf88SSatish Balay stash->reallocs++; 199bc5ccf88SSatish Balay PetscFunctionReturn(0); 200bc5ccf88SSatish Balay } 201bc5ccf88SSatish Balay /* 202*8798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2034c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2044c1ff481SSatish Balay can be inserted with a single call to this function. 2054c1ff481SSatish Balay 2064c1ff481SSatish Balay Input Parameters: 2074c1ff481SSatish Balay stash - the stash 2084c1ff481SSatish Balay row - the global row correspoiding to the values 2094c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2104c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2114c1ff481SSatish Balay values - the values inserted 212bc5ccf88SSatish Balay */ 213bc5ccf88SSatish Balay #undef __FUNC__ 214*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private" 215*8798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 216bc5ccf88SSatish Balay { 217a2d1c673SSatish Balay int ierr,i; 218bc5ccf88SSatish Balay 219bc5ccf88SSatish Balay PetscFunctionBegin; 2204c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2214c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 222*8798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 2239417f4adSLois Curfman McInnes } 2244c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2259417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 226a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 227a2d1c673SSatish Balay stash->array[stash->n] = values[i]; 228a2d1c673SSatish Balay stash->n++; 2299417f4adSLois Curfman McInnes } 230a2d1c673SSatish Balay PetscFunctionReturn(0); 231a2d1c673SSatish Balay } 2324c1ff481SSatish Balay /* 233*8798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2344c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2354c1ff481SSatish Balay can be inserted with a single call to this function. 236a2d1c673SSatish Balay 2374c1ff481SSatish Balay Input Parameters: 2384c1ff481SSatish Balay stash - the stash 2394c1ff481SSatish Balay row - the global row correspoiding to the values 2404c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2414c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2424c1ff481SSatish Balay values - the values inserted 2434c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2444c1ff481SSatish Balay this happens because the input is columnoriented. 2454c1ff481SSatish Balay */ 246a2d1c673SSatish Balay #undef __FUNC__ 247*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private" 248*8798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 2494c1ff481SSatish Balay Scalar *values,int stepval) 250a2d1c673SSatish Balay { 2514c1ff481SSatish Balay int ierr,i; 252a2d1c673SSatish Balay 2534c1ff481SSatish Balay PetscFunctionBegin; 2544c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2554c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 256*8798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 2574c1ff481SSatish Balay } 2584c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2594c1ff481SSatish Balay stash->idx[stash->n] = row; 2604c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 2614c1ff481SSatish Balay stash->array[stash->n] = values[i*stepval]; 2624c1ff481SSatish Balay stash->n++; 2634c1ff481SSatish Balay } 2644c1ff481SSatish Balay PetscFunctionReturn(0); 2654c1ff481SSatish Balay } 2664c1ff481SSatish Balay 2674c1ff481SSatish Balay /* 268*8798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 2694c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 2704c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 2714c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 2724c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 2734c1ff481SSatish Balay 2744c1ff481SSatish Balay Input Parameters: 2754c1ff481SSatish Balay stash - the stash 2764c1ff481SSatish Balay row - the global block-row correspoiding to the values 2774c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2784c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 2794c1ff481SSatish Balay values. Each block is of size bs*bs. 2804c1ff481SSatish Balay values - the values inserted 2814c1ff481SSatish Balay rmax - the number of block-rows in the original block. 2824c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 2834c1ff481SSatish Balay idx - the index of the current block-row in the original block. 2844c1ff481SSatish Balay */ 2854c1ff481SSatish Balay #undef __FUNC__ 286*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private" 287*8798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 2884c1ff481SSatish Balay int rmax,int cmax,int idx) 2894c1ff481SSatish Balay { 2904c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 2914c1ff481SSatish Balay Scalar *vals,*array; 292a2d1c673SSatish Balay 293a2d1c673SSatish Balay PetscFunctionBegin; 294a2d1c673SSatish Balay bs2 = bs*bs; 2954c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 296*8798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 297a2d1c673SSatish Balay } 2984c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 299a2d1c673SSatish Balay stash->idx[stash->n] = row; 300a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 301a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 302a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 303a2d1c673SSatish Balay funtion call */ 304a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 305a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 306a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 307a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 308a2d1c673SSatish Balay array += 1; 309a2d1c673SSatish Balay vals += cmax*bs; 310a2d1c673SSatish Balay } 3114c1ff481SSatish Balay stash->n++; 3124c1ff481SSatish Balay } 3134c1ff481SSatish Balay PetscFunctionReturn(0); 3144c1ff481SSatish Balay } 3154c1ff481SSatish Balay 3164c1ff481SSatish Balay /* 317*8798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3184c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3194c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3204c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3214c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3224c1ff481SSatish Balay 3234c1ff481SSatish Balay Input Parameters: 3244c1ff481SSatish Balay stash - the stash 3254c1ff481SSatish Balay row - the global block-row correspoiding to the values 3264c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3274c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3284c1ff481SSatish Balay values. Each block is of size bs*bs. 3294c1ff481SSatish Balay values - the values inserted 3304c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3314c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3324c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3334c1ff481SSatish Balay */ 3344c1ff481SSatish Balay #undef __FUNC__ 335*8798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private" 336*8798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 3374c1ff481SSatish Balay Scalar *values,int rmax,int cmax,int idx) 3384c1ff481SSatish Balay { 3394c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3404c1ff481SSatish Balay Scalar *vals,*array; 3414c1ff481SSatish Balay 3424c1ff481SSatish Balay PetscFunctionBegin; 3434c1ff481SSatish Balay bs2 = bs*bs; 3444c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 345*8798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 3464c1ff481SSatish Balay } 3474c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 3484c1ff481SSatish Balay stash->idx[stash->n] = row; 3494c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3504c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 3514c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 3524c1ff481SSatish Balay funtion call */ 353a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 354a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 355a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 356a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 357a2d1c673SSatish Balay array += bs; 358a2d1c673SSatish Balay vals += rmax*bs; 359a2d1c673SSatish Balay } 360a2d1c673SSatish Balay stash->n++; 3619417f4adSLois Curfman McInnes } 3623a40ed3dSBarry Smith PetscFunctionReturn(0); 3639417f4adSLois Curfman McInnes } 3644c1ff481SSatish Balay /* 365*8798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 3664c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 3674c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 3684c1ff481SSatish Balay processors. 369bc5ccf88SSatish Balay 3704c1ff481SSatish Balay Input Parameters: 3714c1ff481SSatish Balay stash - the stash 3724c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 3734c1ff481SSatish Balay for each node. 3744c1ff481SSatish Balay 3754c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 3764c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 3774c1ff481SSatish Balay the proper global indices. 3784c1ff481SSatish Balay */ 379bc5ccf88SSatish Balay #undef __FUNC__ 380*8798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private" 381*8798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners) 382bc5ccf88SSatish Balay { 383a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 384a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 3854c1ff481SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 386a2d1c673SSatish Balay Scalar *rvalues,*svalues; 387bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 388bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 389bc5ccf88SSatish Balay 390bc5ccf88SSatish Balay PetscFunctionBegin; 391bc5ccf88SSatish Balay 3924c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 393bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 394bc5ccf88SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 395bc5ccf88SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 396bc5ccf88SSatish Balay owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 397a2d1c673SSatish Balay 398bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 399bc5ccf88SSatish Balay idx = stash->idx[i]; 400bc5ccf88SSatish Balay for ( j=0; j<size; j++ ) { 4014c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 402bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 403bc5ccf88SSatish Balay } 404bc5ccf88SSatish Balay } 405bc5ccf88SSatish Balay } 406bc5ccf88SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 407bc5ccf88SSatish Balay 408bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 409bc5ccf88SSatish Balay work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 410bc5ccf88SSatish Balay ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 411bc5ccf88SSatish Balay nreceives = work[rank]; 412bc5ccf88SSatish Balay ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 413bc5ccf88SSatish Balay nmax = work[rank]; 414bc5ccf88SSatish Balay PetscFree(work); 415bc5ccf88SSatish Balay /* post receives: 416bc5ccf88SSatish Balay since we don't know how long each individual message is we 417bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 418bc5ccf88SSatish Balay this is a lot of wasted space. 419bc5ccf88SSatish Balay */ 420a2d1c673SSatish Balay rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 421a2d1c673SSatish Balay rindices = (int *) (rvalues + bs2*nreceives*nmax); 422a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 423bc5ccf88SSatish Balay for ( i=0,count=0; i<nreceives; i++ ) { 424a2d1c673SSatish Balay ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 425bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 426bc5ccf88SSatish Balay ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 427bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 428bc5ccf88SSatish Balay } 429bc5ccf88SSatish Balay 430bc5ccf88SSatish Balay /* do sends: 431bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 432bc5ccf88SSatish Balay the ith processor 433bc5ccf88SSatish Balay */ 434a2d1c673SSatish Balay svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 435a2d1c673SSatish Balay sindices = (int *) (svalues + bs2*stash->n); 436bc5ccf88SSatish Balay send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 437bc5ccf88SSatish Balay CHKPTRQ(send_waits); 438bc5ccf88SSatish Balay startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 439bc5ccf88SSatish Balay starti = startv + size; 440a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 441bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 442bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { 443bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 444bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 445bc5ccf88SSatish Balay } 446bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 447bc5ccf88SSatish Balay j = owner[i]; 448a2d1c673SSatish Balay if (bs2 == 1) { 449bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 450a2d1c673SSatish Balay } else { 4514c1ff481SSatish Balay int k; 4524c1ff481SSatish Balay Scalar *buf1,*buf2; 4534c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 4544c1ff481SSatish Balay buf2 = stash->array+bs2*i; 4554c1ff481SSatish Balay for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 456a2d1c673SSatish Balay } 457bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 458bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 459bc5ccf88SSatish Balay startv[j]++; 460bc5ccf88SSatish Balay starti[j]++; 461bc5ccf88SSatish Balay } 462bc5ccf88SSatish Balay startv[0] = 0; 463bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 464bc5ccf88SSatish Balay for ( i=0,count=0; i<size; i++ ) { 465bc5ccf88SSatish Balay if (procs[i]) { 466a2d1c673SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 467bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 468bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 469bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 470bc5ccf88SSatish Balay } 471bc5ccf88SSatish Balay } 472bc5ccf88SSatish Balay PetscFree(owner); 473bc5ccf88SSatish Balay PetscFree(startv); 474a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 475a2d1c673SSatish Balay for (i=0; i<2*size; i++ ) nprocs[i] = -1; 476a2d1c673SSatish Balay stash->nprocs = nprocs; 477a2d1c673SSatish Balay 478bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 479bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 480bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 481bc5ccf88SSatish Balay stash->rmax = nmax; 482bc5ccf88SSatish Balay PetscFunctionReturn(0); 483bc5ccf88SSatish Balay } 484bc5ccf88SSatish Balay 485a2d1c673SSatish Balay /* 486*8798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 487*8798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 4884c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 4894c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 4904c1ff481SSatish Balay 4914c1ff481SSatish Balay Input Parameters: 4924c1ff481SSatish Balay stash - the stash 4934c1ff481SSatish Balay 4944c1ff481SSatish Balay Output Parameters: 4954c1ff481SSatish Balay nvals - the number of entries in the current message. 4964c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 4974c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 4984c1ff481SSatish Balay vals - the values 4994c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5004c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5014c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 502a2d1c673SSatish Balay */ 503bc5ccf88SSatish Balay #undef __FUNC__ 504*8798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private" 505*8798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 506bc5ccf88SSatish Balay { 507a2d1c673SSatish Balay int i,ierr,size=stash->size,*flg_v,*flg_i; 508a2d1c673SSatish Balay int i1,i2,*rindices,match_found=0,bs2; 509a2d1c673SSatish Balay MPI_Status recv_status; 510bc5ccf88SSatish Balay 511bc5ccf88SSatish Balay PetscFunctionBegin; 512bc5ccf88SSatish Balay 513a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 514a2d1c673SSatish Balay /* Return if no more messages to process */ 515a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 516a2d1c673SSatish Balay 517a2d1c673SSatish Balay flg_v = stash->nprocs; 518a2d1c673SSatish Balay flg_i = flg_v + size; 5194c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 520a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 521a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 522a2d1c673SSatish Balay while (!match_found) { 523a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 524a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 525a2d1c673SSatish Balay if (i % 2) { 526a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 527a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 528a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 529a2d1c673SSatish Balay } else { 530a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 531a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 532a2d1c673SSatish Balay *nvals = *nvals/bs2; 533bc5ccf88SSatish Balay } 534a2d1c673SSatish Balay 535a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 536a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 537a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 538a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 539a2d1c673SSatish Balay rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 540a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 541a2d1c673SSatish Balay *cols = *rows + *nvals; 542a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 543a2d1c673SSatish Balay *flg = 1; 544a2d1c673SSatish Balay stash->nprocessed ++; 545a2d1c673SSatish Balay match_found = 1; 546bc5ccf88SSatish Balay } 547bc5ccf88SSatish Balay } 548bc5ccf88SSatish Balay PetscFunctionReturn(0); 549bc5ccf88SSatish Balay } 550