1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*94b769a5SSatish Balay static char vcid[] = "$Id: matstash.c,v 1.27 1999/03/18 00:33:52 balay Exp balay $"; 32d5177cdSBarry Smith #endif 42d5177cdSBarry Smith 570f55243SBarry Smith #include "src/mat/matimpl.h" 69417f4adSLois Curfman McInnes 7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 84c1ff481SSatish Balay 99417f4adSLois Curfman McInnes /* 108798bf22SSatish Balay MatStashCreate_Private - Creates a stash ,currently used for all the parallel 114c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 124c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 139417f4adSLois Curfman McInnes 144c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 154c1ff481SSatish Balay 164c1ff481SSatish Balay Input Parameters: 174c1ff481SSatish Balay comm - communicator, required for scatters. 184c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 194c1ff481SSatish Balay 204c1ff481SSatish Balay Output Parameters: 214c1ff481SSatish Balay stash - the newly created stash 229417f4adSLois Curfman McInnes */ 235615d1e5SSatish Balay #undef __FUNC__ 248798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private" 258798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 269417f4adSLois Curfman McInnes { 27*94b769a5SSatish Balay int ierr,flg,max=DEFAULT_STASH_SIZE; 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 338798bf22SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr); 348798bf22SSatish Balay ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 35a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 36a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 37bc5ccf88SSatish Balay 384c1ff481SSatish Balay if (bs <= 0) bs = 1; 39a2d1c673SSatish Balay 404c1ff481SSatish Balay stash->bs = bs; 419417f4adSLois Curfman McInnes stash->nmax = 0; 429417f4adSLois Curfman McInnes stash->n = 0; 434c1ff481SSatish Balay stash->reallocs = -1; 449417f4adSLois Curfman McInnes stash->idx = 0; 459417f4adSLois Curfman McInnes stash->idy = 0; 46bc5ccf88SSatish Balay stash->array = 0; 479417f4adSLois Curfman McInnes 48bc5ccf88SSatish Balay stash->send_waits = 0; 49bc5ccf88SSatish Balay stash->recv_waits = 0; 50a2d1c673SSatish Balay stash->send_status = 0; 51bc5ccf88SSatish Balay stash->nsends = 0; 52bc5ccf88SSatish Balay stash->nrecvs = 0; 53bc5ccf88SSatish Balay stash->svalues = 0; 54bc5ccf88SSatish Balay stash->rvalues = 0; 55bc5ccf88SSatish Balay stash->rmax = 0; 56a2d1c673SSatish Balay stash->nprocs = 0; 57a2d1c673SSatish Balay stash->nprocessed = 0; 583a40ed3dSBarry Smith PetscFunctionReturn(0); 599417f4adSLois Curfman McInnes } 609417f4adSLois Curfman McInnes 614c1ff481SSatish Balay /* 628798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 634c1ff481SSatish Balay */ 645615d1e5SSatish Balay #undef __FUNC__ 658798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private" 668798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash) 679417f4adSLois Curfman McInnes { 68bc5ccf88SSatish Balay int ierr; 69a2d1c673SSatish Balay 70bc5ccf88SSatish Balay PetscFunctionBegin; 71bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 72bc5ccf88SSatish Balay if (stash->array) {PetscFree(stash->array); stash->array = 0;} 73bc5ccf88SSatish Balay PetscFunctionReturn(0); 74bc5ccf88SSatish Balay } 75bc5ccf88SSatish Balay 764c1ff481SSatish Balay /* 778798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 784c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 794c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 804c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 814c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 824c1ff481SSatish Balay so that the same value can be used the next time through. 834c1ff481SSatish Balay */ 84bc5ccf88SSatish Balay #undef __FUNC__ 858798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private" 868798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash) 87bc5ccf88SSatish Balay { 88*94b769a5SSatish Balay int nsends=stash->nsends,ierr,bs2; 89a2d1c673SSatish Balay MPI_Status *send_status; 90a2d1c673SSatish Balay 913a40ed3dSBarry Smith PetscFunctionBegin; 92a2d1c673SSatish Balay /* wait on sends */ 93a2d1c673SSatish Balay if (nsends) { 94a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 95a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 96a2d1c673SSatish Balay PetscFree(send_status); 97a2d1c673SSatish Balay } 98a2d1c673SSatish Balay 99c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 100d07ff455SSatish Balay wastage of space is reduced the next time this stash is used */ 101*94b769a5SSatish Balay bs2 = stash->bs*stash->bs; 102*94b769a5SSatish Balay stash->oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 103d07ff455SSatish Balay stash->nmax = 0; 104d07ff455SSatish Balay stash->n = 0; 1054c1ff481SSatish Balay stash->reallocs = -1; 106bc5ccf88SSatish Balay stash->rmax = 0; 107a2d1c673SSatish Balay stash->nprocessed = 0; 108bc5ccf88SSatish Balay 109bc5ccf88SSatish Balay if (stash->array) { 110bc5ccf88SSatish Balay PetscFree(stash->array); 111bc5ccf88SSatish Balay stash->array = 0; 112bc5ccf88SSatish Balay stash->idx = 0; 113bc5ccf88SSatish Balay stash->idy = 0; 114bc5ccf88SSatish Balay } 115bc5ccf88SSatish Balay if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 116bc5ccf88SSatish Balay if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 117bc5ccf88SSatish Balay if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 118bc5ccf88SSatish Balay if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 119a2d1c673SSatish Balay if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 120bc5ccf88SSatish Balay 1213a40ed3dSBarry Smith PetscFunctionReturn(0); 1229417f4adSLois Curfman McInnes } 1239417f4adSLois Curfman McInnes 1244c1ff481SSatish Balay /* 1258798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1264c1ff481SSatish Balay 1274c1ff481SSatish Balay Input Parameters: 1284c1ff481SSatish Balay stash - the stash 129*94b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1304c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1314c1ff481SSatish Balay 1324c1ff481SSatish Balay */ 1335615d1e5SSatish Balay #undef __FUNC__ 1348798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private" 1358798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 13697530c3fSBarry Smith { 137*94b769a5SSatish Balay int bs2 = stash->bs*stash->bs; 138*94b769a5SSatish Balay 1393a40ed3dSBarry Smith PetscFunctionBegin; 140*94b769a5SSatish Balay *nstash = stash->n*bs2; 1414c1ff481SSatish Balay *reallocs = stash->reallocs; 142bc5ccf88SSatish Balay PetscFunctionReturn(0); 143bc5ccf88SSatish Balay } 1444c1ff481SSatish Balay 1454c1ff481SSatish Balay 1464c1ff481SSatish Balay /* 1478798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1484c1ff481SSatish Balay 1494c1ff481SSatish Balay Input Parameters: 1504c1ff481SSatish Balay stash - the stash 1514c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1524c1ff481SSatish Balay this value is used while allocating memory. 1534c1ff481SSatish Balay */ 154bc5ccf88SSatish Balay #undef __FUNC__ 1558798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private" 1568798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max) 157bc5ccf88SSatish Balay { 158bc5ccf88SSatish Balay PetscFunctionBegin; 159bc5ccf88SSatish Balay stash->oldnmax = max; 160bc5ccf88SSatish Balay stash->nmax = 0; 1613a40ed3dSBarry Smith PetscFunctionReturn(0); 16297530c3fSBarry Smith } 16397530c3fSBarry Smith 1648798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1654c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1664c1ff481SSatish Balay being inserted into the stash. 1674c1ff481SSatish Balay 1684c1ff481SSatish Balay Input Parameters: 1694c1ff481SSatish Balay stash - the stash 1704c1ff481SSatish Balay incr - the minimum increase requested 1714c1ff481SSatish Balay 1724c1ff481SSatish Balay Notes: 1734c1ff481SSatish Balay This routine doubles the currently used memory. 1744c1ff481SSatish Balay */ 1755615d1e5SSatish Balay #undef __FUNC__ 1768798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private" 1778798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr) 1789417f4adSLois Curfman McInnes { 179a2d1c673SSatish Balay int *n_idx,*n_idy,newnmax,bs2; 180bc5ccf88SSatish Balay Scalar *n_array; 1819417f4adSLois Curfman McInnes 1823a40ed3dSBarry Smith PetscFunctionBegin; 1839417f4adSLois Curfman McInnes /* allocate a larger stash */ 184*94b769a5SSatish Balay bs2 = stash->bs*stash->bs; 185*94b769a5SSatish Balay if (stash->nmax == 0) newnmax = stash->oldnmax/bs2; 186d07ff455SSatish Balay else newnmax = stash->nmax*2; 1874c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 188d07ff455SSatish Balay 189a2d1c673SSatish Balay n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 190a2d1c673SSatish Balay n_idx = (int *) (n_array + bs2*newnmax); 191d07ff455SSatish Balay n_idy = (int *) (n_idx + newnmax); 192a2d1c673SSatish Balay PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 193416022c9SBarry Smith PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 194416022c9SBarry Smith PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 1950452661fSBarry Smith if (stash->array) PetscFree(stash->array); 196d07ff455SSatish Balay stash->array = n_array; 197d07ff455SSatish Balay stash->idx = n_idx; 198d07ff455SSatish Balay stash->idy = n_idy; 199d07ff455SSatish Balay stash->nmax = newnmax; 200*94b769a5SSatish Balay stash->oldnmax = newnmax*bs2; 201bc5ccf88SSatish Balay stash->reallocs++; 202bc5ccf88SSatish Balay PetscFunctionReturn(0); 203bc5ccf88SSatish Balay } 204bc5ccf88SSatish Balay /* 2058798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2064c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2074c1ff481SSatish Balay can be inserted with a single call to this function. 2084c1ff481SSatish Balay 2094c1ff481SSatish Balay Input Parameters: 2104c1ff481SSatish Balay stash - the stash 2114c1ff481SSatish Balay row - the global row correspoiding to the values 2124c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2134c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2144c1ff481SSatish Balay values - the values inserted 215bc5ccf88SSatish Balay */ 216bc5ccf88SSatish Balay #undef __FUNC__ 2178798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private" 2188798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 219bc5ccf88SSatish Balay { 220a2d1c673SSatish Balay int ierr,i; 221bc5ccf88SSatish Balay 222bc5ccf88SSatish Balay PetscFunctionBegin; 2234c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2244c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2258798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 2269417f4adSLois Curfman McInnes } 2274c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2289417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 229a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 230a2d1c673SSatish Balay stash->array[stash->n] = values[i]; 231a2d1c673SSatish Balay stash->n++; 2329417f4adSLois Curfman McInnes } 233a2d1c673SSatish Balay PetscFunctionReturn(0); 234a2d1c673SSatish Balay } 2354c1ff481SSatish Balay /* 2368798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2374c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2384c1ff481SSatish Balay can be inserted with a single call to this function. 239a2d1c673SSatish Balay 2404c1ff481SSatish Balay Input Parameters: 2414c1ff481SSatish Balay stash - the stash 2424c1ff481SSatish Balay row - the global row correspoiding to the values 2434c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2444c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2454c1ff481SSatish Balay values - the values inserted 2464c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2474c1ff481SSatish Balay this happens because the input is columnoriented. 2484c1ff481SSatish Balay */ 249a2d1c673SSatish Balay #undef __FUNC__ 2508798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private" 2518798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 2524c1ff481SSatish Balay Scalar *values,int stepval) 253a2d1c673SSatish Balay { 2544c1ff481SSatish Balay int ierr,i; 255a2d1c673SSatish Balay 2564c1ff481SSatish Balay PetscFunctionBegin; 2574c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2584c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2598798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 2604c1ff481SSatish Balay } 2614c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2624c1ff481SSatish Balay stash->idx[stash->n] = row; 2634c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 2644c1ff481SSatish Balay stash->array[stash->n] = values[i*stepval]; 2654c1ff481SSatish Balay stash->n++; 2664c1ff481SSatish Balay } 2674c1ff481SSatish Balay PetscFunctionReturn(0); 2684c1ff481SSatish Balay } 2694c1ff481SSatish Balay 2704c1ff481SSatish Balay /* 2718798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 2724c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 2734c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 2744c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 2754c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 2764c1ff481SSatish Balay 2774c1ff481SSatish Balay Input Parameters: 2784c1ff481SSatish Balay stash - the stash 2794c1ff481SSatish Balay row - the global block-row correspoiding to the values 2804c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2814c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 2824c1ff481SSatish Balay values. Each block is of size bs*bs. 2834c1ff481SSatish Balay values - the values inserted 2844c1ff481SSatish Balay rmax - the number of block-rows in the original block. 2854c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 2864c1ff481SSatish Balay idx - the index of the current block-row in the original block. 2874c1ff481SSatish Balay */ 2884c1ff481SSatish Balay #undef __FUNC__ 2898798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private" 2908798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 2914c1ff481SSatish Balay int rmax,int cmax,int idx) 2924c1ff481SSatish Balay { 2934c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 2944c1ff481SSatish Balay Scalar *vals,*array; 295a2d1c673SSatish Balay 296a2d1c673SSatish Balay PetscFunctionBegin; 297a2d1c673SSatish Balay bs2 = bs*bs; 2984c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 2998798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 300a2d1c673SSatish Balay } 3014c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 302a2d1c673SSatish Balay stash->idx[stash->n] = row; 303a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 304a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 305a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 306a2d1c673SSatish Balay funtion call */ 307a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 308a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 309a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 310a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 311a2d1c673SSatish Balay array += 1; 312a2d1c673SSatish Balay vals += cmax*bs; 313a2d1c673SSatish Balay } 3144c1ff481SSatish Balay stash->n++; 3154c1ff481SSatish Balay } 3164c1ff481SSatish Balay PetscFunctionReturn(0); 3174c1ff481SSatish Balay } 3184c1ff481SSatish Balay 3194c1ff481SSatish Balay /* 3208798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3214c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3224c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3234c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3244c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3254c1ff481SSatish Balay 3264c1ff481SSatish Balay Input Parameters: 3274c1ff481SSatish Balay stash - the stash 3284c1ff481SSatish Balay row - the global block-row correspoiding to the values 3294c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3304c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3314c1ff481SSatish Balay values. Each block is of size bs*bs. 3324c1ff481SSatish Balay values - the values inserted 3334c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3344c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3354c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3364c1ff481SSatish Balay */ 3374c1ff481SSatish Balay #undef __FUNC__ 3388798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private" 3398798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 3404c1ff481SSatish Balay Scalar *values,int rmax,int cmax,int idx) 3414c1ff481SSatish Balay { 3424c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3434c1ff481SSatish Balay Scalar *vals,*array; 3444c1ff481SSatish Balay 3454c1ff481SSatish Balay PetscFunctionBegin; 3464c1ff481SSatish Balay bs2 = bs*bs; 3474c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3488798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 3494c1ff481SSatish Balay } 3504c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 3514c1ff481SSatish Balay stash->idx[stash->n] = row; 3524c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3534c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 3544c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 3554c1ff481SSatish Balay funtion call */ 356a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 357a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 358a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 359a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 360a2d1c673SSatish Balay array += bs; 361a2d1c673SSatish Balay vals += rmax*bs; 362a2d1c673SSatish Balay } 363a2d1c673SSatish Balay stash->n++; 3649417f4adSLois Curfman McInnes } 3653a40ed3dSBarry Smith PetscFunctionReturn(0); 3669417f4adSLois Curfman McInnes } 3674c1ff481SSatish Balay /* 3688798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 3694c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 3704c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 3714c1ff481SSatish Balay processors. 372bc5ccf88SSatish Balay 3734c1ff481SSatish Balay Input Parameters: 3744c1ff481SSatish Balay stash - the stash 3754c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 3764c1ff481SSatish Balay for each node. 3774c1ff481SSatish Balay 3784c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 3794c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 3804c1ff481SSatish Balay the proper global indices. 3814c1ff481SSatish Balay */ 382bc5ccf88SSatish Balay #undef __FUNC__ 3838798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private" 3848798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners) 385bc5ccf88SSatish Balay { 386a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 387a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 3884c1ff481SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 389a2d1c673SSatish Balay Scalar *rvalues,*svalues; 390bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 391bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 392bc5ccf88SSatish Balay 393bc5ccf88SSatish Balay PetscFunctionBegin; 394bc5ccf88SSatish Balay 3954c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 396bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 397bc5ccf88SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 398bc5ccf88SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 399bc5ccf88SSatish Balay owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 400a2d1c673SSatish Balay 401bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 402bc5ccf88SSatish Balay idx = stash->idx[i]; 403bc5ccf88SSatish Balay for ( j=0; j<size; j++ ) { 4044c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 405bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 406bc5ccf88SSatish Balay } 407bc5ccf88SSatish Balay } 408bc5ccf88SSatish Balay } 409bc5ccf88SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 410bc5ccf88SSatish Balay 411bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 412bc5ccf88SSatish Balay work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 413bc5ccf88SSatish Balay ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 414bc5ccf88SSatish Balay nreceives = work[rank]; 415bc5ccf88SSatish Balay ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 416bc5ccf88SSatish Balay nmax = work[rank]; 417bc5ccf88SSatish Balay PetscFree(work); 418bc5ccf88SSatish Balay /* post receives: 419bc5ccf88SSatish Balay since we don't know how long each individual message is we 420bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 421bc5ccf88SSatish Balay this is a lot of wasted space. 422bc5ccf88SSatish Balay */ 423a2d1c673SSatish Balay rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 424a2d1c673SSatish Balay rindices = (int *) (rvalues + bs2*nreceives*nmax); 425a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 426bc5ccf88SSatish Balay for ( i=0,count=0; i<nreceives; i++ ) { 427a2d1c673SSatish Balay ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 428bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 429bc5ccf88SSatish Balay ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 430bc5ccf88SSatish Balay recv_waits+count++); CHKERRQ(ierr); 431bc5ccf88SSatish Balay } 432bc5ccf88SSatish Balay 433bc5ccf88SSatish Balay /* do sends: 434bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 435bc5ccf88SSatish Balay the ith processor 436bc5ccf88SSatish Balay */ 437a2d1c673SSatish Balay svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 438a2d1c673SSatish Balay sindices = (int *) (svalues + bs2*stash->n); 439bc5ccf88SSatish Balay send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 440bc5ccf88SSatish Balay CHKPTRQ(send_waits); 441bc5ccf88SSatish Balay startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 442bc5ccf88SSatish Balay starti = startv + size; 443a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 444bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 445bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { 446bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 447bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 448bc5ccf88SSatish Balay } 449bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 450bc5ccf88SSatish Balay j = owner[i]; 451a2d1c673SSatish Balay if (bs2 == 1) { 452bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 453a2d1c673SSatish Balay } else { 4544c1ff481SSatish Balay int k; 4554c1ff481SSatish Balay Scalar *buf1,*buf2; 4564c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 4574c1ff481SSatish Balay buf2 = stash->array+bs2*i; 4584c1ff481SSatish Balay for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 459a2d1c673SSatish Balay } 460bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 461bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 462bc5ccf88SSatish Balay startv[j]++; 463bc5ccf88SSatish Balay starti[j]++; 464bc5ccf88SSatish Balay } 465bc5ccf88SSatish Balay startv[0] = 0; 466bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 467bc5ccf88SSatish Balay for ( i=0,count=0; i<size; i++ ) { 468bc5ccf88SSatish Balay if (procs[i]) { 469a2d1c673SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 470bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 471bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 472bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 473bc5ccf88SSatish Balay } 474bc5ccf88SSatish Balay } 475bc5ccf88SSatish Balay PetscFree(owner); 476bc5ccf88SSatish Balay PetscFree(startv); 477a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 478a2d1c673SSatish Balay for (i=0; i<2*size; i++ ) nprocs[i] = -1; 479a2d1c673SSatish Balay stash->nprocs = nprocs; 480a2d1c673SSatish Balay 481bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 482bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 483bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 484bc5ccf88SSatish Balay stash->rmax = nmax; 485bc5ccf88SSatish Balay PetscFunctionReturn(0); 486bc5ccf88SSatish Balay } 487bc5ccf88SSatish Balay 488a2d1c673SSatish Balay /* 4898798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 4908798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 4914c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 4924c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 4934c1ff481SSatish Balay 4944c1ff481SSatish Balay Input Parameters: 4954c1ff481SSatish Balay stash - the stash 4964c1ff481SSatish Balay 4974c1ff481SSatish Balay Output Parameters: 4984c1ff481SSatish Balay nvals - the number of entries in the current message. 4994c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5004c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5014c1ff481SSatish Balay vals - the values 5024c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5034c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5044c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 505a2d1c673SSatish Balay */ 506bc5ccf88SSatish Balay #undef __FUNC__ 5078798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private" 5088798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 509bc5ccf88SSatish Balay { 510a2d1c673SSatish Balay int i,ierr,size=stash->size,*flg_v,*flg_i; 511a2d1c673SSatish Balay int i1,i2,*rindices,match_found=0,bs2; 512a2d1c673SSatish Balay MPI_Status recv_status; 513bc5ccf88SSatish Balay 514bc5ccf88SSatish Balay PetscFunctionBegin; 515bc5ccf88SSatish Balay 516a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 517a2d1c673SSatish Balay /* Return if no more messages to process */ 518a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 519a2d1c673SSatish Balay 520a2d1c673SSatish Balay flg_v = stash->nprocs; 521a2d1c673SSatish Balay flg_i = flg_v + size; 5224c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 523a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 524a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 525a2d1c673SSatish Balay while (!match_found) { 526a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 527a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 528a2d1c673SSatish Balay if (i % 2) { 529a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 530a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 531a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 532a2d1c673SSatish Balay } else { 533a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 534a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 535a2d1c673SSatish Balay *nvals = *nvals/bs2; 536bc5ccf88SSatish Balay } 537a2d1c673SSatish Balay 538a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 539a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 540a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 541a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 542a2d1c673SSatish Balay rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 543a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 544a2d1c673SSatish Balay *cols = *rows + *nvals; 545a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 546a2d1c673SSatish Balay *flg = 1; 547a2d1c673SSatish Balay stash->nprocessed ++; 548a2d1c673SSatish Balay match_found = 1; 549bc5ccf88SSatish Balay } 550bc5ccf88SSatish Balay } 551bc5ccf88SSatish Balay PetscFunctionReturn(0); 552bc5ccf88SSatish Balay } 553