1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*6831982aSBarry Smith static char vcid[] = "$Id: matstash.c,v 1.35 1999/06/30 23:52:35 balay Exp bsmith $"; 32d5177cdSBarry Smith #endif 42d5177cdSBarry Smith 570f55243SBarry Smith #include "src/mat/matimpl.h" 69417f4adSLois Curfman McInnes 7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 84c1ff481SSatish Balay 99417f4adSLois Curfman McInnes /* 108798bf22SSatish Balay MatStashCreate_Private - Creates a stash ,currently used for all the parallel 114c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 124c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 139417f4adSLois Curfman McInnes 144c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 154c1ff481SSatish Balay 164c1ff481SSatish Balay Input Parameters: 174c1ff481SSatish Balay comm - communicator, required for scatters. 184c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 194c1ff481SSatish Balay 204c1ff481SSatish Balay Output Parameters: 214c1ff481SSatish Balay stash - the newly created stash 229417f4adSLois Curfman McInnes */ 235615d1e5SSatish Balay #undef __FUNC__ 248798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private" 258798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 269417f4adSLois Curfman McInnes { 27434d7ff9SSatish Balay int ierr,flg,max,*opt,nopt; 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 33a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 34a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 35bc5ccf88SSatish Balay 36434d7ff9SSatish Balay nopt = stash->size; 37434d7ff9SSatish Balay opt = (int*) PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt); 38434d7ff9SSatish Balay ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 39434d7ff9SSatish Balay if (flg) { 40434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 41434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 42434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 43f4ab19daSSatish Balay else max = 0; /* Use default */ 44434d7ff9SSatish Balay stash->umax = max; 45434d7ff9SSatish Balay } else { 46434d7ff9SSatish Balay stash->umax = 0; 47434d7ff9SSatish Balay } 48606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 494c1ff481SSatish Balay if (bs <= 0) bs = 1; 50a2d1c673SSatish Balay 514c1ff481SSatish Balay stash->bs = bs; 529417f4adSLois Curfman McInnes stash->nmax = 0; 53434d7ff9SSatish Balay stash->oldnmax = 0; 549417f4adSLois Curfman McInnes stash->n = 0; 554c1ff481SSatish Balay stash->reallocs = -1; 569417f4adSLois Curfman McInnes stash->idx = 0; 579417f4adSLois Curfman McInnes stash->idy = 0; 58bc5ccf88SSatish Balay stash->array = 0; 599417f4adSLois Curfman McInnes 60bc5ccf88SSatish Balay stash->send_waits = 0; 61bc5ccf88SSatish Balay stash->recv_waits = 0; 62a2d1c673SSatish Balay stash->send_status = 0; 63bc5ccf88SSatish Balay stash->nsends = 0; 64bc5ccf88SSatish Balay stash->nrecvs = 0; 65bc5ccf88SSatish Balay stash->svalues = 0; 66bc5ccf88SSatish Balay stash->rvalues = 0; 67bc5ccf88SSatish Balay stash->rmax = 0; 68a2d1c673SSatish Balay stash->nprocs = 0; 69a2d1c673SSatish Balay stash->nprocessed = 0; 703a40ed3dSBarry Smith PetscFunctionReturn(0); 719417f4adSLois Curfman McInnes } 729417f4adSLois Curfman McInnes 734c1ff481SSatish Balay /* 748798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 754c1ff481SSatish Balay */ 765615d1e5SSatish Balay #undef __FUNC__ 778798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private" 788798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash) 799417f4adSLois Curfman McInnes { 80bc5ccf88SSatish Balay int ierr; 81a2d1c673SSatish Balay 82bc5ccf88SSatish Balay PetscFunctionBegin; 83bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr); 84606d414cSSatish Balay if (stash->array) { 85606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 86606d414cSSatish Balay stash->array = 0; 87606d414cSSatish Balay } 88bc5ccf88SSatish Balay PetscFunctionReturn(0); 89bc5ccf88SSatish Balay } 90bc5ccf88SSatish Balay 914c1ff481SSatish Balay /* 928798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 934c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 944c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 954c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 964c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 974c1ff481SSatish Balay so that the same value can be used the next time through. 984c1ff481SSatish Balay */ 99bc5ccf88SSatish Balay #undef __FUNC__ 1008798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private" 1018798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash) 102bc5ccf88SSatish Balay { 103434d7ff9SSatish Balay int nsends=stash->nsends,ierr,bs2,oldnmax; 104a2d1c673SSatish Balay MPI_Status *send_status; 105a2d1c673SSatish Balay 1063a40ed3dSBarry Smith PetscFunctionBegin; 107a2d1c673SSatish Balay /* wait on sends */ 108a2d1c673SSatish Balay if (nsends) { 109a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 110a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 111606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 112a2d1c673SSatish Balay } 113a2d1c673SSatish Balay 114c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 115434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 116434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 11794b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1188a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 119434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 120434d7ff9SSatish Balay 121d07ff455SSatish Balay stash->nmax = 0; 122d07ff455SSatish Balay stash->n = 0; 1234c1ff481SSatish Balay stash->reallocs = -1; 124bc5ccf88SSatish Balay stash->rmax = 0; 125a2d1c673SSatish Balay stash->nprocessed = 0; 126bc5ccf88SSatish Balay 127bc5ccf88SSatish Balay if (stash->array) { 128606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 129bc5ccf88SSatish Balay stash->array = 0; 130bc5ccf88SSatish Balay stash->idx = 0; 131bc5ccf88SSatish Balay stash->idy = 0; 132bc5ccf88SSatish Balay } 133606d414cSSatish Balay if (stash->send_waits) { 134606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 135606d414cSSatish Balay stash->send_waits = 0; 136606d414cSSatish Balay } 137606d414cSSatish Balay if (stash->recv_waits) { 138606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 139606d414cSSatish Balay stash->recv_waits = 0; 140606d414cSSatish Balay } 141606d414cSSatish Balay if (stash->svalues) { 142606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 143606d414cSSatish Balay stash->svalues = 0; 144606d414cSSatish Balay } 145606d414cSSatish Balay if (stash->rvalues) { 146606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 147606d414cSSatish Balay stash->rvalues = 0; 148606d414cSSatish Balay } 149606d414cSSatish Balay if (stash->nprocs) { 150606d414cSSatish Balay ierr = PetscFree(stash->nprocs); 151606d414cSSatish Balay stash->nprocs = 0; 152606d414cSSatish Balay } 153bc5ccf88SSatish Balay 1543a40ed3dSBarry Smith PetscFunctionReturn(0); 1559417f4adSLois Curfman McInnes } 1569417f4adSLois Curfman McInnes 1574c1ff481SSatish Balay /* 1588798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1594c1ff481SSatish Balay 1604c1ff481SSatish Balay Input Parameters: 1614c1ff481SSatish Balay stash - the stash 16294b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1634c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1644c1ff481SSatish Balay 1654c1ff481SSatish Balay */ 1665615d1e5SSatish Balay #undef __FUNC__ 1678798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private" 1688798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 16997530c3fSBarry Smith { 17094b769a5SSatish Balay int bs2 = stash->bs*stash->bs; 17194b769a5SSatish Balay 1723a40ed3dSBarry Smith PetscFunctionBegin; 17394b769a5SSatish Balay *nstash = stash->n*bs2; 174434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 175434d7ff9SSatish Balay else *reallocs = stash->reallocs; 176bc5ccf88SSatish Balay PetscFunctionReturn(0); 177bc5ccf88SSatish Balay } 1784c1ff481SSatish Balay 1794c1ff481SSatish Balay 1804c1ff481SSatish Balay /* 1818798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1824c1ff481SSatish Balay 1834c1ff481SSatish Balay Input Parameters: 1844c1ff481SSatish Balay stash - the stash 1854c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1864c1ff481SSatish Balay this value is used while allocating memory. 1874c1ff481SSatish Balay */ 188bc5ccf88SSatish Balay #undef __FUNC__ 1898798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private" 1908798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max) 191bc5ccf88SSatish Balay { 192bc5ccf88SSatish Balay PetscFunctionBegin; 193434d7ff9SSatish Balay stash->umax = max; 1943a40ed3dSBarry Smith PetscFunctionReturn(0); 19597530c3fSBarry Smith } 19697530c3fSBarry Smith 1978798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1984c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1994c1ff481SSatish Balay being inserted into the stash. 2004c1ff481SSatish Balay 2014c1ff481SSatish Balay Input Parameters: 2024c1ff481SSatish Balay stash - the stash 2034c1ff481SSatish Balay incr - the minimum increase requested 2044c1ff481SSatish Balay 2054c1ff481SSatish Balay Notes: 2064c1ff481SSatish Balay This routine doubles the currently used memory. 2074c1ff481SSatish Balay */ 2085615d1e5SSatish Balay #undef __FUNC__ 2098798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private" 2108798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr) 2119417f4adSLois Curfman McInnes { 212549d3d68SSatish Balay int *n_idx,*n_idy,newnmax,bs2,ierr; 213bc5ccf88SSatish Balay Scalar *n_array; 2149417f4adSLois Curfman McInnes 2153a40ed3dSBarry Smith PetscFunctionBegin; 2169417f4adSLois Curfman McInnes /* allocate a larger stash */ 21794b769a5SSatish Balay bs2 = stash->bs*stash->bs; 218c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 219434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 220434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 221c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 222434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 223434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 224434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2254c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 226d07ff455SSatish Balay 227a2d1c673SSatish Balay n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 228a2d1c673SSatish Balay n_idx = (int *) (n_array + bs2*newnmax); 229d07ff455SSatish Balay n_idy = (int *) (n_idx + newnmax); 230549d3d68SSatish Balay ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));CHKERRQ(ierr); 231549d3d68SSatish Balay ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr); 232549d3d68SSatish Balay ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr); 233606d414cSSatish Balay if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 234d07ff455SSatish Balay stash->array = n_array; 235d07ff455SSatish Balay stash->idx = n_idx; 236d07ff455SSatish Balay stash->idy = n_idy; 237d07ff455SSatish Balay stash->nmax = newnmax; 238bc5ccf88SSatish Balay stash->reallocs++; 239bc5ccf88SSatish Balay PetscFunctionReturn(0); 240bc5ccf88SSatish Balay } 241bc5ccf88SSatish Balay /* 2428798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2434c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2444c1ff481SSatish Balay can be inserted with a single call to this function. 2454c1ff481SSatish Balay 2464c1ff481SSatish Balay Input Parameters: 2474c1ff481SSatish Balay stash - the stash 2484c1ff481SSatish Balay row - the global row correspoiding to the values 2494c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2504c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2514c1ff481SSatish Balay values - the values inserted 252bc5ccf88SSatish Balay */ 253bc5ccf88SSatish Balay #undef __FUNC__ 2548798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private" 2558798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 256bc5ccf88SSatish Balay { 257a2d1c673SSatish Balay int ierr,i; 258bc5ccf88SSatish Balay 259bc5ccf88SSatish Balay PetscFunctionBegin; 2604c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2614c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2628798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2639417f4adSLois Curfman McInnes } 2644c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2659417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 266a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 267a2d1c673SSatish Balay stash->array[stash->n] = values[i]; 268a2d1c673SSatish Balay stash->n++; 2699417f4adSLois Curfman McInnes } 270a2d1c673SSatish Balay PetscFunctionReturn(0); 271a2d1c673SSatish Balay } 2724c1ff481SSatish Balay /* 2738798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2744c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2754c1ff481SSatish Balay can be inserted with a single call to this function. 276a2d1c673SSatish Balay 2774c1ff481SSatish Balay Input Parameters: 2784c1ff481SSatish Balay stash - the stash 2794c1ff481SSatish Balay row - the global row correspoiding to the values 2804c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2814c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2824c1ff481SSatish Balay values - the values inserted 2834c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2844c1ff481SSatish Balay this happens because the input is columnoriented. 2854c1ff481SSatish Balay */ 286a2d1c673SSatish Balay #undef __FUNC__ 2878798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private" 2888798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 2894c1ff481SSatish Balay Scalar *values,int stepval) 290a2d1c673SSatish Balay { 2914c1ff481SSatish Balay int ierr,i; 292a2d1c673SSatish Balay 2934c1ff481SSatish Balay PetscFunctionBegin; 2944c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2954c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2968798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2974c1ff481SSatish Balay } 2984c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2994c1ff481SSatish Balay stash->idx[stash->n] = row; 3004c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3014c1ff481SSatish Balay stash->array[stash->n] = values[i*stepval]; 3024c1ff481SSatish Balay stash->n++; 3034c1ff481SSatish Balay } 3044c1ff481SSatish Balay PetscFunctionReturn(0); 3054c1ff481SSatish Balay } 3064c1ff481SSatish Balay 3074c1ff481SSatish Balay /* 3088798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3094c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3104c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3114c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3124c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3134c1ff481SSatish Balay 3144c1ff481SSatish Balay Input Parameters: 3154c1ff481SSatish Balay stash - the stash 3164c1ff481SSatish Balay row - the global block-row correspoiding to the values 3174c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3184c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3194c1ff481SSatish Balay values. Each block is of size bs*bs. 3204c1ff481SSatish Balay values - the values inserted 3214c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3224c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3234c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3244c1ff481SSatish Balay */ 3254c1ff481SSatish Balay #undef __FUNC__ 3268798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private" 3278798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 3284c1ff481SSatish Balay int rmax,int cmax,int idx) 3294c1ff481SSatish Balay { 3304c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3314c1ff481SSatish Balay Scalar *vals,*array; 332a2d1c673SSatish Balay 333a2d1c673SSatish Balay PetscFunctionBegin; 334a2d1c673SSatish Balay bs2 = bs*bs; 3354c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3368798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 337a2d1c673SSatish Balay } 3384c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 339a2d1c673SSatish Balay stash->idx[stash->n] = row; 340a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 341a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 342a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 343a2d1c673SSatish Balay funtion call */ 344a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 345a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 346a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 347a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 348a2d1c673SSatish Balay array += 1; 349a2d1c673SSatish Balay vals += cmax*bs; 350a2d1c673SSatish Balay } 3514c1ff481SSatish Balay stash->n++; 3524c1ff481SSatish Balay } 3534c1ff481SSatish Balay PetscFunctionReturn(0); 3544c1ff481SSatish Balay } 3554c1ff481SSatish Balay 3564c1ff481SSatish Balay /* 3578798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3584c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3594c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3604c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3614c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3624c1ff481SSatish Balay 3634c1ff481SSatish Balay Input Parameters: 3644c1ff481SSatish Balay stash - the stash 3654c1ff481SSatish Balay row - the global block-row correspoiding to the values 3664c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3674c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3684c1ff481SSatish Balay values. Each block is of size bs*bs. 3694c1ff481SSatish Balay values - the values inserted 3704c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3714c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3724c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3734c1ff481SSatish Balay */ 3744c1ff481SSatish Balay #undef __FUNC__ 3758798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private" 3768798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 3774c1ff481SSatish Balay Scalar *values,int rmax,int cmax,int idx) 3784c1ff481SSatish Balay { 3794c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3804c1ff481SSatish Balay Scalar *vals,*array; 3814c1ff481SSatish Balay 3824c1ff481SSatish Balay PetscFunctionBegin; 3834c1ff481SSatish Balay bs2 = bs*bs; 3844c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3858798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3864c1ff481SSatish Balay } 3874c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 3884c1ff481SSatish Balay stash->idx[stash->n] = row; 3894c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3904c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 3914c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 3924c1ff481SSatish Balay funtion call */ 393a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 394a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 395a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 396a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 397a2d1c673SSatish Balay array += bs; 398a2d1c673SSatish Balay vals += rmax*bs; 399a2d1c673SSatish Balay } 400a2d1c673SSatish Balay stash->n++; 4019417f4adSLois Curfman McInnes } 4023a40ed3dSBarry Smith PetscFunctionReturn(0); 4039417f4adSLois Curfman McInnes } 4044c1ff481SSatish Balay /* 4058798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4064c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4074c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4084c1ff481SSatish Balay processors. 409bc5ccf88SSatish Balay 4104c1ff481SSatish Balay Input Parameters: 4114c1ff481SSatish Balay stash - the stash 4124c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4134c1ff481SSatish Balay for each node. 4144c1ff481SSatish Balay 4154c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4164c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4174c1ff481SSatish Balay the proper global indices. 4184c1ff481SSatish Balay */ 419bc5ccf88SSatish Balay #undef __FUNC__ 4208798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private" 4218798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners) 422bc5ccf88SSatish Balay { 423a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 424a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 4254c1ff481SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 426a2d1c673SSatish Balay Scalar *rvalues,*svalues; 427bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 428bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 429bc5ccf88SSatish Balay 430bc5ccf88SSatish Balay PetscFunctionBegin; 431bc5ccf88SSatish Balay 4324c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 433bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 434bc5ccf88SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 435549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 436549d3d68SSatish Balay procs = nprocs + size; 437bc5ccf88SSatish Balay owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) );CHKPTRQ(owner); 438a2d1c673SSatish Balay 439bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 440bc5ccf88SSatish Balay idx = stash->idx[i]; 441bc5ccf88SSatish Balay for ( j=0; j<size; j++ ) { 4424c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 443bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 444bc5ccf88SSatish Balay } 445bc5ccf88SSatish Balay } 446bc5ccf88SSatish Balay } 447bc5ccf88SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 448bc5ccf88SSatish Balay 449bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 450*6831982aSBarry Smith work = (int *)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 451*6831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 452bc5ccf88SSatish Balay nmax = work[rank]; 453*6831982aSBarry Smith nreceives = work[size+rank]; 454606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 455bc5ccf88SSatish Balay /* post receives: 456bc5ccf88SSatish Balay since we don't know how long each individual message is we 457bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 458bc5ccf88SSatish Balay this is a lot of wasted space. 459bc5ccf88SSatish Balay */ 460a2d1c673SSatish Balay rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 461a2d1c673SSatish Balay rindices = (int *) (rvalues + bs2*nreceives*nmax); 462a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 463bc5ccf88SSatish Balay for ( i=0,count=0; i<nreceives; i++ ) { 464a2d1c673SSatish Balay ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 465bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 466bc5ccf88SSatish Balay ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 467bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 468bc5ccf88SSatish Balay } 469bc5ccf88SSatish Balay 470bc5ccf88SSatish Balay /* do sends: 471bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 472bc5ccf88SSatish Balay the ith processor 473bc5ccf88SSatish Balay */ 474a2d1c673SSatish Balay svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 475a2d1c673SSatish Balay sindices = (int *) (svalues + bs2*stash->n); 476549d3d68SSatish Balay send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 477bc5ccf88SSatish Balay startv = (int *) PetscMalloc(2*size*sizeof(int) );CHKPTRQ(startv); 478bc5ccf88SSatish Balay starti = startv + size; 479a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 480bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 481bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { 482bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 483bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 484bc5ccf88SSatish Balay } 485bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 486bc5ccf88SSatish Balay j = owner[i]; 487a2d1c673SSatish Balay if (bs2 == 1) { 488bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 489a2d1c673SSatish Balay } else { 4904c1ff481SSatish Balay int k; 4914c1ff481SSatish Balay Scalar *buf1,*buf2; 4924c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 4934c1ff481SSatish Balay buf2 = stash->array+bs2*i; 4944c1ff481SSatish Balay for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 495a2d1c673SSatish Balay } 496bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 497bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 498bc5ccf88SSatish Balay startv[j]++; 499bc5ccf88SSatish Balay starti[j]++; 500bc5ccf88SSatish Balay } 501bc5ccf88SSatish Balay startv[0] = 0; 502bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 503bc5ccf88SSatish Balay for ( i=0,count=0; i<size; i++ ) { 504bc5ccf88SSatish Balay if (procs[i]) { 505a2d1c673SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 506bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 507bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 508bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 509bc5ccf88SSatish Balay } 510bc5ccf88SSatish Balay } 511606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 512606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 513a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 514a2d1c673SSatish Balay for (i=0; i<2*size; i++ ) nprocs[i] = -1; 515a2d1c673SSatish Balay stash->nprocs = nprocs; 516a2d1c673SSatish Balay 517bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 518bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 519bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 520bc5ccf88SSatish Balay stash->rmax = nmax; 521bc5ccf88SSatish Balay PetscFunctionReturn(0); 522bc5ccf88SSatish Balay } 523bc5ccf88SSatish Balay 524a2d1c673SSatish Balay /* 5258798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5268798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5274c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5284c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5294c1ff481SSatish Balay 5304c1ff481SSatish Balay Input Parameters: 5314c1ff481SSatish Balay stash - the stash 5324c1ff481SSatish Balay 5334c1ff481SSatish Balay Output Parameters: 5344c1ff481SSatish Balay nvals - the number of entries in the current message. 5354c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5364c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5374c1ff481SSatish Balay vals - the values 5384c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5394c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5404c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 541a2d1c673SSatish Balay */ 542bc5ccf88SSatish Balay #undef __FUNC__ 5438798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private" 5448798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 545bc5ccf88SSatish Balay { 546a2d1c673SSatish Balay int i,ierr,size=stash->size,*flg_v,*flg_i; 547a2d1c673SSatish Balay int i1,i2,*rindices,match_found=0,bs2; 548a2d1c673SSatish Balay MPI_Status recv_status; 549bc5ccf88SSatish Balay 550bc5ccf88SSatish Balay PetscFunctionBegin; 551bc5ccf88SSatish Balay 552a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 553a2d1c673SSatish Balay /* Return if no more messages to process */ 554a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 555a2d1c673SSatish Balay 556a2d1c673SSatish Balay flg_v = stash->nprocs; 557a2d1c673SSatish Balay flg_i = flg_v + size; 5584c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 559a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 560a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 561a2d1c673SSatish Balay while (!match_found) { 562a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 563a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 564a2d1c673SSatish Balay if (i % 2) { 565a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 566a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 567a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 568a2d1c673SSatish Balay } else { 569a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 570a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 571a2d1c673SSatish Balay *nvals = *nvals/bs2; 572bc5ccf88SSatish Balay } 573a2d1c673SSatish Balay 574a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 575a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 576a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 577a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 578a2d1c673SSatish Balay rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 579a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 580a2d1c673SSatish Balay *cols = *rows + *nvals; 581a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 582a2d1c673SSatish Balay *flg = 1; 583a2d1c673SSatish Balay stash->nprocessed ++; 584a2d1c673SSatish Balay match_found = 1; 585bc5ccf88SSatish Balay } 586bc5ccf88SSatish Balay } 587bc5ccf88SSatish Balay PetscFunctionReturn(0); 588bc5ccf88SSatish Balay } 589