1*0ae3cd3bSBarry Smith /*$Id: matstash.c,v 1.42 2000/04/12 04:24:13 bsmith Exp bsmith $*/ 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 49417f4adSLois Curfman McInnes 53eda8832SBarry Smith /* 6*0ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 7*0ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 83eda8832SBarry Smith */ 9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 104c1ff481SSatish Balay 119417f4adSLois Curfman McInnes /* 128798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 134c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 144c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 159417f4adSLois Curfman McInnes 164c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 174c1ff481SSatish Balay 184c1ff481SSatish Balay Input Parameters: 194c1ff481SSatish Balay comm - communicator, required for scatters. 204c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 214c1ff481SSatish Balay 224c1ff481SSatish Balay Output Parameters: 234c1ff481SSatish Balay stash - the newly created stash 249417f4adSLois Curfman McInnes */ 255615d1e5SSatish Balay #undef __FUNC__ 26b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashCreate_Private" 278798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash) 289417f4adSLois Curfman McInnes { 29f1af5d2fSBarry Smith int ierr,max,*opt,nopt; 30f1af5d2fSBarry Smith PetscTruth flg; 31bc5ccf88SSatish Balay 323a40ed3dSBarry Smith PetscFunctionBegin; 33bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 34bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 35a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 36a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 37a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 38bc5ccf88SSatish Balay 39434d7ff9SSatish Balay nopt = stash->size; 40434d7ff9SSatish Balay opt = (int*)PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt); 41434d7ff9SSatish Balay ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 42434d7ff9SSatish Balay if (flg) { 43434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 44434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 45434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 46f4ab19daSSatish Balay else max = 0; /* Use default */ 47434d7ff9SSatish Balay stash->umax = max; 48434d7ff9SSatish Balay } else { 49434d7ff9SSatish Balay stash->umax = 0; 50434d7ff9SSatish Balay } 51606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 524c1ff481SSatish Balay if (bs <= 0) bs = 1; 53a2d1c673SSatish Balay 544c1ff481SSatish Balay stash->bs = bs; 559417f4adSLois Curfman McInnes stash->nmax = 0; 56434d7ff9SSatish Balay stash->oldnmax = 0; 579417f4adSLois Curfman McInnes stash->n = 0; 584c1ff481SSatish Balay stash->reallocs = -1; 599417f4adSLois Curfman McInnes stash->idx = 0; 609417f4adSLois Curfman McInnes stash->idy = 0; 61bc5ccf88SSatish Balay stash->array = 0; 629417f4adSLois Curfman McInnes 63bc5ccf88SSatish Balay stash->send_waits = 0; 64bc5ccf88SSatish Balay stash->recv_waits = 0; 65a2d1c673SSatish Balay stash->send_status = 0; 66bc5ccf88SSatish Balay stash->nsends = 0; 67bc5ccf88SSatish Balay stash->nrecvs = 0; 68bc5ccf88SSatish Balay stash->svalues = 0; 69bc5ccf88SSatish Balay stash->rvalues = 0; 70bc5ccf88SSatish Balay stash->rmax = 0; 71a2d1c673SSatish Balay stash->nprocs = 0; 72a2d1c673SSatish Balay stash->nprocessed = 0; 733a40ed3dSBarry Smith PetscFunctionReturn(0); 749417f4adSLois Curfman McInnes } 759417f4adSLois Curfman McInnes 764c1ff481SSatish Balay /* 778798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 784c1ff481SSatish Balay */ 795615d1e5SSatish Balay #undef __FUNC__ 80b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashDestroy_Private" 818798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash) 829417f4adSLois Curfman McInnes { 83bc5ccf88SSatish Balay int ierr; 84a2d1c673SSatish Balay 85bc5ccf88SSatish Balay PetscFunctionBegin; 86bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr); 87606d414cSSatish Balay if (stash->array) { 88606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 89606d414cSSatish Balay stash->array = 0; 90606d414cSSatish Balay } 91bc5ccf88SSatish Balay PetscFunctionReturn(0); 92bc5ccf88SSatish Balay } 93bc5ccf88SSatish Balay 944c1ff481SSatish Balay /* 958798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 964c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 974c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 984c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 994c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1004c1ff481SSatish Balay so that the same value can be used the next time through. 1014c1ff481SSatish Balay */ 102bc5ccf88SSatish Balay #undef __FUNC__ 103b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashScatterEnd_Private" 1048798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash) 105bc5ccf88SSatish Balay { 106434d7ff9SSatish Balay int nsends=stash->nsends,ierr,bs2,oldnmax; 107a2d1c673SSatish Balay MPI_Status *send_status; 108a2d1c673SSatish Balay 1093a40ed3dSBarry Smith PetscFunctionBegin; 110a2d1c673SSatish Balay /* wait on sends */ 111a2d1c673SSatish Balay if (nsends) { 112a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 113a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 114606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 115a2d1c673SSatish Balay } 116a2d1c673SSatish Balay 117c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 118434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 119434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 12094b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1218a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 122434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 123434d7ff9SSatish Balay 124d07ff455SSatish Balay stash->nmax = 0; 125d07ff455SSatish Balay stash->n = 0; 1264c1ff481SSatish Balay stash->reallocs = -1; 127bc5ccf88SSatish Balay stash->rmax = 0; 128a2d1c673SSatish Balay stash->nprocessed = 0; 129bc5ccf88SSatish Balay 130bc5ccf88SSatish Balay if (stash->array) { 131606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 132bc5ccf88SSatish Balay stash->array = 0; 133bc5ccf88SSatish Balay stash->idx = 0; 134bc5ccf88SSatish Balay stash->idy = 0; 135bc5ccf88SSatish Balay } 136606d414cSSatish Balay if (stash->send_waits) { 137606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 138606d414cSSatish Balay stash->send_waits = 0; 139606d414cSSatish Balay } 140606d414cSSatish Balay if (stash->recv_waits) { 141606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 142606d414cSSatish Balay stash->recv_waits = 0; 143606d414cSSatish Balay } 144606d414cSSatish Balay if (stash->svalues) { 145606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 146606d414cSSatish Balay stash->svalues = 0; 147606d414cSSatish Balay } 148606d414cSSatish Balay if (stash->rvalues) { 149606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 150606d414cSSatish Balay stash->rvalues = 0; 151606d414cSSatish Balay } 152606d414cSSatish Balay if (stash->nprocs) { 153606d414cSSatish Balay ierr = PetscFree(stash->nprocs); 154606d414cSSatish Balay stash->nprocs = 0; 155606d414cSSatish Balay } 156bc5ccf88SSatish Balay 1573a40ed3dSBarry Smith PetscFunctionReturn(0); 1589417f4adSLois Curfman McInnes } 1599417f4adSLois Curfman McInnes 1604c1ff481SSatish Balay /* 1618798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1624c1ff481SSatish Balay 1634c1ff481SSatish Balay Input Parameters: 1644c1ff481SSatish Balay stash - the stash 16594b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1664c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1674c1ff481SSatish Balay 1684c1ff481SSatish Balay */ 1695615d1e5SSatish Balay #undef __FUNC__ 170b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashGetInfo_Private" 1718798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs) 17297530c3fSBarry Smith { 17394b769a5SSatish Balay int bs2 = stash->bs*stash->bs; 17494b769a5SSatish Balay 1753a40ed3dSBarry Smith PetscFunctionBegin; 17694b769a5SSatish Balay *nstash = stash->n*bs2; 177434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 178434d7ff9SSatish Balay else *reallocs = stash->reallocs; 179bc5ccf88SSatish Balay PetscFunctionReturn(0); 180bc5ccf88SSatish Balay } 1814c1ff481SSatish Balay 1824c1ff481SSatish Balay 1834c1ff481SSatish Balay /* 1848798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1854c1ff481SSatish Balay 1864c1ff481SSatish Balay Input Parameters: 1874c1ff481SSatish Balay stash - the stash 1884c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1894c1ff481SSatish Balay this value is used while allocating memory. 1904c1ff481SSatish Balay */ 191bc5ccf88SSatish Balay #undef __FUNC__ 192b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashSetInitialSize_Private" 1938798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max) 194bc5ccf88SSatish Balay { 195bc5ccf88SSatish Balay PetscFunctionBegin; 196434d7ff9SSatish Balay stash->umax = max; 1973a40ed3dSBarry Smith PetscFunctionReturn(0); 19897530c3fSBarry Smith } 19997530c3fSBarry Smith 2008798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2014c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2024c1ff481SSatish Balay being inserted into the stash. 2034c1ff481SSatish Balay 2044c1ff481SSatish Balay Input Parameters: 2054c1ff481SSatish Balay stash - the stash 2064c1ff481SSatish Balay incr - the minimum increase requested 2074c1ff481SSatish Balay 2084c1ff481SSatish Balay Notes: 2094c1ff481SSatish Balay This routine doubles the currently used memory. 2104c1ff481SSatish Balay */ 2115615d1e5SSatish Balay #undef __FUNC__ 212b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashExpand_Private" 2138798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr) 2149417f4adSLois Curfman McInnes { 215549d3d68SSatish Balay int *n_idx,*n_idy,newnmax,bs2,ierr; 2163eda8832SBarry Smith MatScalar *n_array; 2179417f4adSLois Curfman McInnes 2183a40ed3dSBarry Smith PetscFunctionBegin; 2199417f4adSLois Curfman McInnes /* allocate a larger stash */ 22094b769a5SSatish Balay bs2 = stash->bs*stash->bs; 221c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 222434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 223434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 224c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 225434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 226434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 227434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2284c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 229d07ff455SSatish Balay 2303eda8832SBarry Smith n_array = (MatScalar*)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)));CHKPTRQ(n_array); 231a2d1c673SSatish Balay n_idx = (int*)(n_array + bs2*newnmax); 232d07ff455SSatish Balay n_idy = (int*)(n_idx + newnmax); 2333eda8832SBarry Smith ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr); 234549d3d68SSatish Balay ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr); 235549d3d68SSatish Balay ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr); 236606d414cSSatish Balay if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 237d07ff455SSatish Balay stash->array = n_array; 238d07ff455SSatish Balay stash->idx = n_idx; 239d07ff455SSatish Balay stash->idy = n_idy; 240d07ff455SSatish Balay stash->nmax = newnmax; 241bc5ccf88SSatish Balay stash->reallocs++; 242bc5ccf88SSatish Balay PetscFunctionReturn(0); 243bc5ccf88SSatish Balay } 244bc5ccf88SSatish Balay /* 2458798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2464c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2474c1ff481SSatish Balay can be inserted with a single call to this function. 2484c1ff481SSatish Balay 2494c1ff481SSatish Balay Input Parameters: 2504c1ff481SSatish Balay stash - the stash 2514c1ff481SSatish Balay row - the global row correspoiding to the values 2524c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2534c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2544c1ff481SSatish Balay values - the values inserted 255bc5ccf88SSatish Balay */ 256bc5ccf88SSatish Balay #undef __FUNC__ 257b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashValuesRow_Private" 258*0ae3cd3bSBarry Smith int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values) 259bc5ccf88SSatish Balay { 260a2d1c673SSatish Balay int ierr,i; 261bc5ccf88SSatish Balay 262bc5ccf88SSatish Balay PetscFunctionBegin; 2634c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2644c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2658798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2669417f4adSLois Curfman McInnes } 2674c1ff481SSatish Balay for (i=0; i<n; i++) { 2689417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 269a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 270*0ae3cd3bSBarry Smith stash->array[stash->n] = values[i]; 271a2d1c673SSatish Balay stash->n++; 2729417f4adSLois Curfman McInnes } 273a2d1c673SSatish Balay PetscFunctionReturn(0); 274a2d1c673SSatish Balay } 2754c1ff481SSatish Balay /* 2768798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2774c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2784c1ff481SSatish Balay can be inserted with a single call to this function. 279a2d1c673SSatish Balay 2804c1ff481SSatish Balay Input Parameters: 2814c1ff481SSatish Balay stash - the stash 2824c1ff481SSatish Balay row - the global row correspoiding to the values 2834c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2844c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2854c1ff481SSatish Balay values - the values inserted 2864c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2874c1ff481SSatish Balay this happens because the input is columnoriented. 2884c1ff481SSatish Balay */ 289a2d1c673SSatish Balay #undef __FUNC__ 290b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashValuesCol_Private" 291*0ae3cd3bSBarry Smith int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int stepval) 292a2d1c673SSatish Balay { 2934c1ff481SSatish Balay int ierr,i; 294a2d1c673SSatish Balay 2954c1ff481SSatish Balay PetscFunctionBegin; 2964c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2974c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2988798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2994c1ff481SSatish Balay } 3004c1ff481SSatish Balay for (i=0; i<n; i++) { 3014c1ff481SSatish Balay stash->idx[stash->n] = row; 3024c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 303*0ae3cd3bSBarry Smith stash->array[stash->n] = values[i*stepval]; 3044c1ff481SSatish Balay stash->n++; 3054c1ff481SSatish Balay } 3064c1ff481SSatish Balay PetscFunctionReturn(0); 3074c1ff481SSatish Balay } 3084c1ff481SSatish Balay 3094c1ff481SSatish Balay /* 3108798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3114c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3124c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3134c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3144c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3154c1ff481SSatish Balay 3164c1ff481SSatish Balay Input Parameters: 3174c1ff481SSatish Balay stash - the stash 3184c1ff481SSatish Balay row - the global block-row correspoiding to the values 3194c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3204c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3214c1ff481SSatish Balay values. Each block is of size bs*bs. 3224c1ff481SSatish Balay values - the values inserted 3234c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3244c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3254c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3264c1ff481SSatish Balay */ 3274c1ff481SSatish Balay #undef __FUNC__ 328b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashValuesRowBlocked_Private" 329*0ae3cd3bSBarry Smith int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx) 3304c1ff481SSatish Balay { 3314c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 332*0ae3cd3bSBarry Smith MatScalar *vals,*array; 333a2d1c673SSatish Balay 334a2d1c673SSatish Balay PetscFunctionBegin; 335a2d1c673SSatish Balay bs2 = bs*bs; 3364c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3378798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 338a2d1c673SSatish Balay } 3394c1ff481SSatish Balay for (i=0; i<n; i++) { 340a2d1c673SSatish Balay stash->idx[stash->n] = row; 341a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 342a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 343a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 344a2d1c673SSatish Balay funtion call */ 345a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 346a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 347a2d1c673SSatish Balay for (j=0; j<bs; j++) { 348*0ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k*bs] = vals[k];} 349a2d1c673SSatish Balay array += 1; 350a2d1c673SSatish Balay vals += cmax*bs; 351a2d1c673SSatish Balay } 3524c1ff481SSatish Balay stash->n++; 3534c1ff481SSatish Balay } 3544c1ff481SSatish Balay PetscFunctionReturn(0); 3554c1ff481SSatish Balay } 3564c1ff481SSatish Balay 3574c1ff481SSatish Balay /* 3588798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3594c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3604c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3614c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3624c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3634c1ff481SSatish Balay 3644c1ff481SSatish Balay Input Parameters: 3654c1ff481SSatish Balay stash - the stash 3664c1ff481SSatish Balay row - the global block-row correspoiding to the values 3674c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3684c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3694c1ff481SSatish Balay values. Each block is of size bs*bs. 3704c1ff481SSatish Balay values - the values inserted 3714c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3724c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3734c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3744c1ff481SSatish Balay */ 3754c1ff481SSatish Balay #undef __FUNC__ 376b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashValuesColBlocked_Private" 377*0ae3cd3bSBarry Smith int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx) 3784c1ff481SSatish Balay { 3794c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 380*0ae3cd3bSBarry Smith MatScalar *vals,*array; 3814c1ff481SSatish Balay 3824c1ff481SSatish Balay PetscFunctionBegin; 3834c1ff481SSatish Balay bs2 = bs*bs; 3844c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3858798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3864c1ff481SSatish Balay } 3874c1ff481SSatish Balay for (i=0; i<n; i++) { 3884c1ff481SSatish Balay stash->idx[stash->n] = row; 3894c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3904c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 3914c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 3924c1ff481SSatish Balay funtion call */ 393a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 394a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 395a2d1c673SSatish Balay for (j=0; j<bs; j++) { 396*0ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k] = vals[k];} 397a2d1c673SSatish Balay array += bs; 398a2d1c673SSatish Balay vals += rmax*bs; 399a2d1c673SSatish Balay } 400a2d1c673SSatish Balay stash->n++; 4019417f4adSLois Curfman McInnes } 4023a40ed3dSBarry Smith PetscFunctionReturn(0); 4039417f4adSLois Curfman McInnes } 4044c1ff481SSatish Balay /* 4058798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4064c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4074c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4084c1ff481SSatish Balay processors. 409bc5ccf88SSatish Balay 4104c1ff481SSatish Balay Input Parameters: 4114c1ff481SSatish Balay stash - the stash 4124c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4134c1ff481SSatish Balay for each node. 4144c1ff481SSatish Balay 4154c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4164c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4174c1ff481SSatish Balay the proper global indices. 4184c1ff481SSatish Balay */ 419bc5ccf88SSatish Balay #undef __FUNC__ 420b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashScatterBegin_Private" 4218798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners) 422bc5ccf88SSatish Balay { 423a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 424a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 4254c1ff481SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 4263eda8832SBarry Smith MatScalar *rvalues,*svalues; 427bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 428bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 429bc5ccf88SSatish Balay 430bc5ccf88SSatish Balay PetscFunctionBegin; 431bc5ccf88SSatish Balay 4324c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 433bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 434bc5ccf88SSatish Balay nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 435549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 436549d3d68SSatish Balay procs = nprocs + size; 437bc5ccf88SSatish Balay owner = (int*)PetscMalloc((stash->n+1)*sizeof(int));CHKPTRQ(owner); 438a2d1c673SSatish Balay 439bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 440bc5ccf88SSatish Balay idx = stash->idx[i]; 441bc5ccf88SSatish Balay for (j=0; j<size; j++) { 4424c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 443bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 444bc5ccf88SSatish Balay } 445bc5ccf88SSatish Balay } 446bc5ccf88SSatish Balay } 447bc5ccf88SSatish Balay nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 448bc5ccf88SSatish Balay 449bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 4506831982aSBarry Smith work = (int *)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 4516831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 452bc5ccf88SSatish Balay nmax = work[rank]; 4536831982aSBarry Smith nreceives = work[size+rank]; 454606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 455bc5ccf88SSatish Balay /* post receives: 456bc5ccf88SSatish Balay since we don't know how long each individual message is we 457bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 458bc5ccf88SSatish Balay this is a lot of wasted space. 459bc5ccf88SSatish Balay */ 4603eda8832SBarry Smith rvalues = (MatScalar*)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)));CHKPTRQ(rvalues); 461a2d1c673SSatish Balay rindices = (int*)(rvalues + bs2*nreceives*nmax); 462a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 463bc5ccf88SSatish Balay for (i=0,count=0; i<nreceives; i++) { 4643eda8832SBarry Smith ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm, 465bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 466*0ae3cd3bSBarry Smith ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr); 467bc5ccf88SSatish Balay } 468bc5ccf88SSatish Balay 469bc5ccf88SSatish Balay /* do sends: 470bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 471bc5ccf88SSatish Balay the ith processor 472bc5ccf88SSatish Balay */ 4733eda8832SBarry Smith svalues = (MatScalar*)PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)));CHKPTRQ(svalues); 474a2d1c673SSatish Balay sindices = (int*)(svalues + bs2*stash->n); 475549d3d68SSatish Balay send_waits = (MPI_Request*)PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 476bc5ccf88SSatish Balay startv = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(startv); 477bc5ccf88SSatish Balay starti = startv + size; 478a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 479bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 480bc5ccf88SSatish Balay for (i=1; i<size; i++) { 481bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 482bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 483bc5ccf88SSatish Balay } 484bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 485bc5ccf88SSatish Balay j = owner[i]; 486a2d1c673SSatish Balay if (bs2 == 1) { 487bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 488a2d1c673SSatish Balay } else { 4894c1ff481SSatish Balay int k; 4903eda8832SBarry Smith MatScalar *buf1,*buf2; 4914c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 4924c1ff481SSatish Balay buf2 = stash->array+bs2*i; 4934c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 494a2d1c673SSatish Balay } 495bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 496bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 497bc5ccf88SSatish Balay startv[j]++; 498bc5ccf88SSatish Balay starti[j]++; 499bc5ccf88SSatish Balay } 500bc5ccf88SSatish Balay startv[0] = 0; 501bc5ccf88SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[i-1];} 502bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 503bc5ccf88SSatish Balay if (procs[i]) { 5043eda8832SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_MATSCALAR,i,tag1,comm, 505bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 506bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 507bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 508bc5ccf88SSatish Balay } 509bc5ccf88SSatish Balay } 510606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 511606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 512a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 513a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 514a2d1c673SSatish Balay stash->nprocs = nprocs; 515a2d1c673SSatish Balay 516bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 517bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 518bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 519bc5ccf88SSatish Balay stash->rmax = nmax; 520bc5ccf88SSatish Balay PetscFunctionReturn(0); 521bc5ccf88SSatish Balay } 522bc5ccf88SSatish Balay 523a2d1c673SSatish Balay /* 5248798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5258798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5264c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5274c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5284c1ff481SSatish Balay 5294c1ff481SSatish Balay Input Parameters: 5304c1ff481SSatish Balay stash - the stash 5314c1ff481SSatish Balay 5324c1ff481SSatish Balay Output Parameters: 5334c1ff481SSatish Balay nvals - the number of entries in the current message. 5344c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5354c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5364c1ff481SSatish Balay vals - the values 5374c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5384c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5394c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 540a2d1c673SSatish Balay */ 541bc5ccf88SSatish Balay #undef __FUNC__ 542b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStashScatterGetMesg_Private" 5433eda8832SBarry Smith int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg) 544bc5ccf88SSatish Balay { 545*0ae3cd3bSBarry Smith int i,ierr,size=stash->size,*flg_v,*flg_i,i1,i2,*rindices,match_found=0,bs2; 546a2d1c673SSatish Balay MPI_Status recv_status; 547bc5ccf88SSatish Balay 548bc5ccf88SSatish Balay PetscFunctionBegin; 549bc5ccf88SSatish Balay 550a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 551a2d1c673SSatish Balay /* Return if no more messages to process */ 552a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 553a2d1c673SSatish Balay 554a2d1c673SSatish Balay flg_v = stash->nprocs; 555a2d1c673SSatish Balay flg_i = flg_v + size; 5564c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 557a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 558a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 559a2d1c673SSatish Balay while (!match_found) { 560a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 561a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 562a2d1c673SSatish Balay if (i % 2) { 563a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 564a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 565a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 566a2d1c673SSatish Balay } else { 5673eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 568a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 569a2d1c673SSatish Balay *nvals = *nvals/bs2; 570bc5ccf88SSatish Balay } 571a2d1c673SSatish Balay 572a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 573a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 574a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 575a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 576a2d1c673SSatish Balay rindices = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs); 577a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 578a2d1c673SSatish Balay *cols = *rows + *nvals; 579a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 580a2d1c673SSatish Balay *flg = 1; 581a2d1c673SSatish Balay stash->nprocessed ++; 582a2d1c673SSatish Balay match_found = 1; 583bc5ccf88SSatish Balay } 584bc5ccf88SSatish Balay } 585bc5ccf88SSatish Balay PetscFunctionReturn(0); 586bc5ccf88SSatish Balay } 587