1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 49417f4adSLois Curfman McInnes 53eda8832SBarry Smith /* 60ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 70ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 83eda8832SBarry Smith */ 9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 104c1ff481SSatish Balay 119417f4adSLois Curfman McInnes /* 128798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 134c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 144c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 159417f4adSLois Curfman McInnes 164c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 174c1ff481SSatish Balay 184c1ff481SSatish Balay Input Parameters: 194c1ff481SSatish Balay comm - communicator, required for scatters. 204c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 214c1ff481SSatish Balay 224c1ff481SSatish Balay Output Parameters: 234c1ff481SSatish Balay stash - the newly created stash 249417f4adSLois Curfman McInnes */ 254a2ae208SSatish Balay #undef __FUNCT__ 264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 27c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 289417f4adSLois Curfman McInnes { 29dfbe8321SBarry Smith PetscErrorCode ierr; 30c1ac3661SBarry Smith PetscInt max,*opt,nopt; 31f1af5d2fSBarry Smith PetscTruth flg; 32bc5ccf88SSatish Balay 333a40ed3dSBarry Smith PetscFunctionBegin; 34bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 35752ec6e0SSatish Balay stash->comm = comm; 36752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 37a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 38a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 39a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 40bc5ccf88SSatish Balay 41434d7ff9SSatish Balay nopt = stash->size; 42d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 43b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 44434d7ff9SSatish Balay if (flg) { 45434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 46434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 47434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 48f4ab19daSSatish Balay else max = 0; /* Use default */ 49434d7ff9SSatish Balay stash->umax = max; 50434d7ff9SSatish Balay } else { 51434d7ff9SSatish Balay stash->umax = 0; 52434d7ff9SSatish Balay } 53606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 544c1ff481SSatish Balay if (bs <= 0) bs = 1; 55a2d1c673SSatish Balay 564c1ff481SSatish Balay stash->bs = bs; 579417f4adSLois Curfman McInnes stash->nmax = 0; 58434d7ff9SSatish Balay stash->oldnmax = 0; 599417f4adSLois Curfman McInnes stash->n = 0; 604c1ff481SSatish Balay stash->reallocs = -1; 619417f4adSLois Curfman McInnes stash->idx = 0; 629417f4adSLois Curfman McInnes stash->idy = 0; 63bc5ccf88SSatish Balay stash->array = 0; 649417f4adSLois Curfman McInnes 65bc5ccf88SSatish Balay stash->send_waits = 0; 66bc5ccf88SSatish Balay stash->recv_waits = 0; 67a2d1c673SSatish Balay stash->send_status = 0; 68bc5ccf88SSatish Balay stash->nsends = 0; 69bc5ccf88SSatish Balay stash->nrecvs = 0; 70bc5ccf88SSatish Balay stash->svalues = 0; 71bc5ccf88SSatish Balay stash->rvalues = 0; 72*563fb871SSatish Balay stash->rindices = 0; 73bc5ccf88SSatish Balay stash->rmax = 0; 74a2d1c673SSatish Balay stash->nprocs = 0; 75a2d1c673SSatish Balay stash->nprocessed = 0; 763a40ed3dSBarry Smith PetscFunctionReturn(0); 779417f4adSLois Curfman McInnes } 789417f4adSLois Curfman McInnes 794c1ff481SSatish Balay /* 808798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 814c1ff481SSatish Balay */ 824a2ae208SSatish Balay #undef __FUNCT__ 834a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 84dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 859417f4adSLois Curfman McInnes { 86dfbe8321SBarry Smith PetscErrorCode ierr; 87a2d1c673SSatish Balay 88bc5ccf88SSatish Balay PetscFunctionBegin; 89606d414cSSatish Balay if (stash->array) { 90606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 91606d414cSSatish Balay stash->array = 0; 92606d414cSSatish Balay } 93bc5ccf88SSatish Balay PetscFunctionReturn(0); 94bc5ccf88SSatish Balay } 95bc5ccf88SSatish Balay 964c1ff481SSatish Balay /* 978798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 984c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 994c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 1004c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1014c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1024c1ff481SSatish Balay so that the same value can be used the next time through. 1034c1ff481SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 106dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 107bc5ccf88SSatish Balay { 1086849ba73SBarry Smith PetscErrorCode ierr; 1096849ba73SBarry Smith int nsends=stash->nsends,bs2,oldnmax; 110a2d1c673SSatish Balay MPI_Status *send_status; 111a2d1c673SSatish Balay 1123a40ed3dSBarry Smith PetscFunctionBegin; 113a2d1c673SSatish Balay /* wait on sends */ 114a2d1c673SSatish Balay if (nsends) { 11582502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 116a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 117606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 118a2d1c673SSatish Balay } 119a2d1c673SSatish Balay 120c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 121434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 122434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 123b9b97703SBarry Smith if (stash->n) { 12494b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1258a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 126434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 127b9b97703SBarry Smith } 128434d7ff9SSatish Balay 129d07ff455SSatish Balay stash->nmax = 0; 130d07ff455SSatish Balay stash->n = 0; 1314c1ff481SSatish Balay stash->reallocs = -1; 132bc5ccf88SSatish Balay stash->rmax = 0; 133a2d1c673SSatish Balay stash->nprocessed = 0; 134bc5ccf88SSatish Balay 135bc5ccf88SSatish Balay if (stash->array) { 136606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 137bc5ccf88SSatish Balay stash->array = 0; 138bc5ccf88SSatish Balay stash->idx = 0; 139bc5ccf88SSatish Balay stash->idy = 0; 140bc5ccf88SSatish Balay } 141606d414cSSatish Balay if (stash->send_waits) { 142606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 143606d414cSSatish Balay stash->send_waits = 0; 144606d414cSSatish Balay } 145606d414cSSatish Balay if (stash->recv_waits) { 146606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 147606d414cSSatish Balay stash->recv_waits = 0; 148606d414cSSatish Balay } 149606d414cSSatish Balay if (stash->svalues) { 150606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 151606d414cSSatish Balay stash->svalues = 0; 152606d414cSSatish Balay } 153606d414cSSatish Balay if (stash->rvalues) { 154606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 155606d414cSSatish Balay stash->rvalues = 0; 156606d414cSSatish Balay } 157*563fb871SSatish Balay if (stash->rindices) { 158*563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 159*563fb871SSatish Balay stash->rindices = 0; 160*563fb871SSatish Balay } 161606d414cSSatish Balay if (stash->nprocs) { 162b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 163606d414cSSatish Balay stash->nprocs = 0; 164606d414cSSatish Balay } 165bc5ccf88SSatish Balay 1663a40ed3dSBarry Smith PetscFunctionReturn(0); 1679417f4adSLois Curfman McInnes } 1689417f4adSLois Curfman McInnes 1694c1ff481SSatish Balay /* 1708798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1714c1ff481SSatish Balay 1724c1ff481SSatish Balay Input Parameters: 1734c1ff481SSatish Balay stash - the stash 17494b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1754c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1764c1ff481SSatish Balay 1774c1ff481SSatish Balay */ 1784a2ae208SSatish Balay #undef __FUNCT__ 1794a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 180c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 18197530c3fSBarry Smith { 182c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 18394b769a5SSatish Balay 1843a40ed3dSBarry Smith PetscFunctionBegin; 1851ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1861ecfd215SBarry Smith if (reallocs) { 187434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 188434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1891ecfd215SBarry Smith } 190bc5ccf88SSatish Balay PetscFunctionReturn(0); 191bc5ccf88SSatish Balay } 1924c1ff481SSatish Balay 1934c1ff481SSatish Balay 1944c1ff481SSatish Balay /* 1958798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1964c1ff481SSatish Balay 1974c1ff481SSatish Balay Input Parameters: 1984c1ff481SSatish Balay stash - the stash 1994c1ff481SSatish Balay max - the value that is used as the max size of the stash. 2004c1ff481SSatish Balay this value is used while allocating memory. 2014c1ff481SSatish Balay */ 2024a2ae208SSatish Balay #undef __FUNCT__ 2034a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 204c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 205bc5ccf88SSatish Balay { 206bc5ccf88SSatish Balay PetscFunctionBegin; 207434d7ff9SSatish Balay stash->umax = max; 2083a40ed3dSBarry Smith PetscFunctionReturn(0); 20997530c3fSBarry Smith } 21097530c3fSBarry Smith 2118798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2124c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2134c1ff481SSatish Balay being inserted into the stash. 2144c1ff481SSatish Balay 2154c1ff481SSatish Balay Input Parameters: 2164c1ff481SSatish Balay stash - the stash 2174c1ff481SSatish Balay incr - the minimum increase requested 2184c1ff481SSatish Balay 2194c1ff481SSatish Balay Notes: 2204c1ff481SSatish Balay This routine doubles the currently used memory. 2214c1ff481SSatish Balay */ 2224a2ae208SSatish Balay #undef __FUNCT__ 2234a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 224c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2259417f4adSLois Curfman McInnes { 2266849ba73SBarry Smith PetscErrorCode ierr; 227c1ac3661SBarry Smith PetscInt *n_idx,*n_idy,newnmax,bs2; 2283eda8832SBarry Smith MatScalar *n_array; 2299417f4adSLois Curfman McInnes 2303a40ed3dSBarry Smith PetscFunctionBegin; 2319417f4adSLois Curfman McInnes /* allocate a larger stash */ 23294b769a5SSatish Balay bs2 = stash->bs*stash->bs; 233c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 234434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 235434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 236c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 237434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 238434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 239434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2404c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 241d07ff455SSatish Balay 242c1ac3661SBarry Smith ierr = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr); 243c1ac3661SBarry Smith n_idx = (PetscInt*)(n_array + bs2*newnmax); 244c1ac3661SBarry Smith n_idy = (PetscInt*)(n_idx + newnmax); 2453eda8832SBarry Smith ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr); 246c1ac3661SBarry Smith ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 247c1ac3661SBarry Smith ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 248606d414cSSatish Balay if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 249d07ff455SSatish Balay stash->array = n_array; 250d07ff455SSatish Balay stash->idx = n_idx; 251d07ff455SSatish Balay stash->idy = n_idy; 252d07ff455SSatish Balay stash->nmax = newnmax; 253bc5ccf88SSatish Balay stash->reallocs++; 254bc5ccf88SSatish Balay PetscFunctionReturn(0); 255bc5ccf88SSatish Balay } 256bc5ccf88SSatish Balay /* 2578798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2584c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2594c1ff481SSatish Balay can be inserted with a single call to this function. 2604c1ff481SSatish Balay 2614c1ff481SSatish Balay Input Parameters: 2624c1ff481SSatish Balay stash - the stash 2634c1ff481SSatish Balay row - the global row correspoiding to the values 2644c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2654c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2664c1ff481SSatish Balay values - the values inserted 267bc5ccf88SSatish Balay */ 2684a2ae208SSatish Balay #undef __FUNCT__ 2694a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 270c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[]) 271bc5ccf88SSatish Balay { 272dfbe8321SBarry Smith PetscErrorCode ierr; 273c1ac3661SBarry Smith PetscInt i; 274bc5ccf88SSatish Balay 275bc5ccf88SSatish Balay PetscFunctionBegin; 2764c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2774c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2788798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2799417f4adSLois Curfman McInnes } 2804c1ff481SSatish Balay for (i=0; i<n; i++) { 2819417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 282a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 2830ae3cd3bSBarry Smith stash->array[stash->n] = values[i]; 284a2d1c673SSatish Balay stash->n++; 2859417f4adSLois Curfman McInnes } 286a2d1c673SSatish Balay PetscFunctionReturn(0); 287a2d1c673SSatish Balay } 2884c1ff481SSatish Balay /* 2898798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2904c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2914c1ff481SSatish Balay can be inserted with a single call to this function. 292a2d1c673SSatish Balay 2934c1ff481SSatish Balay Input Parameters: 2944c1ff481SSatish Balay stash - the stash 2954c1ff481SSatish Balay row - the global row correspoiding to the values 2964c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2974c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2984c1ff481SSatish Balay values - the values inserted 2994c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 3004c1ff481SSatish Balay this happens because the input is columnoriented. 3014c1ff481SSatish Balay */ 3024a2ae208SSatish Balay #undef __FUNCT__ 3034a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 304c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval) 305a2d1c673SSatish Balay { 306dfbe8321SBarry Smith PetscErrorCode ierr; 307c1ac3661SBarry Smith PetscInt i; 308a2d1c673SSatish Balay 3094c1ff481SSatish Balay PetscFunctionBegin; 3104c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 3114c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 3128798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3134c1ff481SSatish Balay } 3144c1ff481SSatish Balay for (i=0; i<n; i++) { 3154c1ff481SSatish Balay stash->idx[stash->n] = row; 3164c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3170ae3cd3bSBarry Smith stash->array[stash->n] = values[i*stepval]; 3184c1ff481SSatish Balay stash->n++; 3194c1ff481SSatish Balay } 3204c1ff481SSatish Balay PetscFunctionReturn(0); 3214c1ff481SSatish Balay } 3224c1ff481SSatish Balay 3234c1ff481SSatish Balay /* 3248798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3254c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3264c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3274c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3284c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3294c1ff481SSatish Balay 3304c1ff481SSatish Balay Input Parameters: 3314c1ff481SSatish Balay stash - the stash 3324c1ff481SSatish Balay row - the global block-row correspoiding to the values 3334c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3344c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3354c1ff481SSatish Balay values. Each block is of size bs*bs. 3364c1ff481SSatish Balay values - the values inserted 3374c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3384c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3394c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3404c1ff481SSatish Balay */ 3414a2ae208SSatish Balay #undef __FUNCT__ 3424a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 343c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3444c1ff481SSatish Balay { 345dfbe8321SBarry Smith PetscErrorCode ierr; 346c1ac3661SBarry Smith PetscInt i,j,k,bs2,bs=stash->bs; 347f15d580aSBarry Smith const MatScalar *vals; 348f15d580aSBarry Smith MatScalar *array; 349a2d1c673SSatish Balay 350a2d1c673SSatish Balay PetscFunctionBegin; 351a2d1c673SSatish Balay bs2 = bs*bs; 3524c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3538798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 354a2d1c673SSatish Balay } 3554c1ff481SSatish Balay for (i=0; i<n; i++) { 356a2d1c673SSatish Balay stash->idx[stash->n] = row; 357a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 358a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 359a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 360a2d1c673SSatish Balay funtion call */ 361a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 362a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 363a2d1c673SSatish Balay for (j=0; j<bs; j++) { 3640ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k*bs] = vals[k];} 365a2d1c673SSatish Balay array += 1; 366a2d1c673SSatish Balay vals += cmax*bs; 367a2d1c673SSatish Balay } 3684c1ff481SSatish Balay stash->n++; 3694c1ff481SSatish Balay } 3704c1ff481SSatish Balay PetscFunctionReturn(0); 3714c1ff481SSatish Balay } 3724c1ff481SSatish Balay 3734c1ff481SSatish Balay /* 3748798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3754c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3764c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3774c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3784c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3794c1ff481SSatish Balay 3804c1ff481SSatish Balay Input Parameters: 3814c1ff481SSatish Balay stash - the stash 3824c1ff481SSatish Balay row - the global block-row correspoiding to the values 3834c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3844c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3854c1ff481SSatish Balay values. Each block is of size bs*bs. 3864c1ff481SSatish Balay values - the values inserted 3874c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3884c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3894c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3904c1ff481SSatish Balay */ 3914a2ae208SSatish Balay #undef __FUNCT__ 3924a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 393c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3944c1ff481SSatish Balay { 395dfbe8321SBarry Smith PetscErrorCode ierr; 396c1ac3661SBarry Smith PetscInt i,j,k,bs2,bs=stash->bs; 397f15d580aSBarry Smith const MatScalar *vals; 398f15d580aSBarry Smith MatScalar *array; 3994c1ff481SSatish Balay 4004c1ff481SSatish Balay PetscFunctionBegin; 4014c1ff481SSatish Balay bs2 = bs*bs; 4024c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 4038798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4044c1ff481SSatish Balay } 4054c1ff481SSatish Balay for (i=0; i<n; i++) { 4064c1ff481SSatish Balay stash->idx[stash->n] = row; 4074c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 4084c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 4094c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 4104c1ff481SSatish Balay funtion call */ 411a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 412a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 413a2d1c673SSatish Balay for (j=0; j<bs; j++) { 4140ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k] = vals[k];} 415a2d1c673SSatish Balay array += bs; 416a2d1c673SSatish Balay vals += rmax*bs; 417a2d1c673SSatish Balay } 418a2d1c673SSatish Balay stash->n++; 4199417f4adSLois Curfman McInnes } 4203a40ed3dSBarry Smith PetscFunctionReturn(0); 4219417f4adSLois Curfman McInnes } 4224c1ff481SSatish Balay /* 4238798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4244c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4254c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4264c1ff481SSatish Balay processors. 427bc5ccf88SSatish Balay 4284c1ff481SSatish Balay Input Parameters: 4294c1ff481SSatish Balay stash - the stash 4304c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4314c1ff481SSatish Balay for each node. 4324c1ff481SSatish Balay 4334c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4344c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4354c1ff481SSatish Balay the proper global indices. 4364c1ff481SSatish Balay */ 4374a2ae208SSatish Balay #undef __FUNCT__ 4384a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 439c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners) 440bc5ccf88SSatish Balay { 441c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 442*563fb871SSatish Balay PetscInt size=stash->size,*nprocs,*nlengths,nsends,nreceives; 4436849ba73SBarry Smith PetscErrorCode ierr; 444*563fb871SSatish Balay PetscInt nmax,count,*sindices,**rindices,i,j,idx,lastidx; 445*563fb871SSatish Balay MatScalar **rvalues,*svalues; 446bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 447*563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 448bc5ccf88SSatish Balay 449bc5ccf88SSatish Balay PetscFunctionBegin; 450bc5ccf88SSatish Balay 4514c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 452bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 453c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 454c1ac3661SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 455c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 456a2d1c673SSatish Balay 457*563fb871SSatish Balay nlengths = nprocs+size; 4587357eb19SBarry Smith j = 0; 4597357eb19SBarry Smith lastidx = -1; 460bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 4617357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4627357eb19SBarry Smith if (lastidx > (idx = stash->idx[i])) j = 0; 4637357eb19SBarry Smith lastidx = idx; 4647357eb19SBarry Smith for (; j<size; j++) { 4654c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 466*563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 467bc5ccf88SSatish Balay } 468bc5ccf88SSatish Balay } 469bc5ccf88SSatish Balay } 470*563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 471*563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 472*563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 473*563fb871SSatish Balay } 474bc5ccf88SSatish Balay 475*563fb871SSatish Balay { int *onodes,*olengths; 476*563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 477*563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 478*563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 479*563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 480*563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 481*563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 482*563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 483*563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 484*563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 485*563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 486*563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 487bc5ccf88SSatish Balay } 488bc5ccf88SSatish Balay 489bc5ccf88SSatish Balay /* do sends: 490bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 491bc5ccf88SSatish Balay the ith processor 492bc5ccf88SSatish Balay */ 493c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 494c1ac3661SBarry Smith sindices = (PetscInt*)(svalues + bs2*stash->n); 495b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 496c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 497bc5ccf88SSatish Balay starti = startv + size; 498a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 499bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 500bc5ccf88SSatish Balay for (i=1; i<size; i++) { 501*563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 502*563fb871SSatish Balay starti[i] = starti[i-1] + nlengths[i-1]*2; 503bc5ccf88SSatish Balay } 504bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 505bc5ccf88SSatish Balay j = owner[i]; 506a2d1c673SSatish Balay if (bs2 == 1) { 507bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 508a2d1c673SSatish Balay } else { 509c1ac3661SBarry Smith PetscInt k; 5103eda8832SBarry Smith MatScalar *buf1,*buf2; 5114c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 5124c1ff481SSatish Balay buf2 = stash->array+bs2*i; 5134c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 514a2d1c673SSatish Balay } 515bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 516*563fb871SSatish Balay sindices[starti[j]+nlengths[j]] = stash->idy[i]; 517bc5ccf88SSatish Balay startv[j]++; 518bc5ccf88SSatish Balay starti[j]++; 519bc5ccf88SSatish Balay } 520bc5ccf88SSatish Balay startv[0] = 0; 521*563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 522bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 523*563fb871SSatish Balay if (nprocs[i]) { 524*563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 525*563fb871SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 526bc5ccf88SSatish Balay } 527bc5ccf88SSatish Balay } 528606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 529606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 530a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 531a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 532a2d1c673SSatish Balay stash->nprocs = nprocs; 533a2d1c673SSatish Balay 534*563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 535*563fb871SSatish Balay ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 536*563fb871SSatish Balay 537*563fb871SSatish Balay for (i=0; i<nreceives; i++) { 538*563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 539*563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 540*563fb871SSatish Balay } 541*563fb871SSatish Balay stash->recv_waits = recv_waits; 542*563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 543*563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 544*563fb871SSatish Balay 545bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 546*563fb871SSatish Balay stash->rindices = rindices; stash->send_waits = send_waits; 547bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 548bc5ccf88SSatish Balay stash->rmax = nmax; 549bc5ccf88SSatish Balay PetscFunctionReturn(0); 550bc5ccf88SSatish Balay } 551bc5ccf88SSatish Balay 552a2d1c673SSatish Balay /* 5538798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5548798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5554c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5564c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5574c1ff481SSatish Balay 5584c1ff481SSatish Balay Input Parameters: 5594c1ff481SSatish Balay stash - the stash 5604c1ff481SSatish Balay 5614c1ff481SSatish Balay Output Parameters: 5624c1ff481SSatish Balay nvals - the number of entries in the current message. 5634c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5644c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5654c1ff481SSatish Balay vals - the values 5664c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5674c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5684c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 569a2d1c673SSatish Balay */ 5704a2ae208SSatish Balay #undef __FUNCT__ 5714a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 572c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg) 573bc5ccf88SSatish Balay { 5746849ba73SBarry Smith PetscErrorCode ierr; 575c1ac3661SBarry Smith PetscMPIInt i; 576*563fb871SSatish Balay PetscInt *flg_v,i1,i2,bs2; 577a2d1c673SSatish Balay MPI_Status recv_status; 578b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 579bc5ccf88SSatish Balay 580bc5ccf88SSatish Balay PetscFunctionBegin; 581bc5ccf88SSatish Balay 582a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 583a2d1c673SSatish Balay /* Return if no more messages to process */ 584a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 585a2d1c673SSatish Balay 586a2d1c673SSatish Balay flg_v = stash->nprocs; 5874c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 588a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 589a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 590a2d1c673SSatish Balay while (!match_found) { 591a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 592a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 593a2d1c673SSatish Balay if (i % 2) { 5943eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 595c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 596a2d1c673SSatish Balay *nvals = *nvals/bs2; 597*563fb871SSatish Balay } else { 598*563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 599*563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 600*563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 601bc5ccf88SSatish Balay } 602a2d1c673SSatish Balay 603a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 604c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 605c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 606a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 607*563fb871SSatish Balay *rows = stash->rindices[i2]; 608a2d1c673SSatish Balay *cols = *rows + *nvals; 609*563fb871SSatish Balay *vals = stash->rvalues[i1]; 610a2d1c673SSatish Balay *flg = 1; 611a2d1c673SSatish Balay stash->nprocessed ++; 61235d8aa7fSBarry Smith match_found = PETSC_TRUE; 613bc5ccf88SSatish Balay } 614bc5ccf88SSatish Balay } 615bc5ccf88SSatish Balay PetscFunctionReturn(0); 616bc5ccf88SSatish Balay } 617