1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 475cae7c1SHong Zhang #include "src/mat/utils/matstashspace.h" 55bd3b8fbSHong Zhang 63eda8832SBarry Smith /* 70ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 80ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 93eda8832SBarry Smith */ 10bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 114c1ff481SSatish Balay 129417f4adSLois Curfman McInnes /* 138798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 144c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 154c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 169417f4adSLois Curfman McInnes 174c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 184c1ff481SSatish Balay 194c1ff481SSatish Balay Input Parameters: 204c1ff481SSatish Balay comm - communicator, required for scatters. 214c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 224c1ff481SSatish Balay 234c1ff481SSatish Balay Output Parameters: 244c1ff481SSatish Balay stash - the newly created stash 259417f4adSLois Curfman McInnes */ 264a2ae208SSatish Balay #undef __FUNCT__ 274a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 299417f4adSLois Curfman McInnes { 30dfbe8321SBarry Smith PetscErrorCode ierr; 31c1ac3661SBarry Smith PetscInt max,*opt,nopt; 32f1af5d2fSBarry Smith PetscTruth flg; 33bc5ccf88SSatish Balay 343a40ed3dSBarry Smith PetscFunctionBegin; 35bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 36752ec6e0SSatish Balay stash->comm = comm; 37752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 38a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 39a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 40a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 41bc5ccf88SSatish Balay 42434d7ff9SSatish Balay nopt = stash->size; 43d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 44b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 45434d7ff9SSatish Balay if (flg) { 46434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 47434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 48434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 49f4ab19daSSatish Balay else max = 0; /* Use default */ 50434d7ff9SSatish Balay stash->umax = max; 51434d7ff9SSatish Balay } else { 52434d7ff9SSatish Balay stash->umax = 0; 53434d7ff9SSatish Balay } 54606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 554c1ff481SSatish Balay if (bs <= 0) bs = 1; 56a2d1c673SSatish Balay 574c1ff481SSatish Balay stash->bs = bs; 589417f4adSLois Curfman McInnes stash->nmax = 0; 59434d7ff9SSatish Balay stash->oldnmax = 0; 609417f4adSLois Curfman McInnes stash->n = 0; 614c1ff481SSatish Balay stash->reallocs = -1; 6275cae7c1SHong Zhang stash->space_head = 0; 6375cae7c1SHong Zhang stash->space = 0; 649417f4adSLois Curfman McInnes 65bc5ccf88SSatish Balay stash->send_waits = 0; 66bc5ccf88SSatish Balay stash->recv_waits = 0; 67a2d1c673SSatish Balay stash->send_status = 0; 68bc5ccf88SSatish Balay stash->nsends = 0; 69bc5ccf88SSatish Balay stash->nrecvs = 0; 70bc5ccf88SSatish Balay stash->svalues = 0; 71bc5ccf88SSatish Balay stash->rvalues = 0; 72563fb871SSatish Balay stash->rindices = 0; 73a2d1c673SSatish Balay stash->nprocs = 0; 74a2d1c673SSatish Balay stash->nprocessed = 0; 753a40ed3dSBarry Smith PetscFunctionReturn(0); 769417f4adSLois Curfman McInnes } 779417f4adSLois Curfman McInnes 784c1ff481SSatish Balay /* 798798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 804c1ff481SSatish Balay */ 814a2ae208SSatish Balay #undef __FUNCT__ 824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 849417f4adSLois Curfman McInnes { 85dfbe8321SBarry Smith PetscErrorCode ierr; 86a2d1c673SSatish Balay 87bc5ccf88SSatish Balay PetscFunctionBegin; 8875cae7c1SHong Zhang if (stash->space_head){ 8975cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 9075cae7c1SHong Zhang stash->space_head = 0; 91*82740460SHong Zhang stash->space = 0; 9275cae7c1SHong Zhang } 93bc5ccf88SSatish Balay PetscFunctionReturn(0); 94bc5ccf88SSatish Balay } 95bc5ccf88SSatish Balay 964c1ff481SSatish Balay /* 978798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 984c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 994c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 1004c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1014c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1024c1ff481SSatish Balay so that the same value can be used the next time through. 1034c1ff481SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 106dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 107bc5ccf88SSatish Balay { 1086849ba73SBarry Smith PetscErrorCode ierr; 1095bd3b8fbSHong Zhang PetscInt nsends=stash->nsends,bs2,oldnmax; 110a2d1c673SSatish Balay MPI_Status *send_status; 111a2d1c673SSatish Balay 1123a40ed3dSBarry Smith PetscFunctionBegin; 113a2d1c673SSatish Balay /* wait on sends */ 114a2d1c673SSatish Balay if (nsends) { 11582502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 116a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 117606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 118a2d1c673SSatish Balay } 119a2d1c673SSatish Balay 120c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 121434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 122434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 123b9b97703SBarry Smith if (stash->n) { 12494b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1258a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 126434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 127b9b97703SBarry Smith } 128434d7ff9SSatish Balay 129d07ff455SSatish Balay stash->nmax = 0; 130d07ff455SSatish Balay stash->n = 0; 1314c1ff481SSatish Balay stash->reallocs = -1; 132a2d1c673SSatish Balay stash->nprocessed = 0; 13375cae7c1SHong Zhang if (stash->space_head){ 13475cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 13575cae7c1SHong Zhang stash->space_head = 0; 136*82740460SHong Zhang stash->space = 0; 13775cae7c1SHong Zhang } 138606d414cSSatish Balay if (stash->send_waits) { 139606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 140606d414cSSatish Balay stash->send_waits = 0; 141606d414cSSatish Balay } 142606d414cSSatish Balay if (stash->recv_waits) { 143606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 144606d414cSSatish Balay stash->recv_waits = 0; 145606d414cSSatish Balay } 146606d414cSSatish Balay if (stash->svalues) { 147606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 148606d414cSSatish Balay stash->svalues = 0; 149606d414cSSatish Balay } 150606d414cSSatish Balay if (stash->rvalues) { 151606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 152606d414cSSatish Balay stash->rvalues = 0; 153606d414cSSatish Balay } 154563fb871SSatish Balay if (stash->rindices) { 155563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 156563fb871SSatish Balay stash->rindices = 0; 157563fb871SSatish Balay } 158606d414cSSatish Balay if (stash->nprocs) { 159b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 160606d414cSSatish Balay stash->nprocs = 0; 161606d414cSSatish Balay } 1623a40ed3dSBarry Smith PetscFunctionReturn(0); 1639417f4adSLois Curfman McInnes } 1649417f4adSLois Curfman McInnes 1654c1ff481SSatish Balay /* 1668798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1674c1ff481SSatish Balay 1684c1ff481SSatish Balay Input Parameters: 1694c1ff481SSatish Balay stash - the stash 17094b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1714c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1724c1ff481SSatish Balay 1734c1ff481SSatish Balay */ 1744a2ae208SSatish Balay #undef __FUNCT__ 1754a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 176c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 17797530c3fSBarry Smith { 178c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 17994b769a5SSatish Balay 1803a40ed3dSBarry Smith PetscFunctionBegin; 1811ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1821ecfd215SBarry Smith if (reallocs) { 183434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 184434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1851ecfd215SBarry Smith } 186bc5ccf88SSatish Balay PetscFunctionReturn(0); 187bc5ccf88SSatish Balay } 1884c1ff481SSatish Balay 1894c1ff481SSatish Balay /* 1908798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1914c1ff481SSatish Balay 1924c1ff481SSatish Balay Input Parameters: 1934c1ff481SSatish Balay stash - the stash 1944c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1954c1ff481SSatish Balay this value is used while allocating memory. 1964c1ff481SSatish Balay */ 1974a2ae208SSatish Balay #undef __FUNCT__ 1984a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 199c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 200bc5ccf88SSatish Balay { 201bc5ccf88SSatish Balay PetscFunctionBegin; 202434d7ff9SSatish Balay stash->umax = max; 2033a40ed3dSBarry Smith PetscFunctionReturn(0); 20497530c3fSBarry Smith } 20597530c3fSBarry Smith 2068798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2074c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2084c1ff481SSatish Balay being inserted into the stash. 2094c1ff481SSatish Balay 2104c1ff481SSatish Balay Input Parameters: 2114c1ff481SSatish Balay stash - the stash 2124c1ff481SSatish Balay incr - the minimum increase requested 2134c1ff481SSatish Balay 2144c1ff481SSatish Balay Notes: 2154c1ff481SSatish Balay This routine doubles the currently used memory. 2164c1ff481SSatish Balay */ 2174a2ae208SSatish Balay #undef __FUNCT__ 2184a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 219c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2209417f4adSLois Curfman McInnes { 2216849ba73SBarry Smith PetscErrorCode ierr; 2225bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2239417f4adSLois Curfman McInnes 2243a40ed3dSBarry Smith PetscFunctionBegin; 2259417f4adSLois Curfman McInnes /* allocate a larger stash */ 226c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 227434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 228434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 229c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 230434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 231434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 232434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2334c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 234d07ff455SSatish Balay 23575cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 23675cae7c1SHong Zhang if (!stash->nmax) { /* new stash or resuing stash->oldnmax */ 23775cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr); 23875cae7c1SHong Zhang stash->space = stash->space_head; 23975cae7c1SHong Zhang } else { 24075cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 24175cae7c1SHong Zhang } 242bc5ccf88SSatish Balay stash->reallocs++; 24375cae7c1SHong Zhang stash->nmax = newnmax; 244bc5ccf88SSatish Balay PetscFunctionReturn(0); 245bc5ccf88SSatish Balay } 246bc5ccf88SSatish Balay /* 2478798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2484c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2494c1ff481SSatish Balay can be inserted with a single call to this function. 2504c1ff481SSatish Balay 2514c1ff481SSatish Balay Input Parameters: 2524c1ff481SSatish Balay stash - the stash 2534c1ff481SSatish Balay row - the global row correspoiding to the values 2544c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2554c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2564c1ff481SSatish Balay values - the values inserted 257bc5ccf88SSatish Balay */ 2584a2ae208SSatish Balay #undef __FUNCT__ 2594a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 260c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[]) 261bc5ccf88SSatish Balay { 262dfbe8321SBarry Smith PetscErrorCode ierr; 26375cae7c1SHong Zhang PetscInt i,k; 26475cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 265bc5ccf88SSatish Balay 266bc5ccf88SSatish Balay PetscFunctionBegin; 2674c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 26875cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2698798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2709417f4adSLois Curfman McInnes } 27175cae7c1SHong Zhang space = stash->space; 27275cae7c1SHong Zhang k = space->local_used; 2734c1ff481SSatish Balay for (i=0; i<n; i++) { 27475cae7c1SHong Zhang space->idx[k] = row; 27575cae7c1SHong Zhang space->idy[k] = idxn[i]; 27675cae7c1SHong Zhang space->val[k] = values[i]; 27775cae7c1SHong Zhang k++; 2789417f4adSLois Curfman McInnes } 2795bd3b8fbSHong Zhang stash->n += n; 28075cae7c1SHong Zhang space->local_used += n; 28175cae7c1SHong Zhang space->local_remaining -= n; 282a2d1c673SSatish Balay PetscFunctionReturn(0); 283a2d1c673SSatish Balay } 28475cae7c1SHong Zhang 2854c1ff481SSatish Balay /* 2868798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2874c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2884c1ff481SSatish Balay can be inserted with a single call to this function. 289a2d1c673SSatish Balay 2904c1ff481SSatish Balay Input Parameters: 2914c1ff481SSatish Balay stash - the stash 2924c1ff481SSatish Balay row - the global row correspoiding to the values 2934c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2944c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2954c1ff481SSatish Balay values - the values inserted 2964c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2974c1ff481SSatish Balay this happens because the input is columnoriented. 2984c1ff481SSatish Balay */ 2994a2ae208SSatish Balay #undef __FUNCT__ 3004a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 301c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval) 302a2d1c673SSatish Balay { 303dfbe8321SBarry Smith PetscErrorCode ierr; 30475cae7c1SHong Zhang PetscInt i,k; 30575cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 306a2d1c673SSatish Balay 3074c1ff481SSatish Balay PetscFunctionBegin; 3084c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 30975cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3108798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3114c1ff481SSatish Balay } 31275cae7c1SHong Zhang space = stash->space; 31375cae7c1SHong Zhang k = space->local_used; 3144c1ff481SSatish Balay for (i=0; i<n; i++) { 31575cae7c1SHong Zhang space->idx[k] = row; 31675cae7c1SHong Zhang space->idy[k] = idxn[i]; 31775cae7c1SHong Zhang space->val[k] = values[i*stepval]; 31875cae7c1SHong Zhang k++; 3194c1ff481SSatish Balay } 3205bd3b8fbSHong Zhang stash->n += n; 32175cae7c1SHong Zhang space->local_used += n; 32275cae7c1SHong Zhang space->local_remaining -= n; 3234c1ff481SSatish Balay PetscFunctionReturn(0); 3244c1ff481SSatish Balay } 3254c1ff481SSatish Balay 3264c1ff481SSatish Balay /* 3278798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3284c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3294c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3304c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3314c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3324c1ff481SSatish Balay 3334c1ff481SSatish Balay Input Parameters: 3344c1ff481SSatish Balay stash - the stash 3354c1ff481SSatish Balay row - the global block-row correspoiding to the values 3364c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3374c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3384c1ff481SSatish Balay values. Each block is of size bs*bs. 3394c1ff481SSatish Balay values - the values inserted 3404c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3414c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3424c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3434c1ff481SSatish Balay */ 3444a2ae208SSatish Balay #undef __FUNCT__ 3454a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 346c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3474c1ff481SSatish Balay { 348dfbe8321SBarry Smith PetscErrorCode ierr; 34975cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 350f15d580aSBarry Smith const MatScalar *vals; 351f15d580aSBarry Smith MatScalar *array; 35275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 353a2d1c673SSatish Balay 354a2d1c673SSatish Balay PetscFunctionBegin; 35575cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3568798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 357a2d1c673SSatish Balay } 35875cae7c1SHong Zhang space = stash->space; 35975cae7c1SHong Zhang l = space->local_used; 36075cae7c1SHong Zhang bs2 = bs*bs; 3614c1ff481SSatish Balay for (i=0; i<n; i++) { 36275cae7c1SHong Zhang space->idx[l] = row; 36375cae7c1SHong Zhang space->idy[l] = idxn[i]; 36475cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 36575cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 36675cae7c1SHong Zhang funtion call */ 36775cae7c1SHong Zhang array = space->val + bs2*l; 36875cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 36975cae7c1SHong Zhang for (j=0; j<bs; j++) { 37075cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 37175cae7c1SHong Zhang array++; 37275cae7c1SHong Zhang vals += cmax*bs; 37375cae7c1SHong Zhang } 37475cae7c1SHong Zhang l++; 375a2d1c673SSatish Balay } 3765bd3b8fbSHong Zhang stash->n += n; 37775cae7c1SHong Zhang space->local_used += n; 37875cae7c1SHong Zhang space->local_remaining -= n; 3794c1ff481SSatish Balay PetscFunctionReturn(0); 3804c1ff481SSatish Balay } 3814c1ff481SSatish Balay 3824c1ff481SSatish Balay /* 3838798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3844c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3854c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3864c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3874c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3884c1ff481SSatish Balay 3894c1ff481SSatish Balay Input Parameters: 3904c1ff481SSatish Balay stash - the stash 3914c1ff481SSatish Balay row - the global block-row correspoiding to the values 3924c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3934c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3944c1ff481SSatish Balay values. Each block is of size bs*bs. 3954c1ff481SSatish Balay values - the values inserted 3964c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3974c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3984c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3994c1ff481SSatish Balay */ 4004a2ae208SSatish Balay #undef __FUNCT__ 4014a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 402c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 4034c1ff481SSatish Balay { 404dfbe8321SBarry Smith PetscErrorCode ierr; 40575cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 406f15d580aSBarry Smith const MatScalar *vals; 407f15d580aSBarry Smith MatScalar *array; 40875cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4094c1ff481SSatish Balay 4104c1ff481SSatish Balay PetscFunctionBegin; 41175cae7c1SHong Zhang if (!space || space->local_remaining < n){ 4128798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4134c1ff481SSatish Balay } 41475cae7c1SHong Zhang space = stash->space; 41575cae7c1SHong Zhang l = space->local_used; 41675cae7c1SHong Zhang bs2 = bs*bs; 4174c1ff481SSatish Balay for (i=0; i<n; i++) { 41875cae7c1SHong Zhang space->idx[l] = row; 41975cae7c1SHong Zhang space->idy[l] = idxn[i]; 42075cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 42175cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 42275cae7c1SHong Zhang funtion call */ 42375cae7c1SHong Zhang array = space->val + bs2*l; 42475cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 42575cae7c1SHong Zhang for (j=0; j<bs; j++) { 42675cae7c1SHong Zhang for (k=0; k<bs; k++) {array[k] = vals[k];} 42775cae7c1SHong Zhang array += bs; 42875cae7c1SHong Zhang vals += rmax*bs; 42975cae7c1SHong Zhang } 4305bd3b8fbSHong Zhang l++; 431a2d1c673SSatish Balay } 4325bd3b8fbSHong Zhang stash->n += n; 43375cae7c1SHong Zhang space->local_used += n; 43475cae7c1SHong Zhang space->local_remaining -= n; 4353a40ed3dSBarry Smith PetscFunctionReturn(0); 4369417f4adSLois Curfman McInnes } 4374c1ff481SSatish Balay /* 4388798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4394c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4404c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4414c1ff481SSatish Balay processors. 442bc5ccf88SSatish Balay 4434c1ff481SSatish Balay Input Parameters: 4444c1ff481SSatish Balay stash - the stash 4454c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4464c1ff481SSatish Balay for each node. 4474c1ff481SSatish Balay 4484c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4494c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4504c1ff481SSatish Balay the proper global indices. 4514c1ff481SSatish Balay */ 4524a2ae208SSatish Balay #undef __FUNCT__ 4534a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 454c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners) 455bc5ccf88SSatish Balay { 456c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 457fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4586849ba73SBarry Smith PetscErrorCode ierr; 45975cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 460563fb871SSatish Balay MatScalar **rvalues,*svalues; 461bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 462563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 463fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 4645bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 4655bd3b8fbSHong Zhang MatScalar *sp_val; 4665bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 467bc5ccf88SSatish Balay 468bc5ccf88SSatish Balay PetscFunctionBegin; 4694c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 47075cae7c1SHong Zhang 471bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 472fe09c992SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 473fe09c992SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); 474c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 475a2d1c673SSatish Balay 476563fb871SSatish Balay nlengths = nprocs+size; 47775cae7c1SHong Zhang i = j = 0; 4787357eb19SBarry Smith lastidx = -1; 4795bd3b8fbSHong Zhang space = stash->space_head; 48075cae7c1SHong Zhang while (space != PETSC_NULL){ 48175cae7c1SHong Zhang space_next = space->next; 4825bd3b8fbSHong Zhang sp_idx = space->idx; 48375cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 4847357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4855bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4867357eb19SBarry Smith lastidx = idx; 4877357eb19SBarry Smith for (; j<size; j++) { 4884c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 489563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 490bc5ccf88SSatish Balay } 491bc5ccf88SSatish Balay } 49275cae7c1SHong Zhang i++; 49375cae7c1SHong Zhang } 49475cae7c1SHong Zhang space = space_next; 495bc5ccf88SSatish Balay } 496563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 497563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 498563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 499563fb871SSatish Balay } 500bc5ccf88SSatish Balay 501563fb871SSatish Balay { int *onodes,*olengths; 502563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 503563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 504563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 505563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 506563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 507563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 508563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 509563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 510563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 511563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 512563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 513bc5ccf88SSatish Balay } 514bc5ccf88SSatish Balay 515bc5ccf88SSatish Balay /* do sends: 516bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 517bc5ccf88SSatish Balay the ith processor 518bc5ccf88SSatish Balay */ 519c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 520c1ac3661SBarry Smith sindices = (PetscInt*)(svalues + bs2*stash->n); 521b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 522c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 523bc5ccf88SSatish Balay starti = startv + size; 524a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 525bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 526bc5ccf88SSatish Balay for (i=1; i<size; i++) { 527563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 528563fb871SSatish Balay starti[i] = starti[i-1] + nlengths[i-1]*2; 529bc5ccf88SSatish Balay } 53075cae7c1SHong Zhang 53175cae7c1SHong Zhang i = 0; 5325bd3b8fbSHong Zhang space = stash->space_head; 53375cae7c1SHong Zhang while (space != PETSC_NULL){ 53475cae7c1SHong Zhang space_next = space->next; 5355bd3b8fbSHong Zhang sp_idx = space->idx; 5365bd3b8fbSHong Zhang sp_idy = space->idy; 5375bd3b8fbSHong Zhang sp_val = space->val; 53875cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 539bc5ccf88SSatish Balay j = owner[i]; 540a2d1c673SSatish Balay if (bs2 == 1) { 5415bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 542a2d1c673SSatish Balay } else { 543c1ac3661SBarry Smith PetscInt k; 5443eda8832SBarry Smith MatScalar *buf1,*buf2; 5454c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 5465bd3b8fbSHong Zhang buf2 = space->val + bs2*i; 5474c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 548a2d1c673SSatish Balay } 5495bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5505bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 551bc5ccf88SSatish Balay startv[j]++; 552bc5ccf88SSatish Balay starti[j]++; 55375cae7c1SHong Zhang i++; 55475cae7c1SHong Zhang } 55575cae7c1SHong Zhang space = space_next; 556bc5ccf88SSatish Balay } 557bc5ccf88SSatish Balay startv[0] = 0; 558563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 559e5d0e772SSatish Balay 560bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 561563fb871SSatish Balay if (nprocs[i]) { 562563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 563563fb871SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 564bc5ccf88SSatish Balay } 565b85c94c3SSatish Balay } 5666cf91177SBarry Smith #if defined(PETSC_USE_INFO) 567ae15b995SBarry Smith ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr); 568e5d0e772SSatish Balay for (i=0; i<size; i++) { 569e5d0e772SSatish Balay if (nprocs[i]) { 570ae15b995SBarry Smith ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 571e5d0e772SSatish Balay } 572e5d0e772SSatish Balay } 573e5d0e772SSatish Balay #endif 574606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 575606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 576a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 577a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 578a2d1c673SSatish Balay stash->nprocs = nprocs; 579a2d1c673SSatish Balay 580563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 581563fb871SSatish Balay ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 582563fb871SSatish Balay 583563fb871SSatish Balay for (i=0; i<nreceives; i++) { 584563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 585563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 586563fb871SSatish Balay } 587563fb871SSatish Balay stash->recv_waits = recv_waits; 588563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 589563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 590563fb871SSatish Balay 591bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 592563fb871SSatish Balay stash->rindices = rindices; stash->send_waits = send_waits; 593bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 594bc5ccf88SSatish Balay PetscFunctionReturn(0); 595bc5ccf88SSatish Balay } 596bc5ccf88SSatish Balay 597a2d1c673SSatish Balay /* 5988798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5998798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6004c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6014c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6024c1ff481SSatish Balay 6034c1ff481SSatish Balay Input Parameters: 6044c1ff481SSatish Balay stash - the stash 6054c1ff481SSatish Balay 6064c1ff481SSatish Balay Output Parameters: 6074c1ff481SSatish Balay nvals - the number of entries in the current message. 6084c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6094c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6104c1ff481SSatish Balay vals - the values 6114c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6124c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6134c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 614a2d1c673SSatish Balay */ 6154a2ae208SSatish Balay #undef __FUNCT__ 6164a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 617c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg) 618bc5ccf88SSatish Balay { 6196849ba73SBarry Smith PetscErrorCode ierr; 620fe09c992SBarry Smith PetscMPIInt i,*flg_v,i1,i2; 621fe09c992SBarry Smith PetscInt bs2; 622a2d1c673SSatish Balay MPI_Status recv_status; 623b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 624bc5ccf88SSatish Balay 625bc5ccf88SSatish Balay PetscFunctionBegin; 626bc5ccf88SSatish Balay 627a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 628a2d1c673SSatish Balay /* Return if no more messages to process */ 629a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 630a2d1c673SSatish Balay 631a2d1c673SSatish Balay flg_v = stash->nprocs; 6324c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 633a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 634a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 635a2d1c673SSatish Balay while (!match_found) { 636a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 637a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 638a2d1c673SSatish Balay if (i % 2) { 6393eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 640c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 641a2d1c673SSatish Balay *nvals = *nvals/bs2; 642563fb871SSatish Balay } else { 643563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 644563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 645563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 646bc5ccf88SSatish Balay } 647a2d1c673SSatish Balay 648a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 649c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 650c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 651a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 652563fb871SSatish Balay *rows = stash->rindices[i2]; 653a2d1c673SSatish Balay *cols = *rows + *nvals; 654563fb871SSatish Balay *vals = stash->rvalues[i1]; 655a2d1c673SSatish Balay *flg = 1; 656a2d1c673SSatish Balay stash->nprocessed ++; 65735d8aa7fSBarry Smith match_found = PETSC_TRUE; 658bc5ccf88SSatish Balay } 659bc5ccf88SSatish Balay } 660bc5ccf88SSatish Balay PetscFunctionReturn(0); 661bc5ccf88SSatish Balay } 662