1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 475cae7c1SHong Zhang #include "src/mat/utils/matstashspace.h" 5*5bd3b8fbSHong Zhang 63eda8832SBarry Smith /* 70ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 80ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 93eda8832SBarry Smith */ 10bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 114c1ff481SSatish Balay 129417f4adSLois Curfman McInnes /* 138798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 144c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 154c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 169417f4adSLois Curfman McInnes 174c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 184c1ff481SSatish Balay 194c1ff481SSatish Balay Input Parameters: 204c1ff481SSatish Balay comm - communicator, required for scatters. 214c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 224c1ff481SSatish Balay 234c1ff481SSatish Balay Output Parameters: 244c1ff481SSatish Balay stash - the newly created stash 259417f4adSLois Curfman McInnes */ 264a2ae208SSatish Balay #undef __FUNCT__ 274a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 299417f4adSLois Curfman McInnes { 30dfbe8321SBarry Smith PetscErrorCode ierr; 31c1ac3661SBarry Smith PetscInt max,*opt,nopt; 32f1af5d2fSBarry Smith PetscTruth flg; 33bc5ccf88SSatish Balay 343a40ed3dSBarry Smith PetscFunctionBegin; 35bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 36752ec6e0SSatish Balay stash->comm = comm; 37752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 38a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 39a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 40a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 41bc5ccf88SSatish Balay 42434d7ff9SSatish Balay nopt = stash->size; 43d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 44b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 45434d7ff9SSatish Balay if (flg) { 46434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 47434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 48434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 49f4ab19daSSatish Balay else max = 0; /* Use default */ 50434d7ff9SSatish Balay stash->umax = max; 51434d7ff9SSatish Balay } else { 52434d7ff9SSatish Balay stash->umax = 0; 53434d7ff9SSatish Balay } 54606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 554c1ff481SSatish Balay if (bs <= 0) bs = 1; 56a2d1c673SSatish Balay 574c1ff481SSatish Balay stash->bs = bs; 589417f4adSLois Curfman McInnes stash->nmax = 0; 59434d7ff9SSatish Balay stash->oldnmax = 0; 609417f4adSLois Curfman McInnes stash->n = 0; 614c1ff481SSatish Balay stash->reallocs = -1; 6275cae7c1SHong Zhang stash->space_head = 0; 6375cae7c1SHong Zhang stash->space = 0; 649417f4adSLois Curfman McInnes 65bc5ccf88SSatish Balay stash->send_waits = 0; 66bc5ccf88SSatish Balay stash->recv_waits = 0; 67a2d1c673SSatish Balay stash->send_status = 0; 68bc5ccf88SSatish Balay stash->nsends = 0; 69bc5ccf88SSatish Balay stash->nrecvs = 0; 70bc5ccf88SSatish Balay stash->svalues = 0; 71bc5ccf88SSatish Balay stash->rvalues = 0; 72563fb871SSatish Balay stash->rindices = 0; 73a2d1c673SSatish Balay stash->nprocs = 0; 74a2d1c673SSatish Balay stash->nprocessed = 0; 753a40ed3dSBarry Smith PetscFunctionReturn(0); 769417f4adSLois Curfman McInnes } 779417f4adSLois Curfman McInnes 784c1ff481SSatish Balay /* 798798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 804c1ff481SSatish Balay */ 814a2ae208SSatish Balay #undef __FUNCT__ 824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 849417f4adSLois Curfman McInnes { 85dfbe8321SBarry Smith PetscErrorCode ierr; 86a2d1c673SSatish Balay 87bc5ccf88SSatish Balay PetscFunctionBegin; 8875cae7c1SHong Zhang if (stash->space_head){ 8975cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 9075cae7c1SHong Zhang stash->space_head = 0; 9175cae7c1SHong Zhang } 92bc5ccf88SSatish Balay PetscFunctionReturn(0); 93bc5ccf88SSatish Balay } 94bc5ccf88SSatish Balay 954c1ff481SSatish Balay /* 968798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 974c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 984c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 994c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1004c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1014c1ff481SSatish Balay so that the same value can be used the next time through. 1024c1ff481SSatish Balay */ 1034a2ae208SSatish Balay #undef __FUNCT__ 1044a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 105dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 106bc5ccf88SSatish Balay { 1076849ba73SBarry Smith PetscErrorCode ierr; 108*5bd3b8fbSHong Zhang PetscInt nsends=stash->nsends,bs2,oldnmax; 109a2d1c673SSatish Balay MPI_Status *send_status; 110a2d1c673SSatish Balay 1113a40ed3dSBarry Smith PetscFunctionBegin; 112a2d1c673SSatish Balay /* wait on sends */ 113a2d1c673SSatish Balay if (nsends) { 11482502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 115a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 116606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 117a2d1c673SSatish Balay } 118a2d1c673SSatish Balay 119c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 120434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 121434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 122b9b97703SBarry Smith if (stash->n) { 12394b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1248a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 125434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 126b9b97703SBarry Smith } 127434d7ff9SSatish Balay 128d07ff455SSatish Balay stash->nmax = 0; 129d07ff455SSatish Balay stash->n = 0; 1304c1ff481SSatish Balay stash->reallocs = -1; 131a2d1c673SSatish Balay stash->nprocessed = 0; 13275cae7c1SHong Zhang if (stash->space_head){ 13375cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 13475cae7c1SHong Zhang stash->space_head = 0; 13575cae7c1SHong Zhang } 136606d414cSSatish Balay if (stash->send_waits) { 137606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 138606d414cSSatish Balay stash->send_waits = 0; 139606d414cSSatish Balay } 140606d414cSSatish Balay if (stash->recv_waits) { 141606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 142606d414cSSatish Balay stash->recv_waits = 0; 143606d414cSSatish Balay } 144606d414cSSatish Balay if (stash->svalues) { 145606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 146606d414cSSatish Balay stash->svalues = 0; 147606d414cSSatish Balay } 148606d414cSSatish Balay if (stash->rvalues) { 149606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 150606d414cSSatish Balay stash->rvalues = 0; 151606d414cSSatish Balay } 152563fb871SSatish Balay if (stash->rindices) { 153563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 154563fb871SSatish Balay stash->rindices = 0; 155563fb871SSatish Balay } 156606d414cSSatish Balay if (stash->nprocs) { 157b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 158606d414cSSatish Balay stash->nprocs = 0; 159606d414cSSatish Balay } 1603a40ed3dSBarry Smith PetscFunctionReturn(0); 1619417f4adSLois Curfman McInnes } 1629417f4adSLois Curfman McInnes 1634c1ff481SSatish Balay /* 1648798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1654c1ff481SSatish Balay 1664c1ff481SSatish Balay Input Parameters: 1674c1ff481SSatish Balay stash - the stash 16894b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1694c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1704c1ff481SSatish Balay 1714c1ff481SSatish Balay */ 1724a2ae208SSatish Balay #undef __FUNCT__ 1734a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 174c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 17597530c3fSBarry Smith { 176c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 17794b769a5SSatish Balay 1783a40ed3dSBarry Smith PetscFunctionBegin; 1791ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1801ecfd215SBarry Smith if (reallocs) { 181434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 182434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1831ecfd215SBarry Smith } 184bc5ccf88SSatish Balay PetscFunctionReturn(0); 185bc5ccf88SSatish Balay } 1864c1ff481SSatish Balay 1874c1ff481SSatish Balay /* 1888798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1894c1ff481SSatish Balay 1904c1ff481SSatish Balay Input Parameters: 1914c1ff481SSatish Balay stash - the stash 1924c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1934c1ff481SSatish Balay this value is used while allocating memory. 1944c1ff481SSatish Balay */ 1954a2ae208SSatish Balay #undef __FUNCT__ 1964a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 197c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 198bc5ccf88SSatish Balay { 199bc5ccf88SSatish Balay PetscFunctionBegin; 200434d7ff9SSatish Balay stash->umax = max; 2013a40ed3dSBarry Smith PetscFunctionReturn(0); 20297530c3fSBarry Smith } 20397530c3fSBarry Smith 2048798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2054c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2064c1ff481SSatish Balay being inserted into the stash. 2074c1ff481SSatish Balay 2084c1ff481SSatish Balay Input Parameters: 2094c1ff481SSatish Balay stash - the stash 2104c1ff481SSatish Balay incr - the minimum increase requested 2114c1ff481SSatish Balay 2124c1ff481SSatish Balay Notes: 2134c1ff481SSatish Balay This routine doubles the currently used memory. 2144c1ff481SSatish Balay */ 2154a2ae208SSatish Balay #undef __FUNCT__ 2164a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 217c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2189417f4adSLois Curfman McInnes { 2196849ba73SBarry Smith PetscErrorCode ierr; 220*5bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2219417f4adSLois Curfman McInnes 2223a40ed3dSBarry Smith PetscFunctionBegin; 2239417f4adSLois Curfman McInnes /* allocate a larger stash */ 224c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 225434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 226434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 227c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 228434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 229434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 230434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2314c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 232d07ff455SSatish Balay 23375cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 23475cae7c1SHong Zhang if (!stash->nmax) { /* new stash or resuing stash->oldnmax */ 23575cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr); 23675cae7c1SHong Zhang stash->space = stash->space_head; 23775cae7c1SHong Zhang } else { 23875cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 23975cae7c1SHong Zhang } 240bc5ccf88SSatish Balay stash->reallocs++; 24175cae7c1SHong Zhang stash->nmax = newnmax; 242bc5ccf88SSatish Balay PetscFunctionReturn(0); 243bc5ccf88SSatish Balay } 244bc5ccf88SSatish Balay /* 2458798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2464c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2474c1ff481SSatish Balay can be inserted with a single call to this function. 2484c1ff481SSatish Balay 2494c1ff481SSatish Balay Input Parameters: 2504c1ff481SSatish Balay stash - the stash 2514c1ff481SSatish Balay row - the global row correspoiding to the values 2524c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2534c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2544c1ff481SSatish Balay values - the values inserted 255bc5ccf88SSatish Balay */ 2564a2ae208SSatish Balay #undef __FUNCT__ 2574a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 258c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[]) 259bc5ccf88SSatish Balay { 260dfbe8321SBarry Smith PetscErrorCode ierr; 26175cae7c1SHong Zhang PetscInt i,k; 26275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 263bc5ccf88SSatish Balay 264bc5ccf88SSatish Balay PetscFunctionBegin; 2654c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 26675cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2678798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2689417f4adSLois Curfman McInnes } 26975cae7c1SHong Zhang space = stash->space; 27075cae7c1SHong Zhang k = space->local_used; 2714c1ff481SSatish Balay for (i=0; i<n; i++) { 27275cae7c1SHong Zhang space->idx[k] = row; 27375cae7c1SHong Zhang space->idy[k] = idxn[i]; 27475cae7c1SHong Zhang space->val[k] = values[i]; 27575cae7c1SHong Zhang k++; 2769417f4adSLois Curfman McInnes } 277*5bd3b8fbSHong Zhang stash->n += n; 27875cae7c1SHong Zhang space->local_used += n; 27975cae7c1SHong Zhang space->local_remaining -= n; 280a2d1c673SSatish Balay PetscFunctionReturn(0); 281a2d1c673SSatish Balay } 28275cae7c1SHong Zhang 2834c1ff481SSatish Balay /* 2848798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2854c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2864c1ff481SSatish Balay can be inserted with a single call to this function. 287a2d1c673SSatish Balay 2884c1ff481SSatish Balay Input Parameters: 2894c1ff481SSatish Balay stash - the stash 2904c1ff481SSatish Balay row - the global row correspoiding to the values 2914c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2924c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2934c1ff481SSatish Balay values - the values inserted 2944c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2954c1ff481SSatish Balay this happens because the input is columnoriented. 2964c1ff481SSatish Balay */ 2974a2ae208SSatish Balay #undef __FUNCT__ 2984a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 299c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval) 300a2d1c673SSatish Balay { 301dfbe8321SBarry Smith PetscErrorCode ierr; 30275cae7c1SHong Zhang PetscInt i,k; 30375cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 304a2d1c673SSatish Balay 3054c1ff481SSatish Balay PetscFunctionBegin; 3064c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 30775cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3088798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3094c1ff481SSatish Balay } 31075cae7c1SHong Zhang space = stash->space; 31175cae7c1SHong Zhang k = space->local_used; 3124c1ff481SSatish Balay for (i=0; i<n; i++) { 31375cae7c1SHong Zhang space->idx[k] = row; 31475cae7c1SHong Zhang space->idy[k] = idxn[i]; 31575cae7c1SHong Zhang space->val[k] = values[i*stepval]; 31675cae7c1SHong Zhang k++; 3174c1ff481SSatish Balay } 318*5bd3b8fbSHong Zhang stash->n += n; 31975cae7c1SHong Zhang space->local_used += n; 32075cae7c1SHong Zhang space->local_remaining -= n; 3214c1ff481SSatish Balay PetscFunctionReturn(0); 3224c1ff481SSatish Balay } 3234c1ff481SSatish Balay 3244c1ff481SSatish Balay /* 3258798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3264c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3274c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3284c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3294c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3304c1ff481SSatish Balay 3314c1ff481SSatish Balay Input Parameters: 3324c1ff481SSatish Balay stash - the stash 3334c1ff481SSatish Balay row - the global block-row correspoiding to the values 3344c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3354c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3364c1ff481SSatish Balay values. Each block is of size bs*bs. 3374c1ff481SSatish Balay values - the values inserted 3384c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3394c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3404c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3414c1ff481SSatish Balay */ 3424a2ae208SSatish Balay #undef __FUNCT__ 3434a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 344c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3454c1ff481SSatish Balay { 346dfbe8321SBarry Smith PetscErrorCode ierr; 34775cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 348f15d580aSBarry Smith const MatScalar *vals; 349f15d580aSBarry Smith MatScalar *array; 35075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 351a2d1c673SSatish Balay 352a2d1c673SSatish Balay PetscFunctionBegin; 35375cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3548798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 355a2d1c673SSatish Balay } 35675cae7c1SHong Zhang space = stash->space; 35775cae7c1SHong Zhang l = space->local_used; 35875cae7c1SHong Zhang bs2 = bs*bs; 3594c1ff481SSatish Balay for (i=0; i<n; i++) { 36075cae7c1SHong Zhang space->idx[l] = row; 36175cae7c1SHong Zhang space->idy[l] = idxn[i]; 36275cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 36375cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 36475cae7c1SHong Zhang funtion call */ 36575cae7c1SHong Zhang array = space->val + bs2*l; 36675cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 36775cae7c1SHong Zhang for (j=0; j<bs; j++) { 36875cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 36975cae7c1SHong Zhang array++; 37075cae7c1SHong Zhang vals += cmax*bs; 37175cae7c1SHong Zhang } 37275cae7c1SHong Zhang l++; 373a2d1c673SSatish Balay } 374*5bd3b8fbSHong Zhang stash->n += n; 37575cae7c1SHong Zhang space->local_used += n; 37675cae7c1SHong Zhang space->local_remaining -= n; 3774c1ff481SSatish Balay PetscFunctionReturn(0); 3784c1ff481SSatish Balay } 3794c1ff481SSatish Balay 3804c1ff481SSatish Balay /* 3818798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3824c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3834c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3844c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3854c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3864c1ff481SSatish Balay 3874c1ff481SSatish Balay Input Parameters: 3884c1ff481SSatish Balay stash - the stash 3894c1ff481SSatish Balay row - the global block-row correspoiding to the values 3904c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3914c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3924c1ff481SSatish Balay values. Each block is of size bs*bs. 3934c1ff481SSatish Balay values - the values inserted 3944c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3954c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3964c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3974c1ff481SSatish Balay */ 3984a2ae208SSatish Balay #undef __FUNCT__ 3994a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 400c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 4014c1ff481SSatish Balay { 402dfbe8321SBarry Smith PetscErrorCode ierr; 40375cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 404f15d580aSBarry Smith const MatScalar *vals; 405f15d580aSBarry Smith MatScalar *array; 40675cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4074c1ff481SSatish Balay 4084c1ff481SSatish Balay PetscFunctionBegin; 40975cae7c1SHong Zhang if (!space || space->local_remaining < n){ 4108798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4114c1ff481SSatish Balay } 41275cae7c1SHong Zhang space = stash->space; 41375cae7c1SHong Zhang l = space->local_used; 41475cae7c1SHong Zhang bs2 = bs*bs; 4154c1ff481SSatish Balay for (i=0; i<n; i++) { 41675cae7c1SHong Zhang space->idx[l] = row; 41775cae7c1SHong Zhang space->idy[l] = idxn[i]; 41875cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 41975cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 42075cae7c1SHong Zhang funtion call */ 42175cae7c1SHong Zhang array = space->val + bs2*l; 42275cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 42375cae7c1SHong Zhang for (j=0; j<bs; j++) { 42475cae7c1SHong Zhang for (k=0; k<bs; k++) {array[k] = vals[k];} 42575cae7c1SHong Zhang array += bs; 42675cae7c1SHong Zhang vals += rmax*bs; 42775cae7c1SHong Zhang } 428*5bd3b8fbSHong Zhang l++; 429a2d1c673SSatish Balay } 430*5bd3b8fbSHong Zhang stash->n += n; 43175cae7c1SHong Zhang space->local_used += n; 43275cae7c1SHong Zhang space->local_remaining -= n; 4333a40ed3dSBarry Smith PetscFunctionReturn(0); 4349417f4adSLois Curfman McInnes } 4354c1ff481SSatish Balay /* 4368798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4374c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4384c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4394c1ff481SSatish Balay processors. 440bc5ccf88SSatish Balay 4414c1ff481SSatish Balay Input Parameters: 4424c1ff481SSatish Balay stash - the stash 4434c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4444c1ff481SSatish Balay for each node. 4454c1ff481SSatish Balay 4464c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4474c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4484c1ff481SSatish Balay the proper global indices. 4494c1ff481SSatish Balay */ 4504a2ae208SSatish Balay #undef __FUNCT__ 4514a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 452c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners) 453bc5ccf88SSatish Balay { 454c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 455fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4566849ba73SBarry Smith PetscErrorCode ierr; 45775cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 458563fb871SSatish Balay MatScalar **rvalues,*svalues; 459bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 460563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 461fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 462*5bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 463*5bd3b8fbSHong Zhang MatScalar *sp_val; 464*5bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 465bc5ccf88SSatish Balay 466bc5ccf88SSatish Balay PetscFunctionBegin; 4674c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 46875cae7c1SHong Zhang 469bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 470fe09c992SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 471fe09c992SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); 472c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 473a2d1c673SSatish Balay 474563fb871SSatish Balay nlengths = nprocs+size; 47575cae7c1SHong Zhang i = j = 0; 4767357eb19SBarry Smith lastidx = -1; 477*5bd3b8fbSHong Zhang space = stash->space_head; 47875cae7c1SHong Zhang while (space != PETSC_NULL){ 47975cae7c1SHong Zhang space_next = space->next; 480*5bd3b8fbSHong Zhang sp_idx = space->idx; 48175cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 4827357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 483*5bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4847357eb19SBarry Smith lastidx = idx; 4857357eb19SBarry Smith for (; j<size; j++) { 4864c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 487563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 488bc5ccf88SSatish Balay } 489bc5ccf88SSatish Balay } 49075cae7c1SHong Zhang i++; 49175cae7c1SHong Zhang } 49275cae7c1SHong Zhang space = space_next; 493bc5ccf88SSatish Balay } 494563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 495563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 496563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 497563fb871SSatish Balay } 498bc5ccf88SSatish Balay 499563fb871SSatish Balay { int *onodes,*olengths; 500563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 501563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 502563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 503563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 504563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 505563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 506563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 507563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 508563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 509563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 510563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 511bc5ccf88SSatish Balay } 512bc5ccf88SSatish Balay 513bc5ccf88SSatish Balay /* do sends: 514bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 515bc5ccf88SSatish Balay the ith processor 516bc5ccf88SSatish Balay */ 517c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 518c1ac3661SBarry Smith sindices = (PetscInt*)(svalues + bs2*stash->n); 519b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 520c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 521bc5ccf88SSatish Balay starti = startv + size; 522a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 523bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 524bc5ccf88SSatish Balay for (i=1; i<size; i++) { 525563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 526563fb871SSatish Balay starti[i] = starti[i-1] + nlengths[i-1]*2; 527bc5ccf88SSatish Balay } 52875cae7c1SHong Zhang 52975cae7c1SHong Zhang i = 0; 530*5bd3b8fbSHong Zhang space = stash->space_head; 53175cae7c1SHong Zhang while (space != PETSC_NULL){ 53275cae7c1SHong Zhang space_next = space->next; 533*5bd3b8fbSHong Zhang sp_idx = space->idx; 534*5bd3b8fbSHong Zhang sp_idy = space->idy; 535*5bd3b8fbSHong Zhang sp_val = space->val; 53675cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 537bc5ccf88SSatish Balay j = owner[i]; 538a2d1c673SSatish Balay if (bs2 == 1) { 539*5bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 540a2d1c673SSatish Balay } else { 541c1ac3661SBarry Smith PetscInt k; 5423eda8832SBarry Smith MatScalar *buf1,*buf2; 5434c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 544*5bd3b8fbSHong Zhang buf2 = space->val + bs2*i; 5454c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 546a2d1c673SSatish Balay } 547*5bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 548*5bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 549bc5ccf88SSatish Balay startv[j]++; 550bc5ccf88SSatish Balay starti[j]++; 55175cae7c1SHong Zhang i++; 55275cae7c1SHong Zhang } 55375cae7c1SHong Zhang space = space_next; 554bc5ccf88SSatish Balay } 555bc5ccf88SSatish Balay startv[0] = 0; 556563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 557e5d0e772SSatish Balay 558bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 559563fb871SSatish Balay if (nprocs[i]) { 560563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 561563fb871SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 562bc5ccf88SSatish Balay } 563b85c94c3SSatish Balay } 5645bcf5ddbSSatish Balay #if defined(PETSC_USE_VERBOSE) 56509f3b4e5SSatish Balay ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: No of messages: %d \n",nsends));CHKERRQ(ierr); 566e5d0e772SSatish Balay for (i=0; i<size; i++) { 567e5d0e772SSatish Balay if (nprocs[i]) { 56809f3b4e5SSatish Balay ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 569e5d0e772SSatish Balay } 570e5d0e772SSatish Balay } 571e5d0e772SSatish Balay #endif 572606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 573606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 574a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 575a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 576a2d1c673SSatish Balay stash->nprocs = nprocs; 577a2d1c673SSatish Balay 578563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 579563fb871SSatish Balay ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 580563fb871SSatish Balay 581563fb871SSatish Balay for (i=0; i<nreceives; i++) { 582563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 583563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 584563fb871SSatish Balay } 585563fb871SSatish Balay stash->recv_waits = recv_waits; 586563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 587563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 588563fb871SSatish Balay 589bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 590563fb871SSatish Balay stash->rindices = rindices; stash->send_waits = send_waits; 591bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 592bc5ccf88SSatish Balay PetscFunctionReturn(0); 593bc5ccf88SSatish Balay } 594bc5ccf88SSatish Balay 595a2d1c673SSatish Balay /* 5968798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5978798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5984c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5994c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6004c1ff481SSatish Balay 6014c1ff481SSatish Balay Input Parameters: 6024c1ff481SSatish Balay stash - the stash 6034c1ff481SSatish Balay 6044c1ff481SSatish Balay Output Parameters: 6054c1ff481SSatish Balay nvals - the number of entries in the current message. 6064c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6074c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6084c1ff481SSatish Balay vals - the values 6094c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6104c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6114c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 612a2d1c673SSatish Balay */ 6134a2ae208SSatish Balay #undef __FUNCT__ 6144a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 615c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg) 616bc5ccf88SSatish Balay { 6176849ba73SBarry Smith PetscErrorCode ierr; 618fe09c992SBarry Smith PetscMPIInt i,*flg_v,i1,i2; 619fe09c992SBarry Smith PetscInt bs2; 620a2d1c673SSatish Balay MPI_Status recv_status; 621b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 622bc5ccf88SSatish Balay 623bc5ccf88SSatish Balay PetscFunctionBegin; 624bc5ccf88SSatish Balay 625a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 626a2d1c673SSatish Balay /* Return if no more messages to process */ 627a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 628a2d1c673SSatish Balay 629a2d1c673SSatish Balay flg_v = stash->nprocs; 6304c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 631a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 632a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 633a2d1c673SSatish Balay while (!match_found) { 634a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 635a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 636a2d1c673SSatish Balay if (i % 2) { 6373eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 638c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 639a2d1c673SSatish Balay *nvals = *nvals/bs2; 640563fb871SSatish Balay } else { 641563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 642563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 643563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 644bc5ccf88SSatish Balay } 645a2d1c673SSatish Balay 646a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 647c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 648c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 649a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 650563fb871SSatish Balay *rows = stash->rindices[i2]; 651a2d1c673SSatish Balay *cols = *rows + *nvals; 652563fb871SSatish Balay *vals = stash->rvalues[i1]; 653a2d1c673SSatish Balay *flg = 1; 654a2d1c673SSatish Balay stash->nprocessed ++; 65535d8aa7fSBarry Smith match_found = PETSC_TRUE; 656bc5ccf88SSatish Balay } 657bc5ccf88SSatish Balay } 658bc5ccf88SSatish Balay PetscFunctionReturn(0); 659bc5ccf88SSatish Balay } 660