1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 37c4f633dSBarry Smith #include "private/matimpl.h" 45bd3b8fbSHong Zhang 5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 64c1ff481SSatish Balay 79417f4adSLois Curfman McInnes /* 88798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 94c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 104c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 119417f4adSLois Curfman McInnes 124c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 134c1ff481SSatish Balay 144c1ff481SSatish Balay Input Parameters: 154c1ff481SSatish Balay comm - communicator, required for scatters. 164c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 174c1ff481SSatish Balay 184c1ff481SSatish Balay Output Parameters: 194c1ff481SSatish Balay stash - the newly created stash 209417f4adSLois Curfman McInnes */ 214a2ae208SSatish Balay #undef __FUNCT__ 224a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 23c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 249417f4adSLois Curfman McInnes { 25dfbe8321SBarry Smith PetscErrorCode ierr; 26c1ac3661SBarry Smith PetscInt max,*opt,nopt; 27f1af5d2fSBarry Smith PetscTruth flg; 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31752ec6e0SSatish Balay stash->comm = comm; 32752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36bc5ccf88SSatish Balay 37434d7ff9SSatish Balay nopt = stash->size; 38d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 39b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 40434d7ff9SSatish Balay if (flg) { 41434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 42434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 43434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 44f4ab19daSSatish Balay else max = 0; /* Use default */ 45434d7ff9SSatish Balay stash->umax = max; 46434d7ff9SSatish Balay } else { 47434d7ff9SSatish Balay stash->umax = 0; 48434d7ff9SSatish Balay } 49606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 504c1ff481SSatish Balay if (bs <= 0) bs = 1; 51a2d1c673SSatish Balay 524c1ff481SSatish Balay stash->bs = bs; 539417f4adSLois Curfman McInnes stash->nmax = 0; 54434d7ff9SSatish Balay stash->oldnmax = 0; 559417f4adSLois Curfman McInnes stash->n = 0; 564c1ff481SSatish Balay stash->reallocs = -1; 5775cae7c1SHong Zhang stash->space_head = 0; 5875cae7c1SHong Zhang stash->space = 0; 599417f4adSLois Curfman McInnes 60bc5ccf88SSatish Balay stash->send_waits = 0; 61bc5ccf88SSatish Balay stash->recv_waits = 0; 62a2d1c673SSatish Balay stash->send_status = 0; 63bc5ccf88SSatish Balay stash->nsends = 0; 64bc5ccf88SSatish Balay stash->nrecvs = 0; 65bc5ccf88SSatish Balay stash->svalues = 0; 66bc5ccf88SSatish Balay stash->rvalues = 0; 67563fb871SSatish Balay stash->rindices = 0; 68a2d1c673SSatish Balay stash->nprocs = 0; 69a2d1c673SSatish Balay stash->nprocessed = 0; 703a40ed3dSBarry Smith PetscFunctionReturn(0); 719417f4adSLois Curfman McInnes } 729417f4adSLois Curfman McInnes 734c1ff481SSatish Balay /* 748798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 754c1ff481SSatish Balay */ 764a2ae208SSatish Balay #undef __FUNCT__ 774a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 78dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 799417f4adSLois Curfman McInnes { 80dfbe8321SBarry Smith PetscErrorCode ierr; 81a2d1c673SSatish Balay 82bc5ccf88SSatish Balay PetscFunctionBegin; 8375cae7c1SHong Zhang if (stash->space_head){ 8475cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 8575cae7c1SHong Zhang stash->space_head = 0; 8682740460SHong Zhang stash->space = 0; 8775cae7c1SHong Zhang } 88bc5ccf88SSatish Balay PetscFunctionReturn(0); 89bc5ccf88SSatish Balay } 90bc5ccf88SSatish Balay 914c1ff481SSatish Balay /* 928798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 934c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 944c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 954c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 964c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 974c1ff481SSatish Balay so that the same value can be used the next time through. 984c1ff481SSatish Balay */ 994a2ae208SSatish Balay #undef __FUNCT__ 1004a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 101dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 102bc5ccf88SSatish Balay { 1036849ba73SBarry Smith PetscErrorCode ierr; 1045bd3b8fbSHong Zhang PetscInt nsends=stash->nsends,bs2,oldnmax; 105a2d1c673SSatish Balay MPI_Status *send_status; 106a2d1c673SSatish Balay 1073a40ed3dSBarry Smith PetscFunctionBegin; 108a2d1c673SSatish Balay /* wait on sends */ 109a2d1c673SSatish Balay if (nsends) { 11082502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 111a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 112606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 113a2d1c673SSatish Balay } 114a2d1c673SSatish Balay 115c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 116434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 117434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 118b9b97703SBarry Smith if (stash->n) { 11994b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1208a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 121434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 122b9b97703SBarry Smith } 123434d7ff9SSatish Balay 124d07ff455SSatish Balay stash->nmax = 0; 125d07ff455SSatish Balay stash->n = 0; 1264c1ff481SSatish Balay stash->reallocs = -1; 127a2d1c673SSatish Balay stash->nprocessed = 0; 12875cae7c1SHong Zhang if (stash->space_head){ 12975cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 13075cae7c1SHong Zhang stash->space_head = 0; 13182740460SHong Zhang stash->space = 0; 13275cae7c1SHong Zhang } 133606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 134606d414cSSatish Balay stash->send_waits = 0; 135606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 136606d414cSSatish Balay stash->recv_waits = 0; 137606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 138606d414cSSatish Balay stash->svalues = 0; 139606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 140606d414cSSatish Balay stash->rvalues = 0; 141563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 142563fb871SSatish Balay stash->rindices = 0; 143b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 144606d414cSSatish Balay stash->nprocs = 0; 1453a40ed3dSBarry Smith PetscFunctionReturn(0); 1469417f4adSLois Curfman McInnes } 1479417f4adSLois Curfman McInnes 1484c1ff481SSatish Balay /* 1498798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1504c1ff481SSatish Balay 1514c1ff481SSatish Balay Input Parameters: 1524c1ff481SSatish Balay stash - the stash 15394b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1544c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1554c1ff481SSatish Balay 1564c1ff481SSatish Balay */ 1574a2ae208SSatish Balay #undef __FUNCT__ 1584a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 159c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 16097530c3fSBarry Smith { 161c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 16294b769a5SSatish Balay 1633a40ed3dSBarry Smith PetscFunctionBegin; 1641ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1651ecfd215SBarry Smith if (reallocs) { 166434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 167434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1681ecfd215SBarry Smith } 169bc5ccf88SSatish Balay PetscFunctionReturn(0); 170bc5ccf88SSatish Balay } 1714c1ff481SSatish Balay 1724c1ff481SSatish Balay /* 1738798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1744c1ff481SSatish Balay 1754c1ff481SSatish Balay Input Parameters: 1764c1ff481SSatish Balay stash - the stash 1774c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1784c1ff481SSatish Balay this value is used while allocating memory. 1794c1ff481SSatish Balay */ 1804a2ae208SSatish Balay #undef __FUNCT__ 1814a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 182c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 183bc5ccf88SSatish Balay { 184bc5ccf88SSatish Balay PetscFunctionBegin; 185434d7ff9SSatish Balay stash->umax = max; 1863a40ed3dSBarry Smith PetscFunctionReturn(0); 18797530c3fSBarry Smith } 18897530c3fSBarry Smith 1898798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1904c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1914c1ff481SSatish Balay being inserted into the stash. 1924c1ff481SSatish Balay 1934c1ff481SSatish Balay Input Parameters: 1944c1ff481SSatish Balay stash - the stash 1954c1ff481SSatish Balay incr - the minimum increase requested 1964c1ff481SSatish Balay 1974c1ff481SSatish Balay Notes: 1984c1ff481SSatish Balay This routine doubles the currently used memory. 1994c1ff481SSatish Balay */ 2004a2ae208SSatish Balay #undef __FUNCT__ 2014a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 202c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2039417f4adSLois Curfman McInnes { 2046849ba73SBarry Smith PetscErrorCode ierr; 2055bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2069417f4adSLois Curfman McInnes 2073a40ed3dSBarry Smith PetscFunctionBegin; 2089417f4adSLois Curfman McInnes /* allocate a larger stash */ 209c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 210434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 211434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 212c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 213434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 214434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 215434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2164c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 217d07ff455SSatish Balay 21875cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 21975cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 220b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 221b087b6d6SSatish Balay stash->space_head = stash->space; 22275cae7c1SHong Zhang } 223b087b6d6SSatish Balay 224bc5ccf88SSatish Balay stash->reallocs++; 22575cae7c1SHong Zhang stash->nmax = newnmax; 226bc5ccf88SSatish Balay PetscFunctionReturn(0); 227bc5ccf88SSatish Balay } 228bc5ccf88SSatish Balay /* 2298798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2304c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2314c1ff481SSatish Balay can be inserted with a single call to this function. 2324c1ff481SSatish Balay 2334c1ff481SSatish Balay Input Parameters: 2344c1ff481SSatish Balay stash - the stash 2354c1ff481SSatish Balay row - the global row correspoiding to the values 2364c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2374c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2384c1ff481SSatish Balay values - the values inserted 239bc5ccf88SSatish Balay */ 2404a2ae208SSatish Balay #undef __FUNCT__ 2414a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 242*b400d20cSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries) 243bc5ccf88SSatish Balay { 244dfbe8321SBarry Smith PetscErrorCode ierr; 245*b400d20cSBarry Smith PetscInt i,k,cnt = 0; 24675cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 247bc5ccf88SSatish Balay 248bc5ccf88SSatish Balay PetscFunctionBegin; 2494c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 25075cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2518798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2529417f4adSLois Curfman McInnes } 25375cae7c1SHong Zhang space = stash->space; 25475cae7c1SHong Zhang k = space->local_used; 2554c1ff481SSatish Balay for (i=0; i<n; i++) { 256*b400d20cSBarry Smith if (ignorezeroentries && !values[i]) continue; 25775cae7c1SHong Zhang space->idx[k] = row; 25875cae7c1SHong Zhang space->idy[k] = idxn[i]; 25975cae7c1SHong Zhang space->val[k] = values[i]; 26075cae7c1SHong Zhang k++; 261*b400d20cSBarry Smith cnt++; 2629417f4adSLois Curfman McInnes } 263*b400d20cSBarry Smith stash->n += cnt; 264*b400d20cSBarry Smith space->local_used += cnt; 265*b400d20cSBarry Smith space->local_remaining -= cnt; 266a2d1c673SSatish Balay PetscFunctionReturn(0); 267a2d1c673SSatish Balay } 26875cae7c1SHong Zhang 2694c1ff481SSatish Balay /* 2708798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2714c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2724c1ff481SSatish Balay can be inserted with a single call to this function. 273a2d1c673SSatish Balay 2744c1ff481SSatish Balay Input Parameters: 2754c1ff481SSatish Balay stash - the stash 2764c1ff481SSatish Balay row - the global row correspoiding to the values 2774c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2784c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2794c1ff481SSatish Balay values - the values inserted 2804c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2814c1ff481SSatish Balay this happens because the input is columnoriented. 2824c1ff481SSatish Balay */ 2834a2ae208SSatish Balay #undef __FUNCT__ 2844a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 285*b400d20cSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries) 286a2d1c673SSatish Balay { 287dfbe8321SBarry Smith PetscErrorCode ierr; 288*b400d20cSBarry Smith PetscInt i,k,cnt; 28975cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 290a2d1c673SSatish Balay 2914c1ff481SSatish Balay PetscFunctionBegin; 2924c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 29375cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2948798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2954c1ff481SSatish Balay } 29675cae7c1SHong Zhang space = stash->space; 29775cae7c1SHong Zhang k = space->local_used; 2984c1ff481SSatish Balay for (i=0; i<n; i++) { 299*b400d20cSBarry Smith if (ignorezeroentries && !values[i*stepval]) continue; 30075cae7c1SHong Zhang space->idx[k] = row; 30175cae7c1SHong Zhang space->idy[k] = idxn[i]; 30275cae7c1SHong Zhang space->val[k] = values[i*stepval]; 30375cae7c1SHong Zhang k++; 304*b400d20cSBarry Smith cnt++; 3054c1ff481SSatish Balay } 306*b400d20cSBarry Smith stash->n += cnt; 307*b400d20cSBarry Smith space->local_used += cnt; 308*b400d20cSBarry Smith space->local_remaining -= cnt; 3094c1ff481SSatish Balay PetscFunctionReturn(0); 3104c1ff481SSatish Balay } 3114c1ff481SSatish Balay 3124c1ff481SSatish Balay /* 3138798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3144c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3154c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3164c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3174c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3184c1ff481SSatish Balay 3194c1ff481SSatish Balay Input Parameters: 3204c1ff481SSatish Balay stash - the stash 3214c1ff481SSatish Balay row - the global block-row correspoiding to the values 3224c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3234c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3244c1ff481SSatish Balay values. Each block is of size bs*bs. 3254c1ff481SSatish Balay values - the values inserted 3264c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3274c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3284c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3294c1ff481SSatish Balay */ 3304a2ae208SSatish Balay #undef __FUNCT__ 3314a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 33254f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3334c1ff481SSatish Balay { 334dfbe8321SBarry Smith PetscErrorCode ierr; 33575cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 33654f21887SBarry Smith const PetscScalar *vals; 33754f21887SBarry Smith PetscScalar *array; 33875cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 339a2d1c673SSatish Balay 340a2d1c673SSatish Balay PetscFunctionBegin; 34175cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3428798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 343a2d1c673SSatish Balay } 34475cae7c1SHong Zhang space = stash->space; 34575cae7c1SHong Zhang l = space->local_used; 34675cae7c1SHong Zhang bs2 = bs*bs; 3474c1ff481SSatish Balay for (i=0; i<n; i++) { 34875cae7c1SHong Zhang space->idx[l] = row; 34975cae7c1SHong Zhang space->idy[l] = idxn[i]; 35075cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 35175cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 35275cae7c1SHong Zhang funtion call */ 35375cae7c1SHong Zhang array = space->val + bs2*l; 35475cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 35575cae7c1SHong Zhang for (j=0; j<bs; j++) { 35675cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 35775cae7c1SHong Zhang array++; 35875cae7c1SHong Zhang vals += cmax*bs; 35975cae7c1SHong Zhang } 36075cae7c1SHong Zhang l++; 361a2d1c673SSatish Balay } 3625bd3b8fbSHong Zhang stash->n += n; 36375cae7c1SHong Zhang space->local_used += n; 36475cae7c1SHong Zhang space->local_remaining -= n; 3654c1ff481SSatish Balay PetscFunctionReturn(0); 3664c1ff481SSatish Balay } 3674c1ff481SSatish Balay 3684c1ff481SSatish Balay /* 3698798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3704c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3714c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3724c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3734c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3744c1ff481SSatish Balay 3754c1ff481SSatish Balay Input Parameters: 3764c1ff481SSatish Balay stash - the stash 3774c1ff481SSatish Balay row - the global block-row correspoiding to the values 3784c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3794c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3804c1ff481SSatish Balay values. Each block is of size bs*bs. 3814c1ff481SSatish Balay values - the values inserted 3824c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3834c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3844c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3854c1ff481SSatish Balay */ 3864a2ae208SSatish Balay #undef __FUNCT__ 3874a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 38854f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3894c1ff481SSatish Balay { 390dfbe8321SBarry Smith PetscErrorCode ierr; 39175cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 39254f21887SBarry Smith const PetscScalar *vals; 39354f21887SBarry Smith PetscScalar *array; 39475cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 3954c1ff481SSatish Balay 3964c1ff481SSatish Balay PetscFunctionBegin; 39775cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3988798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3994c1ff481SSatish Balay } 40075cae7c1SHong Zhang space = stash->space; 40175cae7c1SHong Zhang l = space->local_used; 40275cae7c1SHong Zhang bs2 = bs*bs; 4034c1ff481SSatish Balay for (i=0; i<n; i++) { 40475cae7c1SHong Zhang space->idx[l] = row; 40575cae7c1SHong Zhang space->idy[l] = idxn[i]; 40675cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 40775cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 40875cae7c1SHong Zhang funtion call */ 40975cae7c1SHong Zhang array = space->val + bs2*l; 41075cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 41175cae7c1SHong Zhang for (j=0; j<bs; j++) { 41275cae7c1SHong Zhang for (k=0; k<bs; k++) {array[k] = vals[k];} 41375cae7c1SHong Zhang array += bs; 41475cae7c1SHong Zhang vals += rmax*bs; 41575cae7c1SHong Zhang } 4165bd3b8fbSHong Zhang l++; 417a2d1c673SSatish Balay } 4185bd3b8fbSHong Zhang stash->n += n; 41975cae7c1SHong Zhang space->local_used += n; 42075cae7c1SHong Zhang space->local_remaining -= n; 4213a40ed3dSBarry Smith PetscFunctionReturn(0); 4229417f4adSLois Curfman McInnes } 4234c1ff481SSatish Balay /* 4248798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4254c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4264c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4274c1ff481SSatish Balay processors. 428bc5ccf88SSatish Balay 4294c1ff481SSatish Balay Input Parameters: 4304c1ff481SSatish Balay stash - the stash 4314c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4324c1ff481SSatish Balay for each node. 4334c1ff481SSatish Balay 4344c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4354c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4364c1ff481SSatish Balay the proper global indices. 4374c1ff481SSatish Balay */ 4384a2ae208SSatish Balay #undef __FUNCT__ 4394a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 4401e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 441bc5ccf88SSatish Balay { 442c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 443fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4446849ba73SBarry Smith PetscErrorCode ierr; 44575cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 44654f21887SBarry Smith PetscScalar **rvalues,*svalues; 447bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 448563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 449fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 4505bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 45154f21887SBarry Smith PetscScalar *sp_val; 4525bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 453bc5ccf88SSatish Balay 454bc5ccf88SSatish Balay PetscFunctionBegin; 4554c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 45675cae7c1SHong Zhang 457bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 458fe09c992SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 459fe09c992SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); 460c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 461a2d1c673SSatish Balay 462563fb871SSatish Balay nlengths = nprocs+size; 46375cae7c1SHong Zhang i = j = 0; 4647357eb19SBarry Smith lastidx = -1; 4655bd3b8fbSHong Zhang space = stash->space_head; 46675cae7c1SHong Zhang while (space != PETSC_NULL){ 46775cae7c1SHong Zhang space_next = space->next; 4685bd3b8fbSHong Zhang sp_idx = space->idx; 46975cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 4707357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4715bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4727357eb19SBarry Smith lastidx = idx; 4737357eb19SBarry Smith for (; j<size; j++) { 4744c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 475563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 476bc5ccf88SSatish Balay } 477bc5ccf88SSatish Balay } 47875cae7c1SHong Zhang i++; 47975cae7c1SHong Zhang } 48075cae7c1SHong Zhang space = space_next; 481bc5ccf88SSatish Balay } 482563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 483563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 484563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 485563fb871SSatish Balay } 486bc5ccf88SSatish Balay 48754f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 488563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 489563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 490563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 491563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 492563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 493563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 494563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 495563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 496563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 497563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 498563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 499bc5ccf88SSatish Balay } 500bc5ccf88SSatish Balay 501bc5ccf88SSatish Balay /* do sends: 502bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 503bc5ccf88SSatish Balay the ith processor 504bc5ccf88SSatish Balay */ 505a77337e4SBarry Smith ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 506c1ac3661SBarry Smith sindices = (PetscInt*)(svalues + bs2*stash->n); 507b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 508c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 509bc5ccf88SSatish Balay starti = startv + size; 510a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 511bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 512bc5ccf88SSatish Balay for (i=1; i<size; i++) { 513563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 514563fb871SSatish Balay starti[i] = starti[i-1] + nlengths[i-1]*2; 515bc5ccf88SSatish Balay } 51675cae7c1SHong Zhang 51775cae7c1SHong Zhang i = 0; 5185bd3b8fbSHong Zhang space = stash->space_head; 51975cae7c1SHong Zhang while (space != PETSC_NULL){ 52075cae7c1SHong Zhang space_next = space->next; 5215bd3b8fbSHong Zhang sp_idx = space->idx; 5225bd3b8fbSHong Zhang sp_idy = space->idy; 5235bd3b8fbSHong Zhang sp_val = space->val; 52475cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 525bc5ccf88SSatish Balay j = owner[i]; 526a2d1c673SSatish Balay if (bs2 == 1) { 5275bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 528a2d1c673SSatish Balay } else { 529c1ac3661SBarry Smith PetscInt k; 53054f21887SBarry Smith PetscScalar *buf1,*buf2; 5314c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 532b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5334c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 534a2d1c673SSatish Balay } 5355bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5365bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 537bc5ccf88SSatish Balay startv[j]++; 538bc5ccf88SSatish Balay starti[j]++; 53975cae7c1SHong Zhang i++; 54075cae7c1SHong Zhang } 54175cae7c1SHong Zhang space = space_next; 542bc5ccf88SSatish Balay } 543bc5ccf88SSatish Balay startv[0] = 0; 544563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 545e5d0e772SSatish Balay 546bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 547563fb871SSatish Balay if (nprocs[i]) { 548563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 549a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 550bc5ccf88SSatish Balay } 551b85c94c3SSatish Balay } 5526cf91177SBarry Smith #if defined(PETSC_USE_INFO) 5531e2582c4SBarry Smith ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 554e5d0e772SSatish Balay for (i=0; i<size; i++) { 555e5d0e772SSatish Balay if (nprocs[i]) { 556a77337e4SBarry Smith ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 557e5d0e772SSatish Balay } 558e5d0e772SSatish Balay } 559e5d0e772SSatish Balay #endif 560606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 561606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 562a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 563a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 564a2d1c673SSatish Balay stash->nprocs = nprocs; 565a2d1c673SSatish Balay 566563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 567563fb871SSatish Balay ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 568563fb871SSatish Balay 569563fb871SSatish Balay for (i=0; i<nreceives; i++) { 570563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 571563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 572563fb871SSatish Balay } 573563fb871SSatish Balay stash->recv_waits = recv_waits; 574563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 575563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 576563fb871SSatish Balay 577bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 578563fb871SSatish Balay stash->rindices = rindices; stash->send_waits = send_waits; 579bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 580bc5ccf88SSatish Balay PetscFunctionReturn(0); 581bc5ccf88SSatish Balay } 582bc5ccf88SSatish Balay 583a2d1c673SSatish Balay /* 5848798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5858798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5864c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5874c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5884c1ff481SSatish Balay 5894c1ff481SSatish Balay Input Parameters: 5904c1ff481SSatish Balay stash - the stash 5914c1ff481SSatish Balay 5924c1ff481SSatish Balay Output Parameters: 5934c1ff481SSatish Balay nvals - the number of entries in the current message. 5944c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5954c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5964c1ff481SSatish Balay vals - the values 5974c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5984c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5994c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 600a2d1c673SSatish Balay */ 6014a2ae208SSatish Balay #undef __FUNCT__ 6024a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 60354f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 604bc5ccf88SSatish Balay { 6056849ba73SBarry Smith PetscErrorCode ierr; 606fe09c992SBarry Smith PetscMPIInt i,*flg_v,i1,i2; 607fe09c992SBarry Smith PetscInt bs2; 608a2d1c673SSatish Balay MPI_Status recv_status; 609b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 610bc5ccf88SSatish Balay 611bc5ccf88SSatish Balay PetscFunctionBegin; 612bc5ccf88SSatish Balay 613a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 614a2d1c673SSatish Balay /* Return if no more messages to process */ 615a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 616a2d1c673SSatish Balay 617a2d1c673SSatish Balay flg_v = stash->nprocs; 6184c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 619a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 620a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 621a2d1c673SSatish Balay while (!match_found) { 622a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 623a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 624a2d1c673SSatish Balay if (i % 2) { 625a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 626c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 627a2d1c673SSatish Balay *nvals = *nvals/bs2; 628563fb871SSatish Balay } else { 629563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 630563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 631563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 632bc5ccf88SSatish Balay } 633a2d1c673SSatish Balay 634cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 635c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 636c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 637a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 638563fb871SSatish Balay *rows = stash->rindices[i2]; 639a2d1c673SSatish Balay *cols = *rows + *nvals; 640563fb871SSatish Balay *vals = stash->rvalues[i1]; 641a2d1c673SSatish Balay *flg = 1; 642a2d1c673SSatish Balay stash->nprocessed ++; 64335d8aa7fSBarry Smith match_found = PETSC_TRUE; 644bc5ccf88SSatish Balay } 645bc5ccf88SSatish Balay } 646bc5ccf88SSatish Balay PetscFunctionReturn(0); 647bc5ccf88SSatish Balay } 648