1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 37c4f633dSBarry Smith #include "private/matimpl.h" 45bd3b8fbSHong Zhang 5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 64c1ff481SSatish Balay 79417f4adSLois Curfman McInnes /* 88798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 94c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 104c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 119417f4adSLois Curfman McInnes 124c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 134c1ff481SSatish Balay 144c1ff481SSatish Balay Input Parameters: 154c1ff481SSatish Balay comm - communicator, required for scatters. 164c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 174c1ff481SSatish Balay 184c1ff481SSatish Balay Output Parameters: 194c1ff481SSatish Balay stash - the newly created stash 209417f4adSLois Curfman McInnes */ 214a2ae208SSatish Balay #undef __FUNCT__ 224a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 23c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 249417f4adSLois Curfman McInnes { 25dfbe8321SBarry Smith PetscErrorCode ierr; 26533163c2SBarry Smith PetscInt max,*opt,nopt,i; 27ace3abfcSBarry Smith PetscBool flg; 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31752ec6e0SSatish Balay stash->comm = comm; 32752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36533163c2SBarry Smith ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 37533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 38533163c2SBarry Smith 39bc5ccf88SSatish Balay 40434d7ff9SSatish Balay nopt = stash->size; 41d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 42b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 43434d7ff9SSatish Balay if (flg) { 44434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 45434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 46434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 47f4ab19daSSatish Balay else max = 0; /* Use default */ 48434d7ff9SSatish Balay stash->umax = max; 49434d7ff9SSatish Balay } else { 50434d7ff9SSatish Balay stash->umax = 0; 51434d7ff9SSatish Balay } 52606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 534c1ff481SSatish Balay if (bs <= 0) bs = 1; 54a2d1c673SSatish Balay 554c1ff481SSatish Balay stash->bs = bs; 569417f4adSLois Curfman McInnes stash->nmax = 0; 57434d7ff9SSatish Balay stash->oldnmax = 0; 589417f4adSLois Curfman McInnes stash->n = 0; 594c1ff481SSatish Balay stash->reallocs = -1; 6075cae7c1SHong Zhang stash->space_head = 0; 6175cae7c1SHong Zhang stash->space = 0; 629417f4adSLois Curfman McInnes 63bc5ccf88SSatish Balay stash->send_waits = 0; 64bc5ccf88SSatish Balay stash->recv_waits = 0; 65a2d1c673SSatish Balay stash->send_status = 0; 66bc5ccf88SSatish Balay stash->nsends = 0; 67bc5ccf88SSatish Balay stash->nrecvs = 0; 68bc5ccf88SSatish Balay stash->svalues = 0; 69bc5ccf88SSatish Balay stash->rvalues = 0; 70563fb871SSatish Balay stash->rindices = 0; 71a2d1c673SSatish Balay stash->nprocessed = 0; 7267318a8aSJed Brown 7367318a8aSJed Brown stash->reproduce = PETSC_FALSE; 74*acfcf0e5SJed Brown ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr); 753a40ed3dSBarry Smith PetscFunctionReturn(0); 769417f4adSLois Curfman McInnes } 779417f4adSLois Curfman McInnes 784c1ff481SSatish Balay /* 798798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 804c1ff481SSatish Balay */ 814a2ae208SSatish Balay #undef __FUNCT__ 824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 849417f4adSLois Curfman McInnes { 85dfbe8321SBarry Smith PetscErrorCode ierr; 86a2d1c673SSatish Balay 87bc5ccf88SSatish Balay PetscFunctionBegin; 8875cae7c1SHong Zhang if (stash->space_head){ 8975cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 9075cae7c1SHong Zhang stash->space_head = 0; 9182740460SHong Zhang stash->space = 0; 9275cae7c1SHong Zhang } 93533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 94bc5ccf88SSatish Balay PetscFunctionReturn(0); 95bc5ccf88SSatish Balay } 96bc5ccf88SSatish Balay 974c1ff481SSatish Balay /* 9867318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 994c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 10067318a8aSJed Brown all the memory used for message passing is cleaned up. This 1014c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1024c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1034c1ff481SSatish Balay so that the same value can be used the next time through. 1044c1ff481SSatish Balay */ 1054a2ae208SSatish Balay #undef __FUNCT__ 1064a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 107dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 108bc5ccf88SSatish Balay { 1096849ba73SBarry Smith PetscErrorCode ierr; 110533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 111a2d1c673SSatish Balay MPI_Status *send_status; 112a2d1c673SSatish Balay 1133a40ed3dSBarry Smith PetscFunctionBegin; 114533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 115a2d1c673SSatish Balay /* wait on sends */ 116a2d1c673SSatish Balay if (nsends) { 11782502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 118a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 119606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 120a2d1c673SSatish Balay } 121a2d1c673SSatish Balay 122c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 123434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 124434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 125b9b97703SBarry Smith if (stash->n) { 12694b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1278a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 128434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 129b9b97703SBarry Smith } 130434d7ff9SSatish Balay 131d07ff455SSatish Balay stash->nmax = 0; 132d07ff455SSatish Balay stash->n = 0; 1334c1ff481SSatish Balay stash->reallocs = -1; 134a2d1c673SSatish Balay stash->nprocessed = 0; 13575cae7c1SHong Zhang if (stash->space_head){ 13675cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 13775cae7c1SHong Zhang stash->space_head = 0; 13882740460SHong Zhang stash->space = 0; 13975cae7c1SHong Zhang } 140606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 141606d414cSSatish Balay stash->send_waits = 0; 142606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 143606d414cSSatish Balay stash->recv_waits = 0; 144c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 145606d414cSSatish Balay stash->svalues = 0; 146c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 147606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 148606d414cSSatish Balay stash->rvalues = 0; 149c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 150563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 151563fb871SSatish Balay stash->rindices = 0; 1523a40ed3dSBarry Smith PetscFunctionReturn(0); 1539417f4adSLois Curfman McInnes } 1549417f4adSLois Curfman McInnes 1554c1ff481SSatish Balay /* 1568798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1574c1ff481SSatish Balay 1584c1ff481SSatish Balay Input Parameters: 1594c1ff481SSatish Balay stash - the stash 16094b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1614c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1624c1ff481SSatish Balay 1634c1ff481SSatish Balay */ 1644a2ae208SSatish Balay #undef __FUNCT__ 1654a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 166c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 16797530c3fSBarry Smith { 168c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 16994b769a5SSatish Balay 1703a40ed3dSBarry Smith PetscFunctionBegin; 1711ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1721ecfd215SBarry Smith if (reallocs) { 173434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 174434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1751ecfd215SBarry Smith } 176bc5ccf88SSatish Balay PetscFunctionReturn(0); 177bc5ccf88SSatish Balay } 1784c1ff481SSatish Balay 1794c1ff481SSatish Balay /* 1808798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1814c1ff481SSatish Balay 1824c1ff481SSatish Balay Input Parameters: 1834c1ff481SSatish Balay stash - the stash 1844c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1854c1ff481SSatish Balay this value is used while allocating memory. 1864c1ff481SSatish Balay */ 1874a2ae208SSatish Balay #undef __FUNCT__ 1884a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 189c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 190bc5ccf88SSatish Balay { 191bc5ccf88SSatish Balay PetscFunctionBegin; 192434d7ff9SSatish Balay stash->umax = max; 1933a40ed3dSBarry Smith PetscFunctionReturn(0); 19497530c3fSBarry Smith } 19597530c3fSBarry Smith 1968798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1974c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1984c1ff481SSatish Balay being inserted into the stash. 1994c1ff481SSatish Balay 2004c1ff481SSatish Balay Input Parameters: 2014c1ff481SSatish Balay stash - the stash 2024c1ff481SSatish Balay incr - the minimum increase requested 2034c1ff481SSatish Balay 2044c1ff481SSatish Balay Notes: 2054c1ff481SSatish Balay This routine doubles the currently used memory. 2064c1ff481SSatish Balay */ 2074a2ae208SSatish Balay #undef __FUNCT__ 2084a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 209c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2109417f4adSLois Curfman McInnes { 2116849ba73SBarry Smith PetscErrorCode ierr; 2125bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2139417f4adSLois Curfman McInnes 2143a40ed3dSBarry Smith PetscFunctionBegin; 2159417f4adSLois Curfman McInnes /* allocate a larger stash */ 216c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 217434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 218434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 219c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 220434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 221434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 222434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2234c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 224d07ff455SSatish Balay 22575cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 22675cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 227b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 228b087b6d6SSatish Balay stash->space_head = stash->space; 22975cae7c1SHong Zhang } 230b087b6d6SSatish Balay 231bc5ccf88SSatish Balay stash->reallocs++; 23275cae7c1SHong Zhang stash->nmax = newnmax; 233bc5ccf88SSatish Balay PetscFunctionReturn(0); 234bc5ccf88SSatish Balay } 235bc5ccf88SSatish Balay /* 2368798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2374c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2384c1ff481SSatish Balay can be inserted with a single call to this function. 2394c1ff481SSatish Balay 2404c1ff481SSatish Balay Input Parameters: 2414c1ff481SSatish Balay stash - the stash 2424c1ff481SSatish Balay row - the global row correspoiding to the values 2434c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2444c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2454c1ff481SSatish Balay values - the values inserted 246bc5ccf88SSatish Balay */ 2474a2ae208SSatish Balay #undef __FUNCT__ 2484a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 249ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 250bc5ccf88SSatish Balay { 251dfbe8321SBarry Smith PetscErrorCode ierr; 252b400d20cSBarry Smith PetscInt i,k,cnt = 0; 25375cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 254bc5ccf88SSatish Balay 255bc5ccf88SSatish Balay PetscFunctionBegin; 2564c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 25775cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2588798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2599417f4adSLois Curfman McInnes } 26075cae7c1SHong Zhang space = stash->space; 26175cae7c1SHong Zhang k = space->local_used; 2624c1ff481SSatish Balay for (i=0; i<n; i++) { 26388c3974fSBarry Smith if (ignorezeroentries && (values[i] == 0.0)) continue; 26475cae7c1SHong Zhang space->idx[k] = row; 26575cae7c1SHong Zhang space->idy[k] = idxn[i]; 26675cae7c1SHong Zhang space->val[k] = values[i]; 26775cae7c1SHong Zhang k++; 268b400d20cSBarry Smith cnt++; 2699417f4adSLois Curfman McInnes } 270b400d20cSBarry Smith stash->n += cnt; 271b400d20cSBarry Smith space->local_used += cnt; 272b400d20cSBarry Smith space->local_remaining -= cnt; 273a2d1c673SSatish Balay PetscFunctionReturn(0); 274a2d1c673SSatish Balay } 27575cae7c1SHong Zhang 2764c1ff481SSatish Balay /* 2778798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2784c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2794c1ff481SSatish Balay can be inserted with a single call to this function. 280a2d1c673SSatish Balay 2814c1ff481SSatish Balay Input Parameters: 2824c1ff481SSatish Balay stash - the stash 2834c1ff481SSatish Balay row - the global row correspoiding to the values 2844c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2854c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2864c1ff481SSatish Balay values - the values inserted 2874c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2884c1ff481SSatish Balay this happens because the input is columnoriented. 2894c1ff481SSatish Balay */ 2904a2ae208SSatish Balay #undef __FUNCT__ 2914a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 292ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 293a2d1c673SSatish Balay { 294dfbe8321SBarry Smith PetscErrorCode ierr; 29550e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 29675cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 297a2d1c673SSatish Balay 2984c1ff481SSatish Balay PetscFunctionBegin; 2994c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 30075cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3018798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3024c1ff481SSatish Balay } 30375cae7c1SHong Zhang space = stash->space; 30475cae7c1SHong Zhang k = space->local_used; 3054c1ff481SSatish Balay for (i=0; i<n; i++) { 30688c3974fSBarry Smith if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 30775cae7c1SHong Zhang space->idx[k] = row; 30875cae7c1SHong Zhang space->idy[k] = idxn[i]; 30975cae7c1SHong Zhang space->val[k] = values[i*stepval]; 31075cae7c1SHong Zhang k++; 311b400d20cSBarry Smith cnt++; 3124c1ff481SSatish Balay } 313b400d20cSBarry Smith stash->n += cnt; 314b400d20cSBarry Smith space->local_used += cnt; 315b400d20cSBarry Smith space->local_remaining -= cnt; 3164c1ff481SSatish Balay PetscFunctionReturn(0); 3174c1ff481SSatish Balay } 3184c1ff481SSatish Balay 3194c1ff481SSatish Balay /* 3208798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3214c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3224c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3234c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3244c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3254c1ff481SSatish Balay 3264c1ff481SSatish Balay Input Parameters: 3274c1ff481SSatish Balay stash - the stash 3284c1ff481SSatish Balay row - the global block-row correspoiding to the values 3294c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3304c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3314c1ff481SSatish Balay values. Each block is of size bs*bs. 3324c1ff481SSatish Balay values - the values inserted 3334c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3344c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3354c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3364c1ff481SSatish Balay */ 3374a2ae208SSatish Balay #undef __FUNCT__ 3384a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 33954f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3404c1ff481SSatish Balay { 341dfbe8321SBarry Smith PetscErrorCode ierr; 34275cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 34354f21887SBarry Smith const PetscScalar *vals; 34454f21887SBarry Smith PetscScalar *array; 34575cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 346a2d1c673SSatish Balay 347a2d1c673SSatish Balay PetscFunctionBegin; 34875cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3498798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 350a2d1c673SSatish Balay } 35175cae7c1SHong Zhang space = stash->space; 35275cae7c1SHong Zhang l = space->local_used; 35375cae7c1SHong Zhang bs2 = bs*bs; 3544c1ff481SSatish Balay for (i=0; i<n; i++) { 35575cae7c1SHong Zhang space->idx[l] = row; 35675cae7c1SHong Zhang space->idy[l] = idxn[i]; 35775cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 35875cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 35975cae7c1SHong Zhang funtion call */ 36075cae7c1SHong Zhang array = space->val + bs2*l; 36175cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 36275cae7c1SHong Zhang for (j=0; j<bs; j++) { 36375cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 36475cae7c1SHong Zhang array++; 36575cae7c1SHong Zhang vals += cmax*bs; 36675cae7c1SHong Zhang } 36775cae7c1SHong Zhang l++; 368a2d1c673SSatish Balay } 3695bd3b8fbSHong Zhang stash->n += n; 37075cae7c1SHong Zhang space->local_used += n; 37175cae7c1SHong Zhang space->local_remaining -= n; 3724c1ff481SSatish Balay PetscFunctionReturn(0); 3734c1ff481SSatish Balay } 3744c1ff481SSatish Balay 3754c1ff481SSatish Balay /* 3768798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3774c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3784c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3794c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3804c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3814c1ff481SSatish Balay 3824c1ff481SSatish Balay Input Parameters: 3834c1ff481SSatish Balay stash - the stash 3844c1ff481SSatish Balay row - the global block-row correspoiding to the values 3854c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3864c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3874c1ff481SSatish Balay values. Each block is of size bs*bs. 3884c1ff481SSatish Balay values - the values inserted 3894c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3904c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3914c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3924c1ff481SSatish Balay */ 3934a2ae208SSatish Balay #undef __FUNCT__ 3944a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 39554f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3964c1ff481SSatish Balay { 397dfbe8321SBarry Smith PetscErrorCode ierr; 39875cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 39954f21887SBarry Smith const PetscScalar *vals; 40054f21887SBarry Smith PetscScalar *array; 40175cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4024c1ff481SSatish Balay 4034c1ff481SSatish Balay PetscFunctionBegin; 40475cae7c1SHong Zhang if (!space || space->local_remaining < n){ 4058798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4064c1ff481SSatish Balay } 40775cae7c1SHong Zhang space = stash->space; 40875cae7c1SHong Zhang l = space->local_used; 40975cae7c1SHong Zhang bs2 = bs*bs; 4104c1ff481SSatish Balay for (i=0; i<n; i++) { 41175cae7c1SHong Zhang space->idx[l] = row; 41275cae7c1SHong Zhang space->idy[l] = idxn[i]; 41375cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 41475cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 41575cae7c1SHong Zhang funtion call */ 41675cae7c1SHong Zhang array = space->val + bs2*l; 41775cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 41875cae7c1SHong Zhang for (j=0; j<bs; j++) { 41975cae7c1SHong Zhang for (k=0; k<bs; k++) {array[k] = vals[k];} 42075cae7c1SHong Zhang array += bs; 42175cae7c1SHong Zhang vals += rmax*bs; 42275cae7c1SHong Zhang } 4235bd3b8fbSHong Zhang l++; 424a2d1c673SSatish Balay } 4255bd3b8fbSHong Zhang stash->n += n; 42675cae7c1SHong Zhang space->local_used += n; 42775cae7c1SHong Zhang space->local_remaining -= n; 4283a40ed3dSBarry Smith PetscFunctionReturn(0); 4299417f4adSLois Curfman McInnes } 4304c1ff481SSatish Balay /* 4318798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4324c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4334c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4344c1ff481SSatish Balay processors. 435bc5ccf88SSatish Balay 4364c1ff481SSatish Balay Input Parameters: 4374c1ff481SSatish Balay stash - the stash 4384c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4394c1ff481SSatish Balay for each node. 4404c1ff481SSatish Balay 4414c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4424c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4434c1ff481SSatish Balay the proper global indices. 4444c1ff481SSatish Balay */ 4454a2ae208SSatish Balay #undef __FUNCT__ 4464a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 4471e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 448bc5ccf88SSatish Balay { 449c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 450fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4516849ba73SBarry Smith PetscErrorCode ierr; 45275cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 45354f21887SBarry Smith PetscScalar **rvalues,*svalues; 454bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 455563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 456fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 4575bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 45854f21887SBarry Smith PetscScalar *sp_val; 4595bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 460bc5ccf88SSatish Balay 461bc5ccf88SSatish Balay PetscFunctionBegin; 4624c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 46375cae7c1SHong Zhang 464bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 465c05d87d6SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 466c05d87d6SBarry Smith ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 467c05d87d6SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 468c05d87d6SBarry Smith ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 469c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 470a2d1c673SSatish Balay 47175cae7c1SHong Zhang i = j = 0; 4727357eb19SBarry Smith lastidx = -1; 4735bd3b8fbSHong Zhang space = stash->space_head; 47475cae7c1SHong Zhang while (space != PETSC_NULL){ 47575cae7c1SHong Zhang space_next = space->next; 4765bd3b8fbSHong Zhang sp_idx = space->idx; 47775cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 4787357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4795bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4807357eb19SBarry Smith lastidx = idx; 4817357eb19SBarry Smith for (; j<size; j++) { 4824c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 483563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 484bc5ccf88SSatish Balay } 485bc5ccf88SSatish Balay } 48675cae7c1SHong Zhang i++; 48775cae7c1SHong Zhang } 48875cae7c1SHong Zhang space = space_next; 489bc5ccf88SSatish Balay } 490563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 491563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 492563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 493563fb871SSatish Balay } 494bc5ccf88SSatish Balay 49554f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 496563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 497563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 498563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 499563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 500563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 501563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 502563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 503563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 504563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 505563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 506563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 507bc5ccf88SSatish Balay } 508bc5ccf88SSatish Balay 509bc5ccf88SSatish Balay /* do sends: 510bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 511bc5ccf88SSatish Balay the ith processor 512bc5ccf88SSatish Balay */ 513c05d87d6SBarry Smith ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 514533163c2SBarry Smith ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 515c05d87d6SBarry Smith ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 516a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 517bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 518bc5ccf88SSatish Balay for (i=1; i<size; i++) { 519563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 520533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 521bc5ccf88SSatish Balay } 52275cae7c1SHong Zhang 52375cae7c1SHong Zhang i = 0; 5245bd3b8fbSHong Zhang space = stash->space_head; 52575cae7c1SHong Zhang while (space != PETSC_NULL){ 52675cae7c1SHong Zhang space_next = space->next; 5275bd3b8fbSHong Zhang sp_idx = space->idx; 5285bd3b8fbSHong Zhang sp_idy = space->idy; 5295bd3b8fbSHong Zhang sp_val = space->val; 53075cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 531bc5ccf88SSatish Balay j = owner[i]; 532a2d1c673SSatish Balay if (bs2 == 1) { 5335bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 534a2d1c673SSatish Balay } else { 535c1ac3661SBarry Smith PetscInt k; 53654f21887SBarry Smith PetscScalar *buf1,*buf2; 5374c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 538b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5394c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 540a2d1c673SSatish Balay } 5415bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5425bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 543bc5ccf88SSatish Balay startv[j]++; 544bc5ccf88SSatish Balay starti[j]++; 54575cae7c1SHong Zhang i++; 54675cae7c1SHong Zhang } 54775cae7c1SHong Zhang space = space_next; 548bc5ccf88SSatish Balay } 549bc5ccf88SSatish Balay startv[0] = 0; 550563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 551e5d0e772SSatish Balay 552bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 553563fb871SSatish Balay if (nprocs[i]) { 554563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 555a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 556bc5ccf88SSatish Balay } 557b85c94c3SSatish Balay } 5586cf91177SBarry Smith #if defined(PETSC_USE_INFO) 5591e2582c4SBarry Smith ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 560e5d0e772SSatish Balay for (i=0; i<size; i++) { 561e5d0e772SSatish Balay if (nprocs[i]) { 562a77337e4SBarry Smith ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 563e5d0e772SSatish Balay } 564e5d0e772SSatish Balay } 565e5d0e772SSatish Balay #endif 566c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 567606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 568c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 569c05d87d6SBarry Smith ierr = PetscFree(nprocs);CHKERRQ(ierr); 570a2d1c673SSatish Balay 571563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 572533163c2SBarry Smith ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 573563fb871SSatish Balay 574563fb871SSatish Balay for (i=0; i<nreceives; i++) { 575563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 576563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 577563fb871SSatish Balay } 578563fb871SSatish Balay stash->recv_waits = recv_waits; 579563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 580563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 581563fb871SSatish Balay 582c05d87d6SBarry Smith stash->svalues = svalues; 583c05d87d6SBarry Smith stash->sindices = sindices; 584c05d87d6SBarry Smith stash->rvalues = rvalues; 585c05d87d6SBarry Smith stash->rindices = rindices; 586c05d87d6SBarry Smith stash->send_waits = send_waits; 587c05d87d6SBarry Smith stash->nsends = nsends; 588c05d87d6SBarry Smith stash->nrecvs = nreceives; 58967318a8aSJed Brown stash->reproduce_count = 0; 590bc5ccf88SSatish Balay PetscFunctionReturn(0); 591bc5ccf88SSatish Balay } 592bc5ccf88SSatish Balay 593a2d1c673SSatish Balay /* 5948798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5958798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5964c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5974c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5984c1ff481SSatish Balay 5994c1ff481SSatish Balay Input Parameters: 6004c1ff481SSatish Balay stash - the stash 6014c1ff481SSatish Balay 6024c1ff481SSatish Balay Output Parameters: 6034c1ff481SSatish Balay nvals - the number of entries in the current message. 6044c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6054c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6064c1ff481SSatish Balay vals - the values 6074c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6084c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6094c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 610a2d1c673SSatish Balay */ 6114a2ae208SSatish Balay #undef __FUNCT__ 6124a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 61354f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 614bc5ccf88SSatish Balay { 6156849ba73SBarry Smith PetscErrorCode ierr; 616533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 617fe09c992SBarry Smith PetscInt bs2; 618a2d1c673SSatish Balay MPI_Status recv_status; 619ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 620bc5ccf88SSatish Balay 621bc5ccf88SSatish Balay PetscFunctionBegin; 622bc5ccf88SSatish Balay 623a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 624a2d1c673SSatish Balay /* Return if no more messages to process */ 625a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 626a2d1c673SSatish Balay 6274c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 62867318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 629a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 630a2d1c673SSatish Balay while (!match_found) { 631533163c2SBarry Smith CHKMEMQ; 63267318a8aSJed Brown if (stash->reproduce) { 63367318a8aSJed Brown i = stash->reproduce_count++; 63467318a8aSJed Brown ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 63567318a8aSJed Brown } else { 636a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 63767318a8aSJed Brown } 638533163c2SBarry Smith CHKMEMQ; 639e32f2f54SBarry Smith if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 640533163c2SBarry Smith 64167318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 642a2d1c673SSatish Balay if (i % 2) { 643a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 644c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 645a2d1c673SSatish Balay *nvals = *nvals/bs2; 646563fb871SSatish Balay } else { 647563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 648563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 649563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 650bc5ccf88SSatish Balay } 651a2d1c673SSatish Balay 652cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 653c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 654c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 655a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 656563fb871SSatish Balay *rows = stash->rindices[i2]; 657a2d1c673SSatish Balay *cols = *rows + *nvals; 658563fb871SSatish Balay *vals = stash->rvalues[i1]; 659a2d1c673SSatish Balay *flg = 1; 660a2d1c673SSatish Balay stash->nprocessed ++; 66135d8aa7fSBarry Smith match_found = PETSC_TRUE; 662bc5ccf88SSatish Balay } 663bc5ccf88SSatish Balay } 664bc5ccf88SSatish Balay PetscFunctionReturn(0); 665bc5ccf88SSatish Balay } 666