1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 37c4f633dSBarry Smith #include "private/matimpl.h" 45bd3b8fbSHong Zhang 5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 64c1ff481SSatish Balay 79417f4adSLois Curfman McInnes /* 88798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 94c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 104c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 119417f4adSLois Curfman McInnes 124c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 134c1ff481SSatish Balay 144c1ff481SSatish Balay Input Parameters: 154c1ff481SSatish Balay comm - communicator, required for scatters. 164c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 174c1ff481SSatish Balay 184c1ff481SSatish Balay Output Parameters: 194c1ff481SSatish Balay stash - the newly created stash 209417f4adSLois Curfman McInnes */ 214a2ae208SSatish Balay #undef __FUNCT__ 224a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 23c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 249417f4adSLois Curfman McInnes { 25dfbe8321SBarry Smith PetscErrorCode ierr; 26*533163c2SBarry Smith PetscInt max,*opt,nopt,i; 27f1af5d2fSBarry Smith PetscTruth flg; 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31752ec6e0SSatish Balay stash->comm = comm; 32752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36*533163c2SBarry Smith ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 37*533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 38*533163c2SBarry Smith 39bc5ccf88SSatish Balay 40434d7ff9SSatish Balay nopt = stash->size; 41d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 42b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 43434d7ff9SSatish Balay if (flg) { 44434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 45434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 46434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 47f4ab19daSSatish Balay else max = 0; /* Use default */ 48434d7ff9SSatish Balay stash->umax = max; 49434d7ff9SSatish Balay } else { 50434d7ff9SSatish Balay stash->umax = 0; 51434d7ff9SSatish Balay } 52606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 534c1ff481SSatish Balay if (bs <= 0) bs = 1; 54a2d1c673SSatish Balay 554c1ff481SSatish Balay stash->bs = bs; 569417f4adSLois Curfman McInnes stash->nmax = 0; 57434d7ff9SSatish Balay stash->oldnmax = 0; 589417f4adSLois Curfman McInnes stash->n = 0; 594c1ff481SSatish Balay stash->reallocs = -1; 6075cae7c1SHong Zhang stash->space_head = 0; 6175cae7c1SHong Zhang stash->space = 0; 629417f4adSLois Curfman McInnes 63bc5ccf88SSatish Balay stash->send_waits = 0; 64bc5ccf88SSatish Balay stash->recv_waits = 0; 65a2d1c673SSatish Balay stash->send_status = 0; 66bc5ccf88SSatish Balay stash->nsends = 0; 67bc5ccf88SSatish Balay stash->nrecvs = 0; 68bc5ccf88SSatish Balay stash->svalues = 0; 69bc5ccf88SSatish Balay stash->rvalues = 0; 70563fb871SSatish Balay stash->rindices = 0; 71a2d1c673SSatish Balay stash->nprocessed = 0; 723a40ed3dSBarry Smith PetscFunctionReturn(0); 739417f4adSLois Curfman McInnes } 749417f4adSLois Curfman McInnes 754c1ff481SSatish Balay /* 768798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 774c1ff481SSatish Balay */ 784a2ae208SSatish Balay #undef __FUNCT__ 794a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 80dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 819417f4adSLois Curfman McInnes { 82dfbe8321SBarry Smith PetscErrorCode ierr; 83a2d1c673SSatish Balay 84bc5ccf88SSatish Balay PetscFunctionBegin; 8575cae7c1SHong Zhang if (stash->space_head){ 8675cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 8775cae7c1SHong Zhang stash->space_head = 0; 8882740460SHong Zhang stash->space = 0; 8975cae7c1SHong Zhang } 90*533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 91bc5ccf88SSatish Balay PetscFunctionReturn(0); 92bc5ccf88SSatish Balay } 93bc5ccf88SSatish Balay 944c1ff481SSatish Balay /* 958798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 964c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 974c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 984c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 994c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1004c1ff481SSatish Balay so that the same value can be used the next time through. 1014c1ff481SSatish Balay */ 1024a2ae208SSatish Balay #undef __FUNCT__ 1034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 104dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 105bc5ccf88SSatish Balay { 1066849ba73SBarry Smith PetscErrorCode ierr; 107*533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 108a2d1c673SSatish Balay MPI_Status *send_status; 109a2d1c673SSatish Balay 1103a40ed3dSBarry Smith PetscFunctionBegin; 111*533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 112a2d1c673SSatish Balay /* wait on sends */ 113a2d1c673SSatish Balay if (nsends) { 11482502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 115a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 116606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 117a2d1c673SSatish Balay } 118a2d1c673SSatish Balay 119c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 120434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 121434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 122b9b97703SBarry Smith if (stash->n) { 12394b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1248a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 125434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 126b9b97703SBarry Smith } 127434d7ff9SSatish Balay 128d07ff455SSatish Balay stash->nmax = 0; 129d07ff455SSatish Balay stash->n = 0; 1304c1ff481SSatish Balay stash->reallocs = -1; 131a2d1c673SSatish Balay stash->nprocessed = 0; 13275cae7c1SHong Zhang if (stash->space_head){ 13375cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 13475cae7c1SHong Zhang stash->space_head = 0; 13582740460SHong Zhang stash->space = 0; 13675cae7c1SHong Zhang } 137606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 138606d414cSSatish Balay stash->send_waits = 0; 139606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 140606d414cSSatish Balay stash->recv_waits = 0; 141c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 142606d414cSSatish Balay stash->svalues = 0; 143c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 144606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 145606d414cSSatish Balay stash->rvalues = 0; 146c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 147563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 148563fb871SSatish Balay stash->rindices = 0; 1493a40ed3dSBarry Smith PetscFunctionReturn(0); 1509417f4adSLois Curfman McInnes } 1519417f4adSLois Curfman McInnes 1524c1ff481SSatish Balay /* 1538798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1544c1ff481SSatish Balay 1554c1ff481SSatish Balay Input Parameters: 1564c1ff481SSatish Balay stash - the stash 15794b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1584c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1594c1ff481SSatish Balay 1604c1ff481SSatish Balay */ 1614a2ae208SSatish Balay #undef __FUNCT__ 1624a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 163c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 16497530c3fSBarry Smith { 165c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 16694b769a5SSatish Balay 1673a40ed3dSBarry Smith PetscFunctionBegin; 1681ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1691ecfd215SBarry Smith if (reallocs) { 170434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 171434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1721ecfd215SBarry Smith } 173bc5ccf88SSatish Balay PetscFunctionReturn(0); 174bc5ccf88SSatish Balay } 1754c1ff481SSatish Balay 1764c1ff481SSatish Balay /* 1778798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1784c1ff481SSatish Balay 1794c1ff481SSatish Balay Input Parameters: 1804c1ff481SSatish Balay stash - the stash 1814c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1824c1ff481SSatish Balay this value is used while allocating memory. 1834c1ff481SSatish Balay */ 1844a2ae208SSatish Balay #undef __FUNCT__ 1854a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 186c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 187bc5ccf88SSatish Balay { 188bc5ccf88SSatish Balay PetscFunctionBegin; 189434d7ff9SSatish Balay stash->umax = max; 1903a40ed3dSBarry Smith PetscFunctionReturn(0); 19197530c3fSBarry Smith } 19297530c3fSBarry Smith 1938798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1944c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1954c1ff481SSatish Balay being inserted into the stash. 1964c1ff481SSatish Balay 1974c1ff481SSatish Balay Input Parameters: 1984c1ff481SSatish Balay stash - the stash 1994c1ff481SSatish Balay incr - the minimum increase requested 2004c1ff481SSatish Balay 2014c1ff481SSatish Balay Notes: 2024c1ff481SSatish Balay This routine doubles the currently used memory. 2034c1ff481SSatish Balay */ 2044a2ae208SSatish Balay #undef __FUNCT__ 2054a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 206c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2079417f4adSLois Curfman McInnes { 2086849ba73SBarry Smith PetscErrorCode ierr; 2095bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2109417f4adSLois Curfman McInnes 2113a40ed3dSBarry Smith PetscFunctionBegin; 2129417f4adSLois Curfman McInnes /* allocate a larger stash */ 213c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 214434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 215434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 216c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 217434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 218434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 219434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2204c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 221d07ff455SSatish Balay 22275cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 22375cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 224b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 225b087b6d6SSatish Balay stash->space_head = stash->space; 22675cae7c1SHong Zhang } 227b087b6d6SSatish Balay 228bc5ccf88SSatish Balay stash->reallocs++; 22975cae7c1SHong Zhang stash->nmax = newnmax; 230bc5ccf88SSatish Balay PetscFunctionReturn(0); 231bc5ccf88SSatish Balay } 232bc5ccf88SSatish Balay /* 2338798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2344c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2354c1ff481SSatish Balay can be inserted with a single call to this function. 2364c1ff481SSatish Balay 2374c1ff481SSatish Balay Input Parameters: 2384c1ff481SSatish Balay stash - the stash 2394c1ff481SSatish Balay row - the global row correspoiding to the values 2404c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2414c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2424c1ff481SSatish Balay values - the values inserted 243bc5ccf88SSatish Balay */ 2444a2ae208SSatish Balay #undef __FUNCT__ 2454a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 246b400d20cSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries) 247bc5ccf88SSatish Balay { 248dfbe8321SBarry Smith PetscErrorCode ierr; 249b400d20cSBarry Smith PetscInt i,k,cnt = 0; 25075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 251bc5ccf88SSatish Balay 252bc5ccf88SSatish Balay PetscFunctionBegin; 2534c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 25475cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2558798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2569417f4adSLois Curfman McInnes } 25775cae7c1SHong Zhang space = stash->space; 25875cae7c1SHong Zhang k = space->local_used; 2594c1ff481SSatish Balay for (i=0; i<n; i++) { 26088c3974fSBarry Smith if (ignorezeroentries && (values[i] == 0.0)) continue; 26175cae7c1SHong Zhang space->idx[k] = row; 26275cae7c1SHong Zhang space->idy[k] = idxn[i]; 26375cae7c1SHong Zhang space->val[k] = values[i]; 26475cae7c1SHong Zhang k++; 265b400d20cSBarry Smith cnt++; 2669417f4adSLois Curfman McInnes } 267b400d20cSBarry Smith stash->n += cnt; 268b400d20cSBarry Smith space->local_used += cnt; 269b400d20cSBarry Smith space->local_remaining -= cnt; 270a2d1c673SSatish Balay PetscFunctionReturn(0); 271a2d1c673SSatish Balay } 27275cae7c1SHong Zhang 2734c1ff481SSatish Balay /* 2748798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2754c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2764c1ff481SSatish Balay can be inserted with a single call to this function. 277a2d1c673SSatish Balay 2784c1ff481SSatish Balay Input Parameters: 2794c1ff481SSatish Balay stash - the stash 2804c1ff481SSatish Balay row - the global row correspoiding to the values 2814c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2824c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2834c1ff481SSatish Balay values - the values inserted 2844c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2854c1ff481SSatish Balay this happens because the input is columnoriented. 2864c1ff481SSatish Balay */ 2874a2ae208SSatish Balay #undef __FUNCT__ 2884a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 289b400d20cSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries) 290a2d1c673SSatish Balay { 291dfbe8321SBarry Smith PetscErrorCode ierr; 29250e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 29375cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 294a2d1c673SSatish Balay 2954c1ff481SSatish Balay PetscFunctionBegin; 2964c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 29775cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2988798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2994c1ff481SSatish Balay } 30075cae7c1SHong Zhang space = stash->space; 30175cae7c1SHong Zhang k = space->local_used; 3024c1ff481SSatish Balay for (i=0; i<n; i++) { 30388c3974fSBarry Smith if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 30475cae7c1SHong Zhang space->idx[k] = row; 30575cae7c1SHong Zhang space->idy[k] = idxn[i]; 30675cae7c1SHong Zhang space->val[k] = values[i*stepval]; 30775cae7c1SHong Zhang k++; 308b400d20cSBarry Smith cnt++; 3094c1ff481SSatish Balay } 310b400d20cSBarry Smith stash->n += cnt; 311b400d20cSBarry Smith space->local_used += cnt; 312b400d20cSBarry Smith space->local_remaining -= cnt; 3134c1ff481SSatish Balay PetscFunctionReturn(0); 3144c1ff481SSatish Balay } 3154c1ff481SSatish Balay 3164c1ff481SSatish Balay /* 3178798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3184c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3194c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3204c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3214c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3224c1ff481SSatish Balay 3234c1ff481SSatish Balay Input Parameters: 3244c1ff481SSatish Balay stash - the stash 3254c1ff481SSatish Balay row - the global block-row correspoiding to the values 3264c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3274c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3284c1ff481SSatish Balay values. Each block is of size bs*bs. 3294c1ff481SSatish Balay values - the values inserted 3304c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3314c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3324c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3334c1ff481SSatish Balay */ 3344a2ae208SSatish Balay #undef __FUNCT__ 3354a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 33654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3374c1ff481SSatish Balay { 338dfbe8321SBarry Smith PetscErrorCode ierr; 33975cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 34054f21887SBarry Smith const PetscScalar *vals; 34154f21887SBarry Smith PetscScalar *array; 34275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 343a2d1c673SSatish Balay 344a2d1c673SSatish Balay PetscFunctionBegin; 34575cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3468798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 347a2d1c673SSatish Balay } 34875cae7c1SHong Zhang space = stash->space; 34975cae7c1SHong Zhang l = space->local_used; 35075cae7c1SHong Zhang bs2 = bs*bs; 3514c1ff481SSatish Balay for (i=0; i<n; i++) { 35275cae7c1SHong Zhang space->idx[l] = row; 35375cae7c1SHong Zhang space->idy[l] = idxn[i]; 35475cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 35575cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 35675cae7c1SHong Zhang funtion call */ 35775cae7c1SHong Zhang array = space->val + bs2*l; 35875cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 35975cae7c1SHong Zhang for (j=0; j<bs; j++) { 36075cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 36175cae7c1SHong Zhang array++; 36275cae7c1SHong Zhang vals += cmax*bs; 36375cae7c1SHong Zhang } 36475cae7c1SHong Zhang l++; 365a2d1c673SSatish Balay } 3665bd3b8fbSHong Zhang stash->n += n; 36775cae7c1SHong Zhang space->local_used += n; 36875cae7c1SHong Zhang space->local_remaining -= n; 3694c1ff481SSatish Balay PetscFunctionReturn(0); 3704c1ff481SSatish Balay } 3714c1ff481SSatish Balay 3724c1ff481SSatish Balay /* 3738798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3744c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3754c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3764c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3774c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3784c1ff481SSatish Balay 3794c1ff481SSatish Balay Input Parameters: 3804c1ff481SSatish Balay stash - the stash 3814c1ff481SSatish Balay row - the global block-row correspoiding to the values 3824c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3834c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3844c1ff481SSatish Balay values. Each block is of size bs*bs. 3854c1ff481SSatish Balay values - the values inserted 3864c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3874c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3884c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3894c1ff481SSatish Balay */ 3904a2ae208SSatish Balay #undef __FUNCT__ 3914a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 39254f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3934c1ff481SSatish Balay { 394dfbe8321SBarry Smith PetscErrorCode ierr; 39575cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 39654f21887SBarry Smith const PetscScalar *vals; 39754f21887SBarry Smith PetscScalar *array; 39875cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 3994c1ff481SSatish Balay 4004c1ff481SSatish Balay PetscFunctionBegin; 40175cae7c1SHong Zhang if (!space || space->local_remaining < n){ 4028798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4034c1ff481SSatish Balay } 40475cae7c1SHong Zhang space = stash->space; 40575cae7c1SHong Zhang l = space->local_used; 40675cae7c1SHong Zhang bs2 = bs*bs; 4074c1ff481SSatish Balay for (i=0; i<n; i++) { 40875cae7c1SHong Zhang space->idx[l] = row; 40975cae7c1SHong Zhang space->idy[l] = idxn[i]; 41075cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 41175cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 41275cae7c1SHong Zhang funtion call */ 41375cae7c1SHong Zhang array = space->val + bs2*l; 41475cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 41575cae7c1SHong Zhang for (j=0; j<bs; j++) { 41675cae7c1SHong Zhang for (k=0; k<bs; k++) {array[k] = vals[k];} 41775cae7c1SHong Zhang array += bs; 41875cae7c1SHong Zhang vals += rmax*bs; 41975cae7c1SHong Zhang } 4205bd3b8fbSHong Zhang l++; 421a2d1c673SSatish Balay } 4225bd3b8fbSHong Zhang stash->n += n; 42375cae7c1SHong Zhang space->local_used += n; 42475cae7c1SHong Zhang space->local_remaining -= n; 4253a40ed3dSBarry Smith PetscFunctionReturn(0); 4269417f4adSLois Curfman McInnes } 4274c1ff481SSatish Balay /* 4288798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4294c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4304c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4314c1ff481SSatish Balay processors. 432bc5ccf88SSatish Balay 4334c1ff481SSatish Balay Input Parameters: 4344c1ff481SSatish Balay stash - the stash 4354c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4364c1ff481SSatish Balay for each node. 4374c1ff481SSatish Balay 4384c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4394c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4404c1ff481SSatish Balay the proper global indices. 4414c1ff481SSatish Balay */ 4424a2ae208SSatish Balay #undef __FUNCT__ 4434a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 4441e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 445bc5ccf88SSatish Balay { 446c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 447fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4486849ba73SBarry Smith PetscErrorCode ierr; 44975cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 45054f21887SBarry Smith PetscScalar **rvalues,*svalues; 451bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 452563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 453fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 4545bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 45554f21887SBarry Smith PetscScalar *sp_val; 4565bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 457bc5ccf88SSatish Balay 458bc5ccf88SSatish Balay PetscFunctionBegin; 4594c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 46075cae7c1SHong Zhang 461bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 462c05d87d6SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 463c05d87d6SBarry Smith ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 464c05d87d6SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 465c05d87d6SBarry Smith ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 466c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 467a2d1c673SSatish Balay 46875cae7c1SHong Zhang i = j = 0; 4697357eb19SBarry Smith lastidx = -1; 4705bd3b8fbSHong Zhang space = stash->space_head; 47175cae7c1SHong Zhang while (space != PETSC_NULL){ 47275cae7c1SHong Zhang space_next = space->next; 4735bd3b8fbSHong Zhang sp_idx = space->idx; 47475cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 4757357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4765bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4777357eb19SBarry Smith lastidx = idx; 4787357eb19SBarry Smith for (; j<size; j++) { 4794c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 480563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 481bc5ccf88SSatish Balay } 482bc5ccf88SSatish Balay } 48375cae7c1SHong Zhang i++; 48475cae7c1SHong Zhang } 48575cae7c1SHong Zhang space = space_next; 486bc5ccf88SSatish Balay } 487563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 488563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 489563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 490563fb871SSatish Balay } 491bc5ccf88SSatish Balay 49254f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 493563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 494563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 495563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 496563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 497563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 498563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 499563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 500563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 501563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 502563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 503563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 504bc5ccf88SSatish Balay } 505bc5ccf88SSatish Balay 506bc5ccf88SSatish Balay /* do sends: 507bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 508bc5ccf88SSatish Balay the ith processor 509bc5ccf88SSatish Balay */ 510c05d87d6SBarry Smith ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 511*533163c2SBarry Smith ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 512c05d87d6SBarry Smith ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 513a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 514bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 515bc5ccf88SSatish Balay for (i=1; i<size; i++) { 516563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 517*533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 518bc5ccf88SSatish Balay } 51975cae7c1SHong Zhang 52075cae7c1SHong Zhang i = 0; 5215bd3b8fbSHong Zhang space = stash->space_head; 52275cae7c1SHong Zhang while (space != PETSC_NULL){ 52375cae7c1SHong Zhang space_next = space->next; 5245bd3b8fbSHong Zhang sp_idx = space->idx; 5255bd3b8fbSHong Zhang sp_idy = space->idy; 5265bd3b8fbSHong Zhang sp_val = space->val; 52775cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 528bc5ccf88SSatish Balay j = owner[i]; 529a2d1c673SSatish Balay if (bs2 == 1) { 5305bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 531a2d1c673SSatish Balay } else { 532c1ac3661SBarry Smith PetscInt k; 53354f21887SBarry Smith PetscScalar *buf1,*buf2; 5344c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 535b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5364c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 537a2d1c673SSatish Balay } 5385bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5395bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 540bc5ccf88SSatish Balay startv[j]++; 541bc5ccf88SSatish Balay starti[j]++; 54275cae7c1SHong Zhang i++; 54375cae7c1SHong Zhang } 54475cae7c1SHong Zhang space = space_next; 545bc5ccf88SSatish Balay } 546bc5ccf88SSatish Balay startv[0] = 0; 547563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 548e5d0e772SSatish Balay 549bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 550563fb871SSatish Balay if (nprocs[i]) { 551563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 552a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 553bc5ccf88SSatish Balay } 554b85c94c3SSatish Balay } 5556cf91177SBarry Smith #if defined(PETSC_USE_INFO) 5561e2582c4SBarry Smith ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 557e5d0e772SSatish Balay for (i=0; i<size; i++) { 558e5d0e772SSatish Balay if (nprocs[i]) { 559a77337e4SBarry Smith ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 560e5d0e772SSatish Balay } 561e5d0e772SSatish Balay } 562e5d0e772SSatish Balay #endif 563c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 564606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 565c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 566c05d87d6SBarry Smith ierr = PetscFree(nprocs);CHKERRQ(ierr); 567a2d1c673SSatish Balay 568563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 569*533163c2SBarry Smith ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 570563fb871SSatish Balay 571563fb871SSatish Balay for (i=0; i<nreceives; i++) { 572563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 573563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 574563fb871SSatish Balay } 575563fb871SSatish Balay stash->recv_waits = recv_waits; 576563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 577563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 578563fb871SSatish Balay 579c05d87d6SBarry Smith stash->svalues = svalues; 580c05d87d6SBarry Smith stash->sindices = sindices; 581c05d87d6SBarry Smith stash->rvalues = rvalues; 582c05d87d6SBarry Smith stash->rindices = rindices; 583c05d87d6SBarry Smith stash->send_waits = send_waits; 584c05d87d6SBarry Smith stash->nsends = nsends; 585c05d87d6SBarry Smith stash->nrecvs = nreceives; 586bc5ccf88SSatish Balay PetscFunctionReturn(0); 587bc5ccf88SSatish Balay } 588bc5ccf88SSatish Balay 589a2d1c673SSatish Balay /* 5908798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5918798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5924c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5934c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5944c1ff481SSatish Balay 5954c1ff481SSatish Balay Input Parameters: 5964c1ff481SSatish Balay stash - the stash 5974c1ff481SSatish Balay 5984c1ff481SSatish Balay Output Parameters: 5994c1ff481SSatish Balay nvals - the number of entries in the current message. 6004c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6014c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6024c1ff481SSatish Balay vals - the values 6034c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6044c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6054c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 606a2d1c673SSatish Balay */ 6074a2ae208SSatish Balay #undef __FUNCT__ 6084a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 60954f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 610bc5ccf88SSatish Balay { 6116849ba73SBarry Smith PetscErrorCode ierr; 612*533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 613fe09c992SBarry Smith PetscInt bs2; 614a2d1c673SSatish Balay MPI_Status recv_status; 615b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 616bc5ccf88SSatish Balay 617bc5ccf88SSatish Balay PetscFunctionBegin; 618bc5ccf88SSatish Balay 619a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 620a2d1c673SSatish Balay /* Return if no more messages to process */ 621a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 622a2d1c673SSatish Balay 6234c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 624a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 625a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 626a2d1c673SSatish Balay while (!match_found) { 627*533163c2SBarry Smith CHKMEMQ; 628a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 629*533163c2SBarry Smith CHKMEMQ; 630*533163c2SBarry Smith if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_ERR_PLIB,"Negative MPI source!"); 631*533163c2SBarry Smith 632a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 633a2d1c673SSatish Balay if (i % 2) { 634a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 635c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 636a2d1c673SSatish Balay *nvals = *nvals/bs2; 637563fb871SSatish Balay } else { 638563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 639563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 640563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 641bc5ccf88SSatish Balay } 642a2d1c673SSatish Balay 643cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 644c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 645c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 646a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 647563fb871SSatish Balay *rows = stash->rindices[i2]; 648a2d1c673SSatish Balay *cols = *rows + *nvals; 649563fb871SSatish Balay *vals = stash->rvalues[i1]; 650a2d1c673SSatish Balay *flg = 1; 651a2d1c673SSatish Balay stash->nprocessed ++; 65235d8aa7fSBarry Smith match_found = PETSC_TRUE; 653bc5ccf88SSatish Balay } 654bc5ccf88SSatish Balay } 655bc5ccf88SSatish Balay PetscFunctionReturn(0); 656bc5ccf88SSatish Balay } 657