12d5177cdSBarry Smith 2*c6db04a5SJed Brown #include <private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 69417f4adSLois Curfman McInnes /* 78798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 84c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 94c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 109417f4adSLois Curfman McInnes 114c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 124c1ff481SSatish Balay 134c1ff481SSatish Balay Input Parameters: 144c1ff481SSatish Balay comm - communicator, required for scatters. 154c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 164c1ff481SSatish Balay 174c1ff481SSatish Balay Output Parameters: 184c1ff481SSatish Balay stash - the newly created stash 199417f4adSLois Curfman McInnes */ 204a2ae208SSatish Balay #undef __FUNCT__ 214a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 22c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 239417f4adSLois Curfman McInnes { 24dfbe8321SBarry Smith PetscErrorCode ierr; 25533163c2SBarry Smith PetscInt max,*opt,nopt,i; 26ace3abfcSBarry Smith PetscBool flg; 27bc5ccf88SSatish Balay 283a40ed3dSBarry Smith PetscFunctionBegin; 29bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 30752ec6e0SSatish Balay stash->comm = comm; 31752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 32a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 33a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 34a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 35533163c2SBarry Smith ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 36533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 37533163c2SBarry Smith 38bc5ccf88SSatish Balay 39434d7ff9SSatish Balay nopt = stash->size; 40d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 41b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 42434d7ff9SSatish Balay if (flg) { 43434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 44434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 45434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 46f4ab19daSSatish Balay else max = 0; /* Use default */ 47434d7ff9SSatish Balay stash->umax = max; 48434d7ff9SSatish Balay } else { 49434d7ff9SSatish Balay stash->umax = 0; 50434d7ff9SSatish Balay } 51606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 524c1ff481SSatish Balay if (bs <= 0) bs = 1; 53a2d1c673SSatish Balay 544c1ff481SSatish Balay stash->bs = bs; 559417f4adSLois Curfman McInnes stash->nmax = 0; 56434d7ff9SSatish Balay stash->oldnmax = 0; 579417f4adSLois Curfman McInnes stash->n = 0; 584c1ff481SSatish Balay stash->reallocs = -1; 5975cae7c1SHong Zhang stash->space_head = 0; 6075cae7c1SHong Zhang stash->space = 0; 619417f4adSLois Curfman McInnes 62bc5ccf88SSatish Balay stash->send_waits = 0; 63bc5ccf88SSatish Balay stash->recv_waits = 0; 64a2d1c673SSatish Balay stash->send_status = 0; 65bc5ccf88SSatish Balay stash->nsends = 0; 66bc5ccf88SSatish Balay stash->nrecvs = 0; 67bc5ccf88SSatish Balay stash->svalues = 0; 68bc5ccf88SSatish Balay stash->rvalues = 0; 69563fb871SSatish Balay stash->rindices = 0; 70a2d1c673SSatish Balay stash->nprocessed = 0; 7167318a8aSJed Brown 7267318a8aSJed Brown stash->reproduce = PETSC_FALSE; 73acfcf0e5SJed Brown ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr); 743a40ed3dSBarry Smith PetscFunctionReturn(0); 759417f4adSLois Curfman McInnes } 769417f4adSLois Curfman McInnes 774c1ff481SSatish Balay /* 788798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 794c1ff481SSatish Balay */ 804a2ae208SSatish Balay #undef __FUNCT__ 814a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 82dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 839417f4adSLois Curfman McInnes { 84dfbe8321SBarry Smith PetscErrorCode ierr; 85a2d1c673SSatish Balay 86bc5ccf88SSatish Balay PetscFunctionBegin; 8775cae7c1SHong Zhang if (stash->space_head){ 8875cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 8975cae7c1SHong Zhang stash->space_head = 0; 9082740460SHong Zhang stash->space = 0; 9175cae7c1SHong Zhang } 92533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 93bc5ccf88SSatish Balay PetscFunctionReturn(0); 94bc5ccf88SSatish Balay } 95bc5ccf88SSatish Balay 964c1ff481SSatish Balay /* 9767318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 984c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 9967318a8aSJed Brown all the memory used for message passing is cleaned up. This 1004c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1014c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1024c1ff481SSatish Balay so that the same value can be used the next time through. 1034c1ff481SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 106dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 107bc5ccf88SSatish Balay { 1086849ba73SBarry Smith PetscErrorCode ierr; 109533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 110a2d1c673SSatish Balay MPI_Status *send_status; 111a2d1c673SSatish Balay 1123a40ed3dSBarry Smith PetscFunctionBegin; 113533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 114a2d1c673SSatish Balay /* wait on sends */ 115a2d1c673SSatish Balay if (nsends) { 11682502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 117a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 118606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 119a2d1c673SSatish Balay } 120a2d1c673SSatish Balay 121c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 122434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 123434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 124b9b97703SBarry Smith if (stash->n) { 12594b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1268a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 127434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 128b9b97703SBarry Smith } 129434d7ff9SSatish Balay 130d07ff455SSatish Balay stash->nmax = 0; 131d07ff455SSatish Balay stash->n = 0; 1324c1ff481SSatish Balay stash->reallocs = -1; 133a2d1c673SSatish Balay stash->nprocessed = 0; 13475cae7c1SHong Zhang if (stash->space_head){ 13575cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 13675cae7c1SHong Zhang stash->space_head = 0; 13782740460SHong Zhang stash->space = 0; 13875cae7c1SHong Zhang } 139606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 140606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 141c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 142c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 143606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 144c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 145563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 1463a40ed3dSBarry Smith PetscFunctionReturn(0); 1479417f4adSLois Curfman McInnes } 1489417f4adSLois Curfman McInnes 1494c1ff481SSatish Balay /* 1508798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1514c1ff481SSatish Balay 1524c1ff481SSatish Balay Input Parameters: 1534c1ff481SSatish Balay stash - the stash 15494b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1554c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1564c1ff481SSatish Balay 1574c1ff481SSatish Balay */ 1584a2ae208SSatish Balay #undef __FUNCT__ 1594a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 160c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 16197530c3fSBarry Smith { 162c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 16394b769a5SSatish Balay 1643a40ed3dSBarry Smith PetscFunctionBegin; 1651ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1661ecfd215SBarry Smith if (reallocs) { 167434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 168434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1691ecfd215SBarry Smith } 170bc5ccf88SSatish Balay PetscFunctionReturn(0); 171bc5ccf88SSatish Balay } 1724c1ff481SSatish Balay 1734c1ff481SSatish Balay /* 1748798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1754c1ff481SSatish Balay 1764c1ff481SSatish Balay Input Parameters: 1774c1ff481SSatish Balay stash - the stash 1784c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1794c1ff481SSatish Balay this value is used while allocating memory. 1804c1ff481SSatish Balay */ 1814a2ae208SSatish Balay #undef __FUNCT__ 1824a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 183c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 184bc5ccf88SSatish Balay { 185bc5ccf88SSatish Balay PetscFunctionBegin; 186434d7ff9SSatish Balay stash->umax = max; 1873a40ed3dSBarry Smith PetscFunctionReturn(0); 18897530c3fSBarry Smith } 18997530c3fSBarry Smith 1908798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1914c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1924c1ff481SSatish Balay being inserted into the stash. 1934c1ff481SSatish Balay 1944c1ff481SSatish Balay Input Parameters: 1954c1ff481SSatish Balay stash - the stash 1964c1ff481SSatish Balay incr - the minimum increase requested 1974c1ff481SSatish Balay 1984c1ff481SSatish Balay Notes: 1994c1ff481SSatish Balay This routine doubles the currently used memory. 2004c1ff481SSatish Balay */ 2014a2ae208SSatish Balay #undef __FUNCT__ 2024a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 203c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2049417f4adSLois Curfman McInnes { 2056849ba73SBarry Smith PetscErrorCode ierr; 2065bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2079417f4adSLois Curfman McInnes 2083a40ed3dSBarry Smith PetscFunctionBegin; 2099417f4adSLois Curfman McInnes /* allocate a larger stash */ 210c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 211434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 212434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 213c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 214434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 215434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 216434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2174c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 218d07ff455SSatish Balay 21975cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 22075cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 221b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 222b087b6d6SSatish Balay stash->space_head = stash->space; 22375cae7c1SHong Zhang } 224b087b6d6SSatish Balay 225bc5ccf88SSatish Balay stash->reallocs++; 22675cae7c1SHong Zhang stash->nmax = newnmax; 227bc5ccf88SSatish Balay PetscFunctionReturn(0); 228bc5ccf88SSatish Balay } 229bc5ccf88SSatish Balay /* 2308798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2314c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2324c1ff481SSatish Balay can be inserted with a single call to this function. 2334c1ff481SSatish Balay 2344c1ff481SSatish Balay Input Parameters: 2354c1ff481SSatish Balay stash - the stash 2364c1ff481SSatish Balay row - the global row correspoiding to the values 2374c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2384c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2394c1ff481SSatish Balay values - the values inserted 240bc5ccf88SSatish Balay */ 2414a2ae208SSatish Balay #undef __FUNCT__ 2424a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 243ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 244bc5ccf88SSatish Balay { 245dfbe8321SBarry Smith PetscErrorCode ierr; 246b400d20cSBarry Smith PetscInt i,k,cnt = 0; 24775cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 248bc5ccf88SSatish Balay 249bc5ccf88SSatish Balay PetscFunctionBegin; 2504c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 25175cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2528798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2539417f4adSLois Curfman McInnes } 25475cae7c1SHong Zhang space = stash->space; 25575cae7c1SHong Zhang k = space->local_used; 2564c1ff481SSatish Balay for (i=0; i<n; i++) { 25788c3974fSBarry Smith if (ignorezeroentries && (values[i] == 0.0)) continue; 25875cae7c1SHong Zhang space->idx[k] = row; 25975cae7c1SHong Zhang space->idy[k] = idxn[i]; 26075cae7c1SHong Zhang space->val[k] = values[i]; 26175cae7c1SHong Zhang k++; 262b400d20cSBarry Smith cnt++; 2639417f4adSLois Curfman McInnes } 264b400d20cSBarry Smith stash->n += cnt; 265b400d20cSBarry Smith space->local_used += cnt; 266b400d20cSBarry Smith space->local_remaining -= cnt; 267a2d1c673SSatish Balay PetscFunctionReturn(0); 268a2d1c673SSatish Balay } 26975cae7c1SHong Zhang 2704c1ff481SSatish Balay /* 2718798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2724c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2734c1ff481SSatish Balay can be inserted with a single call to this function. 274a2d1c673SSatish Balay 2754c1ff481SSatish Balay Input Parameters: 2764c1ff481SSatish Balay stash - the stash 2774c1ff481SSatish Balay row - the global row correspoiding to the values 2784c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2794c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2804c1ff481SSatish Balay values - the values inserted 2814c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2824c1ff481SSatish Balay this happens because the input is columnoriented. 2834c1ff481SSatish Balay */ 2844a2ae208SSatish Balay #undef __FUNCT__ 2854a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 286ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 287a2d1c673SSatish Balay { 288dfbe8321SBarry Smith PetscErrorCode ierr; 28950e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 29075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 291a2d1c673SSatish Balay 2924c1ff481SSatish Balay PetscFunctionBegin; 2934c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 29475cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2958798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2964c1ff481SSatish Balay } 29775cae7c1SHong Zhang space = stash->space; 29875cae7c1SHong Zhang k = space->local_used; 2994c1ff481SSatish Balay for (i=0; i<n; i++) { 30088c3974fSBarry Smith if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 30175cae7c1SHong Zhang space->idx[k] = row; 30275cae7c1SHong Zhang space->idy[k] = idxn[i]; 30375cae7c1SHong Zhang space->val[k] = values[i*stepval]; 30475cae7c1SHong Zhang k++; 305b400d20cSBarry Smith cnt++; 3064c1ff481SSatish Balay } 307b400d20cSBarry Smith stash->n += cnt; 308b400d20cSBarry Smith space->local_used += cnt; 309b400d20cSBarry Smith space->local_remaining -= cnt; 3104c1ff481SSatish Balay PetscFunctionReturn(0); 3114c1ff481SSatish Balay } 3124c1ff481SSatish Balay 3134c1ff481SSatish Balay /* 3148798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3154c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3164c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3174c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3184c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3194c1ff481SSatish Balay 3204c1ff481SSatish Balay Input Parameters: 3214c1ff481SSatish Balay stash - the stash 3224c1ff481SSatish Balay row - the global block-row correspoiding to the values 3234c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3244c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3254c1ff481SSatish Balay values. Each block is of size bs*bs. 3264c1ff481SSatish Balay values - the values inserted 3274c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3284c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3294c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3304c1ff481SSatish Balay */ 3314a2ae208SSatish Balay #undef __FUNCT__ 3324a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 33354f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3344c1ff481SSatish Balay { 335dfbe8321SBarry Smith PetscErrorCode ierr; 33675cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 33754f21887SBarry Smith const PetscScalar *vals; 33854f21887SBarry Smith PetscScalar *array; 33975cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 340a2d1c673SSatish Balay 341a2d1c673SSatish Balay PetscFunctionBegin; 34275cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3438798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 344a2d1c673SSatish Balay } 34575cae7c1SHong Zhang space = stash->space; 34675cae7c1SHong Zhang l = space->local_used; 34775cae7c1SHong Zhang bs2 = bs*bs; 3484c1ff481SSatish Balay for (i=0; i<n; i++) { 34975cae7c1SHong Zhang space->idx[l] = row; 35075cae7c1SHong Zhang space->idy[l] = idxn[i]; 35175cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 35275cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 35375cae7c1SHong Zhang funtion call */ 35475cae7c1SHong Zhang array = space->val + bs2*l; 35575cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 35675cae7c1SHong Zhang for (j=0; j<bs; j++) { 35775cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 35875cae7c1SHong Zhang array++; 35975cae7c1SHong Zhang vals += cmax*bs; 36075cae7c1SHong Zhang } 36175cae7c1SHong Zhang l++; 362a2d1c673SSatish Balay } 3635bd3b8fbSHong Zhang stash->n += n; 36475cae7c1SHong Zhang space->local_used += n; 36575cae7c1SHong Zhang space->local_remaining -= n; 3664c1ff481SSatish Balay PetscFunctionReturn(0); 3674c1ff481SSatish Balay } 3684c1ff481SSatish Balay 3694c1ff481SSatish Balay /* 3708798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3714c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3724c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3734c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3744c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3754c1ff481SSatish Balay 3764c1ff481SSatish Balay Input Parameters: 3774c1ff481SSatish Balay stash - the stash 3784c1ff481SSatish Balay row - the global block-row correspoiding to the values 3794c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3804c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3814c1ff481SSatish Balay values. Each block is of size bs*bs. 3824c1ff481SSatish Balay values - the values inserted 3834c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3844c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3854c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3864c1ff481SSatish Balay */ 3874a2ae208SSatish Balay #undef __FUNCT__ 3884a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 38954f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3904c1ff481SSatish Balay { 391dfbe8321SBarry Smith PetscErrorCode ierr; 39275cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 39354f21887SBarry Smith const PetscScalar *vals; 39454f21887SBarry Smith PetscScalar *array; 39575cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 3964c1ff481SSatish Balay 3974c1ff481SSatish Balay PetscFunctionBegin; 39875cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3998798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4004c1ff481SSatish Balay } 40175cae7c1SHong Zhang space = stash->space; 40275cae7c1SHong Zhang l = space->local_used; 40375cae7c1SHong Zhang bs2 = bs*bs; 4044c1ff481SSatish Balay for (i=0; i<n; i++) { 40575cae7c1SHong Zhang space->idx[l] = row; 40675cae7c1SHong Zhang space->idy[l] = idxn[i]; 40775cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 40875cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 40975cae7c1SHong Zhang funtion call */ 41075cae7c1SHong Zhang array = space->val + bs2*l; 41175cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 41275cae7c1SHong Zhang for (j=0; j<bs; j++) { 41375cae7c1SHong Zhang for (k=0; k<bs; k++) {array[k] = vals[k];} 41475cae7c1SHong Zhang array += bs; 41575cae7c1SHong Zhang vals += rmax*bs; 41675cae7c1SHong Zhang } 4175bd3b8fbSHong Zhang l++; 418a2d1c673SSatish Balay } 4195bd3b8fbSHong Zhang stash->n += n; 42075cae7c1SHong Zhang space->local_used += n; 42175cae7c1SHong Zhang space->local_remaining -= n; 4223a40ed3dSBarry Smith PetscFunctionReturn(0); 4239417f4adSLois Curfman McInnes } 4244c1ff481SSatish Balay /* 4258798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4264c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4274c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4284c1ff481SSatish Balay processors. 429bc5ccf88SSatish Balay 4304c1ff481SSatish Balay Input Parameters: 4314c1ff481SSatish Balay stash - the stash 4324c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4334c1ff481SSatish Balay for each node. 4344c1ff481SSatish Balay 4354c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4364c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4374c1ff481SSatish Balay the proper global indices. 4384c1ff481SSatish Balay */ 4394a2ae208SSatish Balay #undef __FUNCT__ 4404a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 4411e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 442bc5ccf88SSatish Balay { 443c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 444fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4456849ba73SBarry Smith PetscErrorCode ierr; 44675cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 44754f21887SBarry Smith PetscScalar **rvalues,*svalues; 448bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 449563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 450fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 4515bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 45254f21887SBarry Smith PetscScalar *sp_val; 4535bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 454bc5ccf88SSatish Balay 455bc5ccf88SSatish Balay PetscFunctionBegin; 4564c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 45775cae7c1SHong Zhang 458bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 459c05d87d6SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 460c05d87d6SBarry Smith ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 461c05d87d6SBarry Smith ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 462c05d87d6SBarry Smith ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 463c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 464a2d1c673SSatish Balay 46575cae7c1SHong Zhang i = j = 0; 4667357eb19SBarry Smith lastidx = -1; 4675bd3b8fbSHong Zhang space = stash->space_head; 46875cae7c1SHong Zhang while (space != PETSC_NULL){ 46975cae7c1SHong Zhang space_next = space->next; 4705bd3b8fbSHong Zhang sp_idx = space->idx; 47175cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 4727357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4735bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4747357eb19SBarry Smith lastidx = idx; 4757357eb19SBarry Smith for (; j<size; j++) { 4764c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 477563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 478bc5ccf88SSatish Balay } 479bc5ccf88SSatish Balay } 48075cae7c1SHong Zhang i++; 48175cae7c1SHong Zhang } 48275cae7c1SHong Zhang space = space_next; 483bc5ccf88SSatish Balay } 484563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 485563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 486563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 487563fb871SSatish Balay } 488bc5ccf88SSatish Balay 48954f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 490563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 491563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 492563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 493563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 494563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 495563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 496563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 497563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 498563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 499563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 500563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 501bc5ccf88SSatish Balay } 502bc5ccf88SSatish Balay 503bc5ccf88SSatish Balay /* do sends: 504bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 505bc5ccf88SSatish Balay the ith processor 506bc5ccf88SSatish Balay */ 507c05d87d6SBarry Smith ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 508533163c2SBarry Smith ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 509c05d87d6SBarry Smith ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 510a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 511bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 512bc5ccf88SSatish Balay for (i=1; i<size; i++) { 513563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 514533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 515bc5ccf88SSatish Balay } 51675cae7c1SHong Zhang 51775cae7c1SHong Zhang i = 0; 5185bd3b8fbSHong Zhang space = stash->space_head; 51975cae7c1SHong Zhang while (space != PETSC_NULL){ 52075cae7c1SHong Zhang space_next = space->next; 5215bd3b8fbSHong Zhang sp_idx = space->idx; 5225bd3b8fbSHong Zhang sp_idy = space->idy; 5235bd3b8fbSHong Zhang sp_val = space->val; 52475cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 525bc5ccf88SSatish Balay j = owner[i]; 526a2d1c673SSatish Balay if (bs2 == 1) { 5275bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 528a2d1c673SSatish Balay } else { 529c1ac3661SBarry Smith PetscInt k; 53054f21887SBarry Smith PetscScalar *buf1,*buf2; 5314c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 532b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5334c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 534a2d1c673SSatish Balay } 5355bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5365bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 537bc5ccf88SSatish Balay startv[j]++; 538bc5ccf88SSatish Balay starti[j]++; 53975cae7c1SHong Zhang i++; 54075cae7c1SHong Zhang } 54175cae7c1SHong Zhang space = space_next; 542bc5ccf88SSatish Balay } 543bc5ccf88SSatish Balay startv[0] = 0; 544563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 545e5d0e772SSatish Balay 546bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 547563fb871SSatish Balay if (nprocs[i]) { 548563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 549a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 550bc5ccf88SSatish Balay } 551b85c94c3SSatish Balay } 5526cf91177SBarry Smith #if defined(PETSC_USE_INFO) 5531e2582c4SBarry Smith ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 554e5d0e772SSatish Balay for (i=0; i<size; i++) { 555e5d0e772SSatish Balay if (nprocs[i]) { 556a77337e4SBarry Smith ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 557e5d0e772SSatish Balay } 558e5d0e772SSatish Balay } 559e5d0e772SSatish Balay #endif 560c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 561606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 562c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 563c05d87d6SBarry Smith ierr = PetscFree(nprocs);CHKERRQ(ierr); 564a2d1c673SSatish Balay 565563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 566533163c2SBarry Smith ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 567563fb871SSatish Balay 568563fb871SSatish Balay for (i=0; i<nreceives; i++) { 569563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 570563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 571563fb871SSatish Balay } 572563fb871SSatish Balay stash->recv_waits = recv_waits; 573563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 574563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 575563fb871SSatish Balay 576c05d87d6SBarry Smith stash->svalues = svalues; 577c05d87d6SBarry Smith stash->sindices = sindices; 578c05d87d6SBarry Smith stash->rvalues = rvalues; 579c05d87d6SBarry Smith stash->rindices = rindices; 580c05d87d6SBarry Smith stash->send_waits = send_waits; 581c05d87d6SBarry Smith stash->nsends = nsends; 582c05d87d6SBarry Smith stash->nrecvs = nreceives; 58367318a8aSJed Brown stash->reproduce_count = 0; 584bc5ccf88SSatish Balay PetscFunctionReturn(0); 585bc5ccf88SSatish Balay } 586bc5ccf88SSatish Balay 587a2d1c673SSatish Balay /* 5888798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5898798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5904c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5914c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5924c1ff481SSatish Balay 5934c1ff481SSatish Balay Input Parameters: 5944c1ff481SSatish Balay stash - the stash 5954c1ff481SSatish Balay 5964c1ff481SSatish Balay Output Parameters: 5974c1ff481SSatish Balay nvals - the number of entries in the current message. 5984c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5994c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6004c1ff481SSatish Balay vals - the values 6014c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6024c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6034c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 604a2d1c673SSatish Balay */ 6054a2ae208SSatish Balay #undef __FUNCT__ 6064a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 60754f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 608bc5ccf88SSatish Balay { 6096849ba73SBarry Smith PetscErrorCode ierr; 610533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 611fe09c992SBarry Smith PetscInt bs2; 612a2d1c673SSatish Balay MPI_Status recv_status; 613ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 614bc5ccf88SSatish Balay 615bc5ccf88SSatish Balay PetscFunctionBegin; 616bc5ccf88SSatish Balay 617a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 618a2d1c673SSatish Balay /* Return if no more messages to process */ 619a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 620a2d1c673SSatish Balay 6214c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 62267318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 623a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 624a2d1c673SSatish Balay while (!match_found) { 625533163c2SBarry Smith CHKMEMQ; 62667318a8aSJed Brown if (stash->reproduce) { 62767318a8aSJed Brown i = stash->reproduce_count++; 62867318a8aSJed Brown ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 62967318a8aSJed Brown } else { 630a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 63167318a8aSJed Brown } 632533163c2SBarry Smith CHKMEMQ; 633e32f2f54SBarry Smith if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 634533163c2SBarry Smith 63567318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 636a2d1c673SSatish Balay if (i % 2) { 637a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 638c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 639a2d1c673SSatish Balay *nvals = *nvals/bs2; 640563fb871SSatish Balay } else { 641563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 642563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 643563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 644bc5ccf88SSatish Balay } 645a2d1c673SSatish Balay 646cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 647c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 648c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 649a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 650563fb871SSatish Balay *rows = stash->rindices[i2]; 651a2d1c673SSatish Balay *cols = *rows + *nvals; 652563fb871SSatish Balay *vals = stash->rvalues[i1]; 653a2d1c673SSatish Balay *flg = 1; 654a2d1c673SSatish Balay stash->nprocessed ++; 65535d8aa7fSBarry Smith match_found = PETSC_TRUE; 656bc5ccf88SSatish Balay } 657bc5ccf88SSatish Balay } 658bc5ccf88SSatish Balay PetscFunctionReturn(0); 659bc5ccf88SSatish Balay } 660