12d5177cdSBarry Smith 2af0996ceSBarry Smith #include <petsc/private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 12d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*); 13d7d60843SJed Brown 149417f4adSLois Curfman McInnes /* 158798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 164c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 174c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 189417f4adSLois Curfman McInnes 194c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 204c1ff481SSatish Balay 214c1ff481SSatish Balay Input Parameters: 224c1ff481SSatish Balay comm - communicator, required for scatters. 234c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 244c1ff481SSatish Balay 254c1ff481SSatish Balay Output Parameters: 264c1ff481SSatish Balay stash - the newly created stash 279417f4adSLois Curfman McInnes */ 28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 299417f4adSLois Curfman McInnes { 30dfbe8321SBarry Smith PetscErrorCode ierr; 31533163c2SBarry Smith PetscInt max,*opt,nopt,i; 32ace3abfcSBarry Smith PetscBool flg; 33bc5ccf88SSatish Balay 343a40ed3dSBarry Smith PetscFunctionBegin; 35bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 36752ec6e0SSatish Balay stash->comm = comm; 378865f1eaSKarl Rupp 38752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 39a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 40a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 41a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 42785e854fSJed Brown ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 43533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 44533163c2SBarry Smith 45bc5ccf88SSatish Balay 46434d7ff9SSatish Balay nopt = stash->size; 47785e854fSJed Brown ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 48c5929fdfSBarry Smith ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 49434d7ff9SSatish Balay if (flg) { 50434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 51434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 52434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 53f4ab19daSSatish Balay else max = 0; /* Use default */ 54434d7ff9SSatish Balay stash->umax = max; 55434d7ff9SSatish Balay } else { 56434d7ff9SSatish Balay stash->umax = 0; 57434d7ff9SSatish Balay } 58606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 594c1ff481SSatish Balay if (bs <= 0) bs = 1; 60a2d1c673SSatish Balay 614c1ff481SSatish Balay stash->bs = bs; 629417f4adSLois Curfman McInnes stash->nmax = 0; 63434d7ff9SSatish Balay stash->oldnmax = 0; 649417f4adSLois Curfman McInnes stash->n = 0; 654c1ff481SSatish Balay stash->reallocs = -1; 6675cae7c1SHong Zhang stash->space_head = 0; 6775cae7c1SHong Zhang stash->space = 0; 689417f4adSLois Curfman McInnes 69bc5ccf88SSatish Balay stash->send_waits = 0; 70bc5ccf88SSatish Balay stash->recv_waits = 0; 71a2d1c673SSatish Balay stash->send_status = 0; 72bc5ccf88SSatish Balay stash->nsends = 0; 73bc5ccf88SSatish Balay stash->nrecvs = 0; 74bc5ccf88SSatish Balay stash->svalues = 0; 75bc5ccf88SSatish Balay stash->rvalues = 0; 76563fb871SSatish Balay stash->rindices = 0; 77a2d1c673SSatish Balay stash->nprocessed = 0; 7867318a8aSJed Brown stash->reproduce = PETSC_FALSE; 79d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 808865f1eaSKarl Rupp 81c5929fdfSBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 82*b30fb036SBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr); 83*b30fb036SBarry Smith if (!flg) { 84d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 85d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 86d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 87d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 88ac2b2aa0SJed Brown } else { 89ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 90ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 91ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 92ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 93ac2b2aa0SJed Brown } 943a40ed3dSBarry Smith PetscFunctionReturn(0); 959417f4adSLois Curfman McInnes } 969417f4adSLois Curfman McInnes 974c1ff481SSatish Balay /* 988798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 994c1ff481SSatish Balay */ 100dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 1019417f4adSLois Curfman McInnes { 102dfbe8321SBarry Smith PetscErrorCode ierr; 103a2d1c673SSatish Balay 104bc5ccf88SSatish Balay PetscFunctionBegin; 1056bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 106ac2b2aa0SJed Brown if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 1078865f1eaSKarl Rupp 10882740460SHong Zhang stash->space = 0; 1098865f1eaSKarl Rupp 110533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 111bc5ccf88SSatish Balay PetscFunctionReturn(0); 112bc5ccf88SSatish Balay } 113bc5ccf88SSatish Balay 1144c1ff481SSatish Balay /* 11567318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1164c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 11767318a8aSJed Brown all the memory used for message passing is cleaned up. This 1184c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1194c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1204c1ff481SSatish Balay so that the same value can be used the next time through. 1214c1ff481SSatish Balay */ 122dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 123bc5ccf88SSatish Balay { 1246849ba73SBarry Smith PetscErrorCode ierr; 125ac2b2aa0SJed Brown 126ac2b2aa0SJed Brown PetscFunctionBegin; 127ac2b2aa0SJed Brown ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 128ac2b2aa0SJed Brown PetscFunctionReturn(0); 129ac2b2aa0SJed Brown } 130ac2b2aa0SJed Brown 131ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 132ac2b2aa0SJed Brown { 133ac2b2aa0SJed Brown PetscErrorCode ierr; 134533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 135a2d1c673SSatish Balay MPI_Status *send_status; 136a2d1c673SSatish Balay 1373a40ed3dSBarry Smith PetscFunctionBegin; 138533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 139a2d1c673SSatish Balay /* wait on sends */ 140a2d1c673SSatish Balay if (nsends) { 141785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 142a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 143606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 144a2d1c673SSatish Balay } 145a2d1c673SSatish Balay 146c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 147434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 148434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 149b9b97703SBarry Smith if (stash->n) { 15094b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1518a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 152434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 153b9b97703SBarry Smith } 154434d7ff9SSatish Balay 155d07ff455SSatish Balay stash->nmax = 0; 156d07ff455SSatish Balay stash->n = 0; 1574c1ff481SSatish Balay stash->reallocs = -1; 158a2d1c673SSatish Balay stash->nprocessed = 0; 1598865f1eaSKarl Rupp 1606bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1618865f1eaSKarl Rupp 16282740460SHong Zhang stash->space = 0; 1638865f1eaSKarl Rupp 164606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 165606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 166c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 167c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 168606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 169c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 170563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 1713a40ed3dSBarry Smith PetscFunctionReturn(0); 1729417f4adSLois Curfman McInnes } 1739417f4adSLois Curfman McInnes 1744c1ff481SSatish Balay /* 1758798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1764c1ff481SSatish Balay 1774c1ff481SSatish Balay Input Parameters: 1784c1ff481SSatish Balay stash - the stash 17994b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1804c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1814c1ff481SSatish Balay 1824c1ff481SSatish Balay */ 183c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 18497530c3fSBarry Smith { 185c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 18694b769a5SSatish Balay 1873a40ed3dSBarry Smith PetscFunctionBegin; 1881ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1891ecfd215SBarry Smith if (reallocs) { 190434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 191434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1921ecfd215SBarry Smith } 193bc5ccf88SSatish Balay PetscFunctionReturn(0); 194bc5ccf88SSatish Balay } 1954c1ff481SSatish Balay 1964c1ff481SSatish Balay /* 1978798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1984c1ff481SSatish Balay 1994c1ff481SSatish Balay Input Parameters: 2004c1ff481SSatish Balay stash - the stash 2014c1ff481SSatish Balay max - the value that is used as the max size of the stash. 2024c1ff481SSatish Balay this value is used while allocating memory. 2034c1ff481SSatish Balay */ 204c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 205bc5ccf88SSatish Balay { 206bc5ccf88SSatish Balay PetscFunctionBegin; 207434d7ff9SSatish Balay stash->umax = max; 2083a40ed3dSBarry Smith PetscFunctionReturn(0); 20997530c3fSBarry Smith } 21097530c3fSBarry Smith 2118798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2124c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2134c1ff481SSatish Balay being inserted into the stash. 2144c1ff481SSatish Balay 2154c1ff481SSatish Balay Input Parameters: 2164c1ff481SSatish Balay stash - the stash 2174c1ff481SSatish Balay incr - the minimum increase requested 2184c1ff481SSatish Balay 2194c1ff481SSatish Balay Notes: 2204c1ff481SSatish Balay This routine doubles the currently used memory. 2214c1ff481SSatish Balay */ 222c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2239417f4adSLois Curfman McInnes { 2246849ba73SBarry Smith PetscErrorCode ierr; 2255bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2269417f4adSLois Curfman McInnes 2273a40ed3dSBarry Smith PetscFunctionBegin; 2289417f4adSLois Curfman McInnes /* allocate a larger stash */ 229c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 230434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 231434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 232c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 233434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 234434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 235434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2364c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 237d07ff455SSatish Balay 23875cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 23975cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 240b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 241b087b6d6SSatish Balay stash->space_head = stash->space; 24275cae7c1SHong Zhang } 243b087b6d6SSatish Balay 244bc5ccf88SSatish Balay stash->reallocs++; 24575cae7c1SHong Zhang stash->nmax = newnmax; 246bc5ccf88SSatish Balay PetscFunctionReturn(0); 247bc5ccf88SSatish Balay } 248bc5ccf88SSatish Balay /* 2498798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2504c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2514c1ff481SSatish Balay can be inserted with a single call to this function. 2524c1ff481SSatish Balay 2534c1ff481SSatish Balay Input Parameters: 2544c1ff481SSatish Balay stash - the stash 2554c1ff481SSatish Balay row - the global row correspoiding to the values 2564c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2574c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2584c1ff481SSatish Balay values - the values inserted 259bc5ccf88SSatish Balay */ 260ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 261bc5ccf88SSatish Balay { 262dfbe8321SBarry Smith PetscErrorCode ierr; 263b400d20cSBarry Smith PetscInt i,k,cnt = 0; 26475cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 265bc5ccf88SSatish Balay 266bc5ccf88SSatish Balay PetscFunctionBegin; 2674c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 26875cae7c1SHong Zhang if (!space || space->local_remaining < n) { 2698798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2709417f4adSLois Curfman McInnes } 27175cae7c1SHong Zhang space = stash->space; 27275cae7c1SHong Zhang k = space->local_used; 2734c1ff481SSatish Balay for (i=0; i<n; i++) { 27488c3974fSBarry Smith if (ignorezeroentries && (values[i] == 0.0)) continue; 27575cae7c1SHong Zhang space->idx[k] = row; 27675cae7c1SHong Zhang space->idy[k] = idxn[i]; 27775cae7c1SHong Zhang space->val[k] = values[i]; 27875cae7c1SHong Zhang k++; 279b400d20cSBarry Smith cnt++; 2809417f4adSLois Curfman McInnes } 281b400d20cSBarry Smith stash->n += cnt; 282b400d20cSBarry Smith space->local_used += cnt; 283b400d20cSBarry Smith space->local_remaining -= cnt; 284a2d1c673SSatish Balay PetscFunctionReturn(0); 285a2d1c673SSatish Balay } 28675cae7c1SHong Zhang 2874c1ff481SSatish Balay /* 2888798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2894c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2904c1ff481SSatish Balay can be inserted with a single call to this function. 291a2d1c673SSatish Balay 2924c1ff481SSatish Balay Input Parameters: 2934c1ff481SSatish Balay stash - the stash 2944c1ff481SSatish Balay row - the global row correspoiding to the values 2954c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2964c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2974c1ff481SSatish Balay values - the values inserted 2984c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2994c1ff481SSatish Balay this happens because the input is columnoriented. 3004c1ff481SSatish Balay */ 301ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 302a2d1c673SSatish Balay { 303dfbe8321SBarry Smith PetscErrorCode ierr; 30450e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 30575cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 306a2d1c673SSatish Balay 3074c1ff481SSatish Balay PetscFunctionBegin; 3084c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 30975cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3108798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3114c1ff481SSatish Balay } 31275cae7c1SHong Zhang space = stash->space; 31375cae7c1SHong Zhang k = space->local_used; 3144c1ff481SSatish Balay for (i=0; i<n; i++) { 31588c3974fSBarry Smith if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 31675cae7c1SHong Zhang space->idx[k] = row; 31775cae7c1SHong Zhang space->idy[k] = idxn[i]; 31875cae7c1SHong Zhang space->val[k] = values[i*stepval]; 31975cae7c1SHong Zhang k++; 320b400d20cSBarry Smith cnt++; 3214c1ff481SSatish Balay } 322b400d20cSBarry Smith stash->n += cnt; 323b400d20cSBarry Smith space->local_used += cnt; 324b400d20cSBarry Smith space->local_remaining -= cnt; 3254c1ff481SSatish Balay PetscFunctionReturn(0); 3264c1ff481SSatish Balay } 3274c1ff481SSatish Balay 3284c1ff481SSatish Balay /* 3298798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3304c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3314c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3324c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3334c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3344c1ff481SSatish Balay 3354c1ff481SSatish Balay Input Parameters: 3364c1ff481SSatish Balay stash - the stash 3374c1ff481SSatish Balay row - the global block-row correspoiding to the values 3384c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3394c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3404c1ff481SSatish Balay values. Each block is of size bs*bs. 3414c1ff481SSatish Balay values - the values inserted 3424c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3434c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3444c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3454c1ff481SSatish Balay */ 34654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3474c1ff481SSatish Balay { 348dfbe8321SBarry Smith PetscErrorCode ierr; 34975cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 35054f21887SBarry Smith const PetscScalar *vals; 35154f21887SBarry Smith PetscScalar *array; 35275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 353a2d1c673SSatish Balay 354a2d1c673SSatish Balay PetscFunctionBegin; 35575cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3568798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 357a2d1c673SSatish Balay } 35875cae7c1SHong Zhang space = stash->space; 35975cae7c1SHong Zhang l = space->local_used; 36075cae7c1SHong Zhang bs2 = bs*bs; 3614c1ff481SSatish Balay for (i=0; i<n; i++) { 36275cae7c1SHong Zhang space->idx[l] = row; 36375cae7c1SHong Zhang space->idy[l] = idxn[i]; 36475cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 36575cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 36675cae7c1SHong Zhang funtion call */ 36775cae7c1SHong Zhang array = space->val + bs2*l; 36875cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 36975cae7c1SHong Zhang for (j=0; j<bs; j++) { 37075cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 37175cae7c1SHong Zhang array++; 37275cae7c1SHong Zhang vals += cmax*bs; 37375cae7c1SHong Zhang } 37475cae7c1SHong Zhang l++; 375a2d1c673SSatish Balay } 3765bd3b8fbSHong Zhang stash->n += n; 37775cae7c1SHong Zhang space->local_used += n; 37875cae7c1SHong Zhang space->local_remaining -= n; 3794c1ff481SSatish Balay PetscFunctionReturn(0); 3804c1ff481SSatish Balay } 3814c1ff481SSatish Balay 3824c1ff481SSatish Balay /* 3838798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3844c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3854c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3864c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3874c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3884c1ff481SSatish Balay 3894c1ff481SSatish Balay Input Parameters: 3904c1ff481SSatish Balay stash - the stash 3914c1ff481SSatish Balay row - the global block-row correspoiding to the values 3924c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3934c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3944c1ff481SSatish Balay values. Each block is of size bs*bs. 3954c1ff481SSatish Balay values - the values inserted 3964c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3974c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3984c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3994c1ff481SSatish Balay */ 40054f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 4014c1ff481SSatish Balay { 402dfbe8321SBarry Smith PetscErrorCode ierr; 40375cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 40454f21887SBarry Smith const PetscScalar *vals; 40554f21887SBarry Smith PetscScalar *array; 40675cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4074c1ff481SSatish Balay 4084c1ff481SSatish Balay PetscFunctionBegin; 40975cae7c1SHong Zhang if (!space || space->local_remaining < n) { 4108798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4114c1ff481SSatish Balay } 41275cae7c1SHong Zhang space = stash->space; 41375cae7c1SHong Zhang l = space->local_used; 41475cae7c1SHong Zhang bs2 = bs*bs; 4154c1ff481SSatish Balay for (i=0; i<n; i++) { 41675cae7c1SHong Zhang space->idx[l] = row; 41775cae7c1SHong Zhang space->idy[l] = idxn[i]; 41875cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 41975cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 42075cae7c1SHong Zhang funtion call */ 42175cae7c1SHong Zhang array = space->val + bs2*l; 42275cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 42375cae7c1SHong Zhang for (j=0; j<bs; j++) { 4248865f1eaSKarl Rupp for (k=0; k<bs; k++) array[k] = vals[k]; 42575cae7c1SHong Zhang array += bs; 42675cae7c1SHong Zhang vals += rmax*bs; 42775cae7c1SHong Zhang } 4285bd3b8fbSHong Zhang l++; 429a2d1c673SSatish Balay } 4305bd3b8fbSHong Zhang stash->n += n; 43175cae7c1SHong Zhang space->local_used += n; 43275cae7c1SHong Zhang space->local_remaining -= n; 4333a40ed3dSBarry Smith PetscFunctionReturn(0); 4349417f4adSLois Curfman McInnes } 4354c1ff481SSatish Balay /* 4368798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4374c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4384c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4394c1ff481SSatish Balay processors. 440bc5ccf88SSatish Balay 4414c1ff481SSatish Balay Input Parameters: 4424c1ff481SSatish Balay stash - the stash 4434c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4444c1ff481SSatish Balay for each node. 4454c1ff481SSatish Balay 4464c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4474c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4484c1ff481SSatish Balay the proper global indices. 4494c1ff481SSatish Balay */ 4501e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 451bc5ccf88SSatish Balay { 452ac2b2aa0SJed Brown PetscErrorCode ierr; 453ac2b2aa0SJed Brown 454ac2b2aa0SJed Brown PetscFunctionBegin; 455ac2b2aa0SJed Brown ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 456ac2b2aa0SJed Brown PetscFunctionReturn(0); 457ac2b2aa0SJed Brown } 458ac2b2aa0SJed Brown 459ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 460ac2b2aa0SJed Brown { 461c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 462fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4636849ba73SBarry Smith PetscErrorCode ierr; 46475cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 46554f21887SBarry Smith PetscScalar **rvalues,*svalues; 466bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 467563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 46876ec1555SBarry Smith PetscMPIInt *sizes,*nlengths,nreceives; 4695bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 47054f21887SBarry Smith PetscScalar *sp_val; 4715bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 472bc5ccf88SSatish Balay 473bc5ccf88SSatish Balay PetscFunctionBegin; 4744b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 4754b4eb8d3SJed Brown InsertMode addv; 476b2566f29SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4774b4eb8d3SJed Brown if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 4784b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */ 4794b4eb8d3SJed Brown } 4804b4eb8d3SJed Brown 4814c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 48275cae7c1SHong Zhang 483bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 484037dbc42SBarry Smith ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 4851795a4d1SJed Brown ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 486037dbc42SBarry Smith ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 487a2d1c673SSatish Balay 48875cae7c1SHong Zhang i = j = 0; 4897357eb19SBarry Smith lastidx = -1; 4905bd3b8fbSHong Zhang space = stash->space_head; 4916c4ed002SBarry Smith while (space) { 49275cae7c1SHong Zhang space_next = space->next; 4935bd3b8fbSHong Zhang sp_idx = space->idx; 49475cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 4957357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4965bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4977357eb19SBarry Smith lastidx = idx; 4987357eb19SBarry Smith for (; j<size; j++) { 4994c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 500563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 501bc5ccf88SSatish Balay } 502bc5ccf88SSatish Balay } 50375cae7c1SHong Zhang i++; 50475cae7c1SHong Zhang } 50575cae7c1SHong Zhang space = space_next; 506bc5ccf88SSatish Balay } 507563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 508563fb871SSatish Balay for (i=0, nsends=0; i<size; i++) { 5098865f1eaSKarl Rupp if (nlengths[i]) { 51076ec1555SBarry Smith sizes[i] = 1; nsends++; 5118865f1eaSKarl Rupp } 512563fb871SSatish Balay } 513bc5ccf88SSatish Balay 51454f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 515563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 51676ec1555SBarry Smith ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 517563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 518563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 519563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 520563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 521563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 522563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 523563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 524563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 5258865f1eaSKarl Rupp ierr = PetscFree(olengths);CHKERRQ(ierr);} 526bc5ccf88SSatish Balay 527bc5ccf88SSatish Balay /* do sends: 528bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 529bc5ccf88SSatish Balay the ith processor 530bc5ccf88SSatish Balay */ 531dcca6d9dSJed Brown ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 532785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 533dcca6d9dSJed Brown ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 534a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 535bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 536bc5ccf88SSatish Balay for (i=1; i<size; i++) { 537563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 538533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 539bc5ccf88SSatish Balay } 54075cae7c1SHong Zhang 54175cae7c1SHong Zhang i = 0; 5425bd3b8fbSHong Zhang space = stash->space_head; 5436c4ed002SBarry Smith while (space) { 54475cae7c1SHong Zhang space_next = space->next; 5455bd3b8fbSHong Zhang sp_idx = space->idx; 5465bd3b8fbSHong Zhang sp_idy = space->idy; 5475bd3b8fbSHong Zhang sp_val = space->val; 54875cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 549bc5ccf88SSatish Balay j = owner[i]; 550a2d1c673SSatish Balay if (bs2 == 1) { 5515bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 552a2d1c673SSatish Balay } else { 553c1ac3661SBarry Smith PetscInt k; 55454f21887SBarry Smith PetscScalar *buf1,*buf2; 5554c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 556b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5578865f1eaSKarl Rupp for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 558a2d1c673SSatish Balay } 5595bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5605bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 561bc5ccf88SSatish Balay startv[j]++; 562bc5ccf88SSatish Balay starti[j]++; 56375cae7c1SHong Zhang i++; 56475cae7c1SHong Zhang } 56575cae7c1SHong Zhang space = space_next; 566bc5ccf88SSatish Balay } 567bc5ccf88SSatish Balay startv[0] = 0; 5688865f1eaSKarl Rupp for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 569e5d0e772SSatish Balay 570bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 57176ec1555SBarry Smith if (sizes[i]) { 572563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 573a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 574bc5ccf88SSatish Balay } 575b85c94c3SSatish Balay } 5766cf91177SBarry Smith #if defined(PETSC_USE_INFO) 57793157e10SBarry Smith ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 578e5d0e772SSatish Balay for (i=0; i<size; i++) { 57976ec1555SBarry Smith if (sizes[i]) { 58030c47e72SSatish Balay ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 581e5d0e772SSatish Balay } 582e5d0e772SSatish Balay } 583e5d0e772SSatish Balay #endif 584c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 585606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 586c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 58776ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 588a2d1c673SSatish Balay 589563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 590785e854fSJed Brown ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 591563fb871SSatish Balay 592563fb871SSatish Balay for (i=0; i<nreceives; i++) { 593563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 594563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 595563fb871SSatish Balay } 596563fb871SSatish Balay stash->recv_waits = recv_waits; 5978865f1eaSKarl Rupp 598563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 599563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 600563fb871SSatish Balay 601c05d87d6SBarry Smith stash->svalues = svalues; 602c05d87d6SBarry Smith stash->sindices = sindices; 603c05d87d6SBarry Smith stash->rvalues = rvalues; 604c05d87d6SBarry Smith stash->rindices = rindices; 605c05d87d6SBarry Smith stash->send_waits = send_waits; 606c05d87d6SBarry Smith stash->nsends = nsends; 607c05d87d6SBarry Smith stash->nrecvs = nreceives; 60867318a8aSJed Brown stash->reproduce_count = 0; 609bc5ccf88SSatish Balay PetscFunctionReturn(0); 610bc5ccf88SSatish Balay } 611bc5ccf88SSatish Balay 612a2d1c673SSatish Balay /* 6138798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 6148798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6154c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6164c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6174c1ff481SSatish Balay 6184c1ff481SSatish Balay Input Parameters: 6194c1ff481SSatish Balay stash - the stash 6204c1ff481SSatish Balay 6214c1ff481SSatish Balay Output Parameters: 6224c1ff481SSatish Balay nvals - the number of entries in the current message. 6234c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6244c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6254c1ff481SSatish Balay vals - the values 6264c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6274c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6284c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 629a2d1c673SSatish Balay */ 63054f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 631bc5ccf88SSatish Balay { 6326849ba73SBarry Smith PetscErrorCode ierr; 633ac2b2aa0SJed Brown 634ac2b2aa0SJed Brown PetscFunctionBegin; 635ac2b2aa0SJed Brown ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 636ac2b2aa0SJed Brown PetscFunctionReturn(0); 637ac2b2aa0SJed Brown } 638ac2b2aa0SJed Brown 639ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 640ac2b2aa0SJed Brown { 641ac2b2aa0SJed Brown PetscErrorCode ierr; 642533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 643fe09c992SBarry Smith PetscInt bs2; 644a2d1c673SSatish Balay MPI_Status recv_status; 645ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 646bc5ccf88SSatish Balay 647bc5ccf88SSatish Balay PetscFunctionBegin; 648a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 649a2d1c673SSatish Balay /* Return if no more messages to process */ 6508865f1eaSKarl Rupp if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 651a2d1c673SSatish Balay 6524c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 65367318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 654a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 655a2d1c673SSatish Balay while (!match_found) { 65667318a8aSJed Brown if (stash->reproduce) { 65767318a8aSJed Brown i = stash->reproduce_count++; 65867318a8aSJed Brown ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 65967318a8aSJed Brown } else { 660a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 66167318a8aSJed Brown } 662e32f2f54SBarry Smith if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 663533163c2SBarry Smith 66467318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 665a2d1c673SSatish Balay if (i % 2) { 666a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 6678865f1eaSKarl Rupp 668c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 6698865f1eaSKarl Rupp 670a2d1c673SSatish Balay *nvals = *nvals/bs2; 671563fb871SSatish Balay } else { 672563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 6738865f1eaSKarl Rupp 674563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 6758865f1eaSKarl Rupp 676563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 677bc5ccf88SSatish Balay } 678a2d1c673SSatish Balay 679cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 680c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 681c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 682a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 683563fb871SSatish Balay *rows = stash->rindices[i2]; 684a2d1c673SSatish Balay *cols = *rows + *nvals; 685563fb871SSatish Balay *vals = stash->rvalues[i1]; 686a2d1c673SSatish Balay *flg = 1; 687a2d1c673SSatish Balay stash->nprocessed++; 68835d8aa7fSBarry Smith match_found = PETSC_TRUE; 689bc5ccf88SSatish Balay } 690bc5ccf88SSatish Balay } 691bc5ccf88SSatish Balay PetscFunctionReturn(0); 692bc5ccf88SSatish Balay } 693d7d60843SJed Brown 694d7d60843SJed Brown typedef struct { 695d7d60843SJed Brown PetscInt row; 696d7d60843SJed Brown PetscInt col; 697d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 698d7d60843SJed Brown } MatStashBlock; 699d7d60843SJed Brown 700d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 701d7d60843SJed Brown { 702d7d60843SJed Brown PetscErrorCode ierr; 703d7d60843SJed Brown PetscMatStashSpace space; 704d7d60843SJed Brown PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 705d7d60843SJed Brown PetscScalar **valptr; 706d7d60843SJed Brown 707d7d60843SJed Brown PetscFunctionBegin; 708d7d60843SJed Brown ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 709d7d60843SJed Brown for (space=stash->space_head,cnt=0; space; space=space->next) { 710d7d60843SJed Brown for (i=0; i<space->local_used; i++) { 711d7d60843SJed Brown row[cnt] = space->idx[i]; 712d7d60843SJed Brown col[cnt] = space->idy[i]; 713d7d60843SJed Brown valptr[cnt] = &space->val[i*bs2]; 714d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 715d7d60843SJed Brown cnt++; 716d7d60843SJed Brown } 717d7d60843SJed Brown } 718d7d60843SJed Brown if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); 719d7d60843SJed Brown ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 720d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 721d7d60843SJed Brown for (rowstart=0,cnt=0,i=1; i<=n; i++) { 722d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 723d7d60843SJed Brown PetscInt colstart; 724d7d60843SJed Brown ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 725d7d60843SJed Brown for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */ 726d7d60843SJed Brown PetscInt j,l; 727d7d60843SJed Brown MatStashBlock *block; 728d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 729d7d60843SJed Brown block->row = row[rowstart]; 730d7d60843SJed Brown block->col = col[colstart]; 731d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 732d7d60843SJed Brown for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 733d7d60843SJed Brown if (insertmode == ADD_VALUES) { 734d7d60843SJed Brown for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 735d7d60843SJed Brown } else { 736d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 737d7d60843SJed Brown } 738d7d60843SJed Brown } 739d7d60843SJed Brown colstart = j; 740d7d60843SJed Brown } 741d7d60843SJed Brown rowstart = i; 742d7d60843SJed Brown } 743d7d60843SJed Brown } 744d7d60843SJed Brown ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 745d7d60843SJed Brown PetscFunctionReturn(0); 746d7d60843SJed Brown } 747d7d60843SJed Brown 748d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 749d7d60843SJed Brown { 750d7d60843SJed Brown PetscErrorCode ierr; 751d7d60843SJed Brown 752d7d60843SJed Brown PetscFunctionBegin; 753d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 754d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 755d7d60843SJed Brown PetscMPIInt blocklens[2]; 756d7d60843SJed Brown MPI_Aint displs[2]; 757d7d60843SJed Brown MPI_Datatype types[2],stype; 7589503c6c6SJed Brown /* C++ std::complex is not my favorite datatype. Since it is not POD, we cannot use offsetof to find the offset of 7599503c6c6SJed Brown * vals. But the layout is actually guaranteed by the standard, so we do a little dance here with struct 7609503c6c6SJed Brown * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset. 7619503c6c6SJed Brown */ 7629503c6c6SJed Brown struct DummyBlock {PetscInt row,col; PetscReal vals;}; 763d7d60843SJed Brown 7649503c6c6SJed Brown stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar); 765d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 766d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 767d7d60843SJed Brown } 768d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 769d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 770d7d60843SJed Brown ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 771d7d60843SJed Brown blocklens[0] = 2; 772d7d60843SJed Brown blocklens[1] = bs2; 7739503c6c6SJed Brown displs[0] = offsetof(struct DummyBlock,row); 7749503c6c6SJed Brown displs[1] = offsetof(struct DummyBlock,vals); 775d7d60843SJed Brown types[0] = MPIU_INT; 776d7d60843SJed Brown types[1] = MPIU_SCALAR; 777d7d60843SJed Brown ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr); 778d7d60843SJed Brown ierr = MPI_Type_commit(&stype);CHKERRQ(ierr); 779d7d60843SJed Brown ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */ 780d7d60843SJed Brown ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr); 781d7d60843SJed Brown ierr = MPI_Type_free(&stype);CHKERRQ(ierr); 782d7d60843SJed Brown } 783d7d60843SJed Brown PetscFunctionReturn(0); 784d7d60843SJed Brown } 785d7d60843SJed Brown 786d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message. 787d7d60843SJed Brown * Here we post the main sends. 788d7d60843SJed Brown */ 789d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 790d7d60843SJed Brown { 791d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 792d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)sdata; 793d7d60843SJed Brown PetscErrorCode ierr; 794d7d60843SJed Brown 795d7d60843SJed Brown PetscFunctionBegin; 796d7d60843SJed Brown if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 797d7d60843SJed Brown ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 798d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 799d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 800d7d60843SJed Brown PetscFunctionReturn(0); 801d7d60843SJed Brown } 802d7d60843SJed Brown 803d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message. 804d7d60843SJed Brown * Here we post the main recvs. 805d7d60843SJed Brown */ 806d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 807d7d60843SJed Brown { 808d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 809d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)rdata; 810d7d60843SJed Brown MatStashFrame *frame; 811d7d60843SJed Brown PetscErrorCode ierr; 812d7d60843SJed Brown 813d7d60843SJed Brown PetscFunctionBegin; 814d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 815d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 816d7d60843SJed Brown ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 817d7d60843SJed Brown frame->count = hdr->count; 818d7d60843SJed Brown frame->pending = 1; 819d7d60843SJed Brown PetscFunctionReturn(0); 820d7d60843SJed Brown } 821d7d60843SJed Brown 822d7d60843SJed Brown /* 823d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 824d7d60843SJed Brown */ 825d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 826d7d60843SJed Brown { 827d7d60843SJed Brown PetscErrorCode ierr; 828d7d60843SJed Brown size_t nblocks; 829d7d60843SJed Brown char *sendblocks; 830d7d60843SJed Brown 831d7d60843SJed Brown PetscFunctionBegin; 8324b4eb8d3SJed Brown #if defined(PETSC_USE_DEBUG) 8334b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 8344b4eb8d3SJed Brown InsertMode addv; 835b2566f29SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 8364b4eb8d3SJed Brown if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 8374b4eb8d3SJed Brown } 8384b4eb8d3SJed Brown #endif 8394b4eb8d3SJed Brown 84097da8949SJed Brown if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */ 84197da8949SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 84297da8949SJed Brown } 84397da8949SJed Brown 844d7d60843SJed Brown ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 845d7d60843SJed Brown ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 846d7d60843SJed Brown ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 847d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 84897da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 84923b7d1baSJed Brown PetscInt i; 85023b7d1baSJed Brown size_t b; 85197da8949SJed Brown for (i=0,b=0; i<stash->nsendranks; i++) { 85297da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 85397da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 85497da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 85597da8949SJed Brown for ( ; b<nblocks; b++) { 85697da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 85797da8949SJed Brown if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 85897da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 85997da8949SJed Brown stash->sendhdr[i].count++; 86097da8949SJed Brown } 86197da8949SJed Brown } 86297da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 86323b7d1baSJed Brown PetscInt sendno; 86423b7d1baSJed Brown size_t i,rowstart; 865d7d60843SJed Brown 866d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 867d7d60843SJed Brown stash->nsendranks = 0; 868d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 8697e2ea869SJed Brown PetscInt owner; 870d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 871d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 872d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 873d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 874d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8757e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 876d7d60843SJed Brown } 877d7d60843SJed Brown stash->nsendranks++; 878d7d60843SJed Brown rowstart = i; 879d7d60843SJed Brown } 880d7d60843SJed Brown ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 881d7d60843SJed Brown 882d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 883d7d60843SJed Brown sendno = 0; 884d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 885d7d60843SJed Brown PetscInt owner; 886d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 887d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 888d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 889d7d60843SJed Brown stash->sendranks[sendno] = owner; 890d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 891d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8927e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 893d7d60843SJed Brown } 894d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 895d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 896d7d60843SJed Brown stash->sendhdr[sendno].count = i - rowstart; 897d7d60843SJed Brown sendno++; 898d7d60843SJed Brown rowstart = i; 899d7d60843SJed Brown } 900d7d60843SJed Brown if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 901d7d60843SJed Brown } 902d7d60843SJed Brown 9034b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 9044b4eb8d3SJed Brown * message or a dummy entry of some sort. */ 9054b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) { 90623b7d1baSJed Brown size_t i; 9074b4eb8d3SJed Brown for (i=0; i<nblocks; i++) { 9084b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 9094b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row+1); 9104b4eb8d3SJed Brown } 9114b4eb8d3SJed Brown } 9124b4eb8d3SJed Brown 91397da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { 91497da8949SJed Brown PetscMPIInt i,tag; 91597da8949SJed Brown ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 91697da8949SJed Brown for (i=0; i<stash->nrecvranks; i++) { 91797da8949SJed Brown ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 91897da8949SJed Brown } 91997da8949SJed Brown for (i=0; i<stash->nsendranks; i++) { 92097da8949SJed Brown ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 92197da8949SJed Brown } 92297da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 92397da8949SJed Brown } else { 924e0ddb6e8SJed Brown ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 925e0ddb6e8SJed Brown &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 926d7d60843SJed Brown MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 927b5ddc6f1SJed Brown ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 92897da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 92997da8949SJed Brown } 930d7d60843SJed Brown 931d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 932d7d60843SJed Brown stash->recvframe_active = NULL; 933d7d60843SJed Brown stash->recvframe_i = 0; 934d7d60843SJed Brown stash->some_i = 0; 935d7d60843SJed Brown stash->some_count = 0; 936d7d60843SJed Brown stash->recvcount = 0; 93797da8949SJed Brown stash->subset_off_proc = mat->subsetoffprocentries; 9384b4eb8d3SJed Brown stash->insertmode = &mat->insertmode; 939d7d60843SJed Brown PetscFunctionReturn(0); 940d7d60843SJed Brown } 941d7d60843SJed Brown 942d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 943d7d60843SJed Brown { 944d7d60843SJed Brown PetscErrorCode ierr; 945d7d60843SJed Brown MatStashBlock *block; 946d7d60843SJed Brown 947d7d60843SJed Brown PetscFunctionBegin; 948d7d60843SJed Brown *flg = 0; 949d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 950d7d60843SJed Brown if (stash->some_i == stash->some_count) { 951d7d60843SJed Brown if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 952d7d60843SJed Brown ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 953d7d60843SJed Brown stash->some_i = 0; 954d7d60843SJed Brown } 955d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 956d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 957d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 958d7d60843SJed Brown ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 959d7d60843SJed Brown } 9604b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 9614b4eb8d3SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 9624b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 9634b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 9644b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 9654b4eb8d3SJed Brown } 966d7d60843SJed Brown stash->some_i++; 967d7d60843SJed Brown stash->recvcount++; 968d7d60843SJed Brown stash->recvframe_i = 0; 969d7d60843SJed Brown } 970d7d60843SJed Brown *n = 1; 971d7d60843SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 9724b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1); 973d7d60843SJed Brown *row = &block->row; 974d7d60843SJed Brown *col = &block->col; 975d7d60843SJed Brown *val = block->vals; 976d7d60843SJed Brown stash->recvframe_i++; 977d7d60843SJed Brown *flg = 1; 978d7d60843SJed Brown PetscFunctionReturn(0); 979d7d60843SJed Brown } 980d7d60843SJed Brown 981d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 982d7d60843SJed Brown { 983d7d60843SJed Brown PetscErrorCode ierr; 984d7d60843SJed Brown 985d7d60843SJed Brown PetscFunctionBegin; 986d7d60843SJed Brown ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 9873575f486SJed Brown if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 9883575f486SJed Brown void *dummy; 9893575f486SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 9903575f486SJed Brown } else { /* No reuse, so collect everything. */ 991d7d60843SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 99297da8949SJed Brown } 993d7d60843SJed Brown 994d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 995d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 996d7d60843SJed Brown Also update the oldmax, only if it increases */ 997d7d60843SJed Brown if (stash->n) { 998d7d60843SJed Brown PetscInt bs2 = stash->bs*stash->bs; 999d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1000d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1001d7d60843SJed Brown } 1002d7d60843SJed Brown 1003d7d60843SJed Brown stash->nmax = 0; 1004d7d60843SJed Brown stash->n = 0; 1005d7d60843SJed Brown stash->reallocs = -1; 1006d7d60843SJed Brown stash->nprocessed = 0; 1007d7d60843SJed Brown 1008d7d60843SJed Brown ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1009d7d60843SJed Brown 1010d7d60843SJed Brown stash->space = 0; 1011d7d60843SJed Brown 1012d7d60843SJed Brown PetscFunctionReturn(0); 1013d7d60843SJed Brown } 1014d7d60843SJed Brown 1015d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1016d7d60843SJed Brown { 1017d7d60843SJed Brown PetscErrorCode ierr; 1018d7d60843SJed Brown 1019d7d60843SJed Brown PetscFunctionBegin; 1020d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1021d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1022d7d60843SJed Brown stash->recvframes = NULL; 1023d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1024d7d60843SJed Brown if (stash->blocktype != MPI_DATATYPE_NULL) { 1025d7d60843SJed Brown ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1026d7d60843SJed Brown } 1027d7d60843SJed Brown stash->nsendranks = 0; 1028d7d60843SJed Brown stash->nrecvranks = 0; 1029d7d60843SJed Brown ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1030d7d60843SJed Brown ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1031d7d60843SJed Brown ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1032d7d60843SJed Brown ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1033d7d60843SJed Brown ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1034d7d60843SJed Brown ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1035d7d60843SJed Brown PetscFunctionReturn(0); 1036d7d60843SJed Brown } 1037