12d5177cdSBarry Smith 2af0996ceSBarry Smith #include <petsc/private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 1375d39b6aSBarry Smith #endif 14d7d60843SJed Brown 159417f4adSLois Curfman McInnes /* 168798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 174c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 184c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 199417f4adSLois Curfman McInnes 204c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 214c1ff481SSatish Balay 224c1ff481SSatish Balay Input Parameters: 234c1ff481SSatish Balay comm - communicator, required for scatters. 244c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 254c1ff481SSatish Balay 264c1ff481SSatish Balay Output Parameters: 274c1ff481SSatish Balay stash - the newly created stash 289417f4adSLois Curfman McInnes */ 29c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 309417f4adSLois Curfman McInnes { 31533163c2SBarry Smith PetscInt max,*opt,nopt,i; 32ace3abfcSBarry Smith PetscBool flg; 33bc5ccf88SSatish Balay 343a40ed3dSBarry Smith PetscFunctionBegin; 35bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 36752ec6e0SSatish Balay stash->comm = comm; 378865f1eaSKarl Rupp 389566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm,&stash->tag1)); 399566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm,&stash->tag2)); 409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(stash->comm,&stash->size)); 419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(stash->comm,&stash->rank)); 429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*stash->size,&stash->flg_v)); 43533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 44533163c2SBarry Smith 45434d7ff9SSatish Balay nopt = stash->size; 469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nopt,&opt)); 479566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg)); 48434d7ff9SSatish Balay if (flg) { 49434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 50434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 51434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 52f4ab19daSSatish Balay else max = 0; /* Use default */ 53434d7ff9SSatish Balay stash->umax = max; 54434d7ff9SSatish Balay } else { 55434d7ff9SSatish Balay stash->umax = 0; 56434d7ff9SSatish Balay } 579566063dSJacob Faibussowitsch PetscCall(PetscFree(opt)); 584c1ff481SSatish Balay if (bs <= 0) bs = 1; 59a2d1c673SSatish Balay 604c1ff481SSatish Balay stash->bs = bs; 619417f4adSLois Curfman McInnes stash->nmax = 0; 62434d7ff9SSatish Balay stash->oldnmax = 0; 639417f4adSLois Curfman McInnes stash->n = 0; 644c1ff481SSatish Balay stash->reallocs = -1; 65f4259b30SLisandro Dalcin stash->space_head = NULL; 66f4259b30SLisandro Dalcin stash->space = NULL; 679417f4adSLois Curfman McInnes 68f4259b30SLisandro Dalcin stash->send_waits = NULL; 69f4259b30SLisandro Dalcin stash->recv_waits = NULL; 70f4259b30SLisandro Dalcin stash->send_status = NULL; 71bc5ccf88SSatish Balay stash->nsends = 0; 72bc5ccf88SSatish Balay stash->nrecvs = 0; 73f4259b30SLisandro Dalcin stash->svalues = NULL; 74f4259b30SLisandro Dalcin stash->rvalues = NULL; 75f4259b30SLisandro Dalcin stash->rindices = NULL; 76a2d1c673SSatish Balay stash->nprocessed = 0; 7767318a8aSJed Brown stash->reproduce = PETSC_FALSE; 78d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 798865f1eaSKarl Rupp 809566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL)); 811667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 82ca723aa4SStefano Zampini flg = PETSC_FALSE; 839566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL)); 84b30fb036SBarry Smith if (!flg) { 85d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 86d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 87d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 88d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 89ac2b2aa0SJed Brown } else { 901667be42SBarry Smith #endif 91ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 92ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 93ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 94ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 951667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 96ac2b2aa0SJed Brown } 971667be42SBarry Smith #endif 983a40ed3dSBarry Smith PetscFunctionReturn(0); 999417f4adSLois Curfman McInnes } 1009417f4adSLois Curfman McInnes 1014c1ff481SSatish Balay /* 1028798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 1034c1ff481SSatish Balay */ 104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 1059417f4adSLois Curfman McInnes { 106bc5ccf88SSatish Balay PetscFunctionBegin; 1079566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 1089566063dSJacob Faibussowitsch if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash)); 1098865f1eaSKarl Rupp 110f4259b30SLisandro Dalcin stash->space = NULL; 1118865f1eaSKarl Rupp 1129566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->flg_v)); 113bc5ccf88SSatish Balay PetscFunctionReturn(0); 114bc5ccf88SSatish Balay } 115bc5ccf88SSatish Balay 1164c1ff481SSatish Balay /* 11767318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1184c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 11967318a8aSJed Brown all the memory used for message passing is cleaned up. This 1204c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1214c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1224c1ff481SSatish Balay so that the same value can be used the next time through. 1234c1ff481SSatish Balay */ 124dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 125bc5ccf88SSatish Balay { 126ac2b2aa0SJed Brown PetscFunctionBegin; 1279566063dSJacob Faibussowitsch PetscCall((*stash->ScatterEnd)(stash)); 128ac2b2aa0SJed Brown PetscFunctionReturn(0); 129ac2b2aa0SJed Brown } 130ac2b2aa0SJed Brown 131d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 132ac2b2aa0SJed Brown { 133533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 134a2d1c673SSatish Balay MPI_Status *send_status; 135a2d1c673SSatish Balay 1363a40ed3dSBarry Smith PetscFunctionBegin; 137533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 138a2d1c673SSatish Balay /* wait on sends */ 139a2d1c673SSatish Balay if (nsends) { 1409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*nsends,&send_status)); 1419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(2*nsends,stash->send_waits,send_status)); 1429566063dSJacob Faibussowitsch PetscCall(PetscFree(send_status)); 143a2d1c673SSatish Balay } 144a2d1c673SSatish Balay 145c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 146434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 147434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 148b9b97703SBarry Smith if (stash->n) { 14994b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1508a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 151434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 152b9b97703SBarry Smith } 153434d7ff9SSatish Balay 154d07ff455SSatish Balay stash->nmax = 0; 155d07ff455SSatish Balay stash->n = 0; 1564c1ff481SSatish Balay stash->reallocs = -1; 157a2d1c673SSatish Balay stash->nprocessed = 0; 1588865f1eaSKarl Rupp 1599566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 1608865f1eaSKarl Rupp 161f4259b30SLisandro Dalcin stash->space = NULL; 1628865f1eaSKarl Rupp 1639566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->send_waits)); 1649566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recv_waits)); 1659566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->svalues,stash->sindices)); 1669566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues[0])); 1679566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues)); 1689566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices[0])); 1699566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices)); 1703a40ed3dSBarry Smith PetscFunctionReturn(0); 1719417f4adSLois Curfman McInnes } 1729417f4adSLois Curfman McInnes 1734c1ff481SSatish Balay /* 1748798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1754c1ff481SSatish Balay 1764c1ff481SSatish Balay Input Parameters: 1774c1ff481SSatish Balay stash - the stash 17894b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1794c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1804c1ff481SSatish Balay 1814c1ff481SSatish Balay */ 182c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 18397530c3fSBarry Smith { 184c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 18594b769a5SSatish Balay 1863a40ed3dSBarry Smith PetscFunctionBegin; 1871ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1881ecfd215SBarry Smith if (reallocs) { 189434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 190434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1911ecfd215SBarry Smith } 192bc5ccf88SSatish Balay PetscFunctionReturn(0); 193bc5ccf88SSatish Balay } 1944c1ff481SSatish Balay 1954c1ff481SSatish Balay /* 1968798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1974c1ff481SSatish Balay 1984c1ff481SSatish Balay Input Parameters: 1994c1ff481SSatish Balay stash - the stash 2004c1ff481SSatish Balay max - the value that is used as the max size of the stash. 2014c1ff481SSatish Balay this value is used while allocating memory. 2024c1ff481SSatish Balay */ 203c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 204bc5ccf88SSatish Balay { 205bc5ccf88SSatish Balay PetscFunctionBegin; 206434d7ff9SSatish Balay stash->umax = max; 2073a40ed3dSBarry Smith PetscFunctionReturn(0); 20897530c3fSBarry Smith } 20997530c3fSBarry Smith 2108798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2114c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2124c1ff481SSatish Balay being inserted into the stash. 2134c1ff481SSatish Balay 2144c1ff481SSatish Balay Input Parameters: 2154c1ff481SSatish Balay stash - the stash 2164c1ff481SSatish Balay incr - the minimum increase requested 2174c1ff481SSatish Balay 2184c1ff481SSatish Balay Notes: 2194c1ff481SSatish Balay This routine doubles the currently used memory. 2204c1ff481SSatish Balay */ 221c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2229417f4adSLois Curfman McInnes { 2235bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2249417f4adSLois Curfman McInnes 2253a40ed3dSBarry Smith PetscFunctionBegin; 2269417f4adSLois Curfman McInnes /* allocate a larger stash */ 227c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 228434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 229434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 230c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 231434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 232434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 233434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2344c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 235d07ff455SSatish Balay 23675cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 2379566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceGet(bs2,newnmax,&stash->space)); 238b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 239b087b6d6SSatish Balay stash->space_head = stash->space; 24075cae7c1SHong Zhang } 241b087b6d6SSatish Balay 242bc5ccf88SSatish Balay stash->reallocs++; 24375cae7c1SHong Zhang stash->nmax = newnmax; 244bc5ccf88SSatish Balay PetscFunctionReturn(0); 245bc5ccf88SSatish Balay } 246bc5ccf88SSatish Balay /* 2478798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2484c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2494c1ff481SSatish Balay can be inserted with a single call to this function. 2504c1ff481SSatish Balay 2514c1ff481SSatish Balay Input Parameters: 2524c1ff481SSatish Balay stash - the stash 2534c1ff481SSatish Balay row - the global row correspoiding to the values 2544c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2554c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2564c1ff481SSatish Balay values - the values inserted 257bc5ccf88SSatish Balay */ 258ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 259bc5ccf88SSatish Balay { 260b400d20cSBarry Smith PetscInt i,k,cnt = 0; 26175cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 262bc5ccf88SSatish Balay 263bc5ccf88SSatish Balay PetscFunctionBegin; 2644c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 26575cae7c1SHong Zhang if (!space || space->local_remaining < n) { 2669566063dSJacob Faibussowitsch PetscCall(MatStashExpand_Private(stash,n)); 2679417f4adSLois Curfman McInnes } 26875cae7c1SHong Zhang space = stash->space; 26975cae7c1SHong Zhang k = space->local_used; 2704c1ff481SSatish Balay for (i=0; i<n; i++) { 27114069ce9SStefano Zampini if (ignorezeroentries && values && values[i] == 0.0) continue; 27275cae7c1SHong Zhang space->idx[k] = row; 27375cae7c1SHong Zhang space->idy[k] = idxn[i]; 27414069ce9SStefano Zampini space->val[k] = values ? values[i] : 0.0; 27575cae7c1SHong Zhang k++; 276b400d20cSBarry Smith cnt++; 2779417f4adSLois Curfman McInnes } 278b400d20cSBarry Smith stash->n += cnt; 279b400d20cSBarry Smith space->local_used += cnt; 280b400d20cSBarry Smith space->local_remaining -= cnt; 281a2d1c673SSatish Balay PetscFunctionReturn(0); 282a2d1c673SSatish Balay } 28375cae7c1SHong Zhang 2844c1ff481SSatish Balay /* 2858798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2864c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2874c1ff481SSatish Balay can be inserted with a single call to this function. 288a2d1c673SSatish Balay 2894c1ff481SSatish Balay Input Parameters: 2904c1ff481SSatish Balay stash - the stash 2914c1ff481SSatish Balay row - the global row correspoiding to the values 2924c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2934c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2944c1ff481SSatish Balay values - the values inserted 2954c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2964c1ff481SSatish Balay this happens because the input is columnoriented. 2974c1ff481SSatish Balay */ 298ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 299a2d1c673SSatish Balay { 30050e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 30175cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 302a2d1c673SSatish Balay 3034c1ff481SSatish Balay PetscFunctionBegin; 3044c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 30575cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3069566063dSJacob Faibussowitsch PetscCall(MatStashExpand_Private(stash,n)); 3074c1ff481SSatish Balay } 30875cae7c1SHong Zhang space = stash->space; 30975cae7c1SHong Zhang k = space->local_used; 3104c1ff481SSatish Balay for (i=0; i<n; i++) { 31114069ce9SStefano Zampini if (ignorezeroentries && values && values[i*stepval] == 0.0) continue; 31275cae7c1SHong Zhang space->idx[k] = row; 31375cae7c1SHong Zhang space->idy[k] = idxn[i]; 31414069ce9SStefano Zampini space->val[k] = values ? values[i*stepval] : 0.0; 31575cae7c1SHong Zhang k++; 316b400d20cSBarry Smith cnt++; 3174c1ff481SSatish Balay } 318b400d20cSBarry Smith stash->n += cnt; 319b400d20cSBarry Smith space->local_used += cnt; 320b400d20cSBarry Smith space->local_remaining -= cnt; 3214c1ff481SSatish Balay PetscFunctionReturn(0); 3224c1ff481SSatish Balay } 3234c1ff481SSatish Balay 3244c1ff481SSatish Balay /* 3258798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3264c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3274c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3284c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3294c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3304c1ff481SSatish Balay 3314c1ff481SSatish Balay Input Parameters: 3324c1ff481SSatish Balay stash - the stash 3334c1ff481SSatish Balay row - the global block-row correspoiding to the values 3344c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3354c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3364c1ff481SSatish Balay values. Each block is of size bs*bs. 3374c1ff481SSatish Balay values - the values inserted 3384c1ff481SSatish Balay rmax - the number of block-rows in the original block. 339a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 3404c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3414c1ff481SSatish Balay */ 34254f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3434c1ff481SSatish Balay { 34475cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 34554f21887SBarry Smith const PetscScalar *vals; 34654f21887SBarry Smith PetscScalar *array; 34775cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 348a2d1c673SSatish Balay 349a2d1c673SSatish Balay PetscFunctionBegin; 35075cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3519566063dSJacob Faibussowitsch PetscCall(MatStashExpand_Private(stash,n)); 352a2d1c673SSatish Balay } 35375cae7c1SHong Zhang space = stash->space; 35475cae7c1SHong Zhang l = space->local_used; 35575cae7c1SHong Zhang bs2 = bs*bs; 3564c1ff481SSatish Balay for (i=0; i<n; i++) { 35775cae7c1SHong Zhang space->idx[l] = row; 35875cae7c1SHong Zhang space->idy[l] = idxn[i]; 35975cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 36075cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 36175cae7c1SHong Zhang funtion call */ 36275cae7c1SHong Zhang array = space->val + bs2*l; 36375cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 36475cae7c1SHong Zhang for (j=0; j<bs; j++) { 36514069ce9SStefano Zampini for (k=0; k<bs; k++) array[k*bs] = values ? vals[k] : 0.0; 36675cae7c1SHong Zhang array++; 36775cae7c1SHong Zhang vals += cmax*bs; 36875cae7c1SHong Zhang } 36975cae7c1SHong Zhang l++; 370a2d1c673SSatish Balay } 3715bd3b8fbSHong Zhang stash->n += n; 37275cae7c1SHong Zhang space->local_used += n; 37375cae7c1SHong Zhang space->local_remaining -= n; 3744c1ff481SSatish Balay PetscFunctionReturn(0); 3754c1ff481SSatish Balay } 3764c1ff481SSatish Balay 3774c1ff481SSatish Balay /* 3788798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3794c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3804c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3814c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3824c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3834c1ff481SSatish Balay 3844c1ff481SSatish Balay Input Parameters: 3854c1ff481SSatish Balay stash - the stash 3864c1ff481SSatish Balay row - the global block-row correspoiding to the values 3874c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3884c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3894c1ff481SSatish Balay values. Each block is of size bs*bs. 3904c1ff481SSatish Balay values - the values inserted 3914c1ff481SSatish Balay rmax - the number of block-rows in the original block. 392a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 3934c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3944c1ff481SSatish Balay */ 39554f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3964c1ff481SSatish Balay { 39775cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 39854f21887SBarry Smith const PetscScalar *vals; 39954f21887SBarry Smith PetscScalar *array; 40075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4014c1ff481SSatish Balay 4024c1ff481SSatish Balay PetscFunctionBegin; 40375cae7c1SHong Zhang if (!space || space->local_remaining < n) { 4049566063dSJacob Faibussowitsch PetscCall(MatStashExpand_Private(stash,n)); 4054c1ff481SSatish Balay } 40675cae7c1SHong Zhang space = stash->space; 40775cae7c1SHong Zhang l = space->local_used; 40875cae7c1SHong Zhang bs2 = bs*bs; 4094c1ff481SSatish Balay for (i=0; i<n; i++) { 41075cae7c1SHong Zhang space->idx[l] = row; 41175cae7c1SHong Zhang space->idy[l] = idxn[i]; 41275cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 41375cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 41475cae7c1SHong Zhang funtion call */ 41575cae7c1SHong Zhang array = space->val + bs2*l; 41675cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 41775cae7c1SHong Zhang for (j=0; j<bs; j++) { 41814069ce9SStefano Zampini for (k=0; k<bs; k++) array[k] = values ? vals[k] : 0.0; 41975cae7c1SHong Zhang array += bs; 42075cae7c1SHong Zhang vals += rmax*bs; 42175cae7c1SHong Zhang } 4225bd3b8fbSHong Zhang l++; 423a2d1c673SSatish Balay } 4245bd3b8fbSHong Zhang stash->n += n; 42575cae7c1SHong Zhang space->local_used += n; 42675cae7c1SHong Zhang space->local_remaining -= n; 4273a40ed3dSBarry Smith PetscFunctionReturn(0); 4289417f4adSLois Curfman McInnes } 4294c1ff481SSatish Balay /* 4308798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4314c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4324c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4334c1ff481SSatish Balay processors. 434bc5ccf88SSatish Balay 4354c1ff481SSatish Balay Input Parameters: 4364c1ff481SSatish Balay stash - the stash 4374c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4384c1ff481SSatish Balay for each node. 4394c1ff481SSatish Balay 44095452b02SPatrick Sanan Notes: 44195452b02SPatrick Sanan The 'owners' array in the cased of the blocked-stash has the 4424c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4434c1ff481SSatish Balay the proper global indices. 4444c1ff481SSatish Balay */ 4451e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 446bc5ccf88SSatish Balay { 447ac2b2aa0SJed Brown PetscFunctionBegin; 4489566063dSJacob Faibussowitsch PetscCall((*stash->ScatterBegin)(mat,stash,owners)); 449ac2b2aa0SJed Brown PetscFunctionReturn(0); 450ac2b2aa0SJed Brown } 451ac2b2aa0SJed Brown 452ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 453ac2b2aa0SJed Brown { 454c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 455fe09c992SBarry Smith PetscInt size=stash->size,nsends; 45675cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 45754f21887SBarry Smith PetscScalar **rvalues,*svalues; 458bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 459563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 46076ec1555SBarry Smith PetscMPIInt *sizes,*nlengths,nreceives; 4615bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 46254f21887SBarry Smith PetscScalar *sp_val; 4635bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 464bc5ccf88SSatish Balay 465bc5ccf88SSatish Balay PetscFunctionBegin; 4664b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 4674b4eb8d3SJed Brown InsertMode addv; 4681c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); 46908401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 4704b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */ 4714b4eb8d3SJed Brown } 4724b4eb8d3SJed Brown 4734c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 47475cae7c1SHong Zhang 475bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 4769566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size,&nlengths)); 4779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n+1,&owner)); 478a2d1c673SSatish Balay 47975cae7c1SHong Zhang i = j = 0; 4807357eb19SBarry Smith lastidx = -1; 4815bd3b8fbSHong Zhang space = stash->space_head; 4826c4ed002SBarry Smith while (space) { 48375cae7c1SHong Zhang space_next = space->next; 4845bd3b8fbSHong Zhang sp_idx = space->idx; 48575cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 4867357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4875bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4887357eb19SBarry Smith lastidx = idx; 4897357eb19SBarry Smith for (; j<size; j++) { 4904c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 491563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 492bc5ccf88SSatish Balay } 493bc5ccf88SSatish Balay } 49475cae7c1SHong Zhang i++; 49575cae7c1SHong Zhang } 49675cae7c1SHong Zhang space = space_next; 497bc5ccf88SSatish Balay } 498071fcb05SBarry Smith 499563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 5009566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size,&sizes)); 501563fb871SSatish Balay for (i=0, nsends=0; i<size; i++) { 5028865f1eaSKarl Rupp if (nlengths[i]) { 50376ec1555SBarry Smith sizes[i] = 1; nsends++; 5048865f1eaSKarl Rupp } 505563fb871SSatish Balay } 506bc5ccf88SSatish Balay 50754f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 508563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 5099566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives)); 5109566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths)); 511563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 512563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 5139566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1)); 514563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 515563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 5169566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2)); 5179566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 5189566063dSJacob Faibussowitsch PetscCall(PetscFree(olengths));} 519bc5ccf88SSatish Balay 520bc5ccf88SSatish Balay /* do sends: 521bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 522bc5ccf88SSatish Balay the ith processor 523bc5ccf88SSatish Balay */ 5249566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices)); 5259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*nsends,&send_waits)); 5269566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size,&startv,size,&starti)); 527a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 528bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 529bc5ccf88SSatish Balay for (i=1; i<size; i++) { 530563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 531533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 532bc5ccf88SSatish Balay } 53375cae7c1SHong Zhang 53475cae7c1SHong Zhang i = 0; 5355bd3b8fbSHong Zhang space = stash->space_head; 5366c4ed002SBarry Smith while (space) { 53775cae7c1SHong Zhang space_next = space->next; 5385bd3b8fbSHong Zhang sp_idx = space->idx; 5395bd3b8fbSHong Zhang sp_idy = space->idy; 5405bd3b8fbSHong Zhang sp_val = space->val; 54175cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 542bc5ccf88SSatish Balay j = owner[i]; 543a2d1c673SSatish Balay if (bs2 == 1) { 5445bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 545a2d1c673SSatish Balay } else { 546c1ac3661SBarry Smith PetscInt k; 54754f21887SBarry Smith PetscScalar *buf1,*buf2; 5484c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 549b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5508865f1eaSKarl Rupp for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 551a2d1c673SSatish Balay } 5525bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5535bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 554bc5ccf88SSatish Balay startv[j]++; 555bc5ccf88SSatish Balay starti[j]++; 55675cae7c1SHong Zhang i++; 55775cae7c1SHong Zhang } 55875cae7c1SHong Zhang space = space_next; 559bc5ccf88SSatish Balay } 560bc5ccf88SSatish Balay startv[0] = 0; 5618865f1eaSKarl Rupp for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 562e5d0e772SSatish Balay 563bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 56476ec1555SBarry Smith if (sizes[i]) { 5659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++)); 5669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++)); 567bc5ccf88SSatish Balay } 568b85c94c3SSatish Balay } 5696cf91177SBarry Smith #if defined(PETSC_USE_INFO) 5709566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT " \n",nsends)); 571e5d0e772SSatish Balay for (i=0; i<size; i++) { 57276ec1555SBarry Smith if (sizes[i]) { 5739566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))))); 574e5d0e772SSatish Balay } 575e5d0e772SSatish Balay } 576e5d0e772SSatish Balay #endif 5779566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 5789566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 5799566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv,starti)); 5809566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 581a2d1c673SSatish Balay 582563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 5839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2*nreceives,&recv_waits)); 584563fb871SSatish Balay 585563fb871SSatish Balay for (i=0; i<nreceives; i++) { 586563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 587563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 588563fb871SSatish Balay } 589563fb871SSatish Balay stash->recv_waits = recv_waits; 5908865f1eaSKarl Rupp 5919566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 5929566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 593563fb871SSatish Balay 594c05d87d6SBarry Smith stash->svalues = svalues; 595c05d87d6SBarry Smith stash->sindices = sindices; 596c05d87d6SBarry Smith stash->rvalues = rvalues; 597c05d87d6SBarry Smith stash->rindices = rindices; 598c05d87d6SBarry Smith stash->send_waits = send_waits; 599c05d87d6SBarry Smith stash->nsends = nsends; 600c05d87d6SBarry Smith stash->nrecvs = nreceives; 60167318a8aSJed Brown stash->reproduce_count = 0; 602bc5ccf88SSatish Balay PetscFunctionReturn(0); 603bc5ccf88SSatish Balay } 604bc5ccf88SSatish Balay 605a2d1c673SSatish Balay /* 6068798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 6078798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6084c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6094c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6104c1ff481SSatish Balay 6114c1ff481SSatish Balay Input Parameters: 6124c1ff481SSatish Balay stash - the stash 6134c1ff481SSatish Balay 6144c1ff481SSatish Balay Output Parameters: 6154c1ff481SSatish Balay nvals - the number of entries in the current message. 6164c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6174c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6184c1ff481SSatish Balay vals - the values 6194c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6204c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6214c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 622a2d1c673SSatish Balay */ 62354f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 624bc5ccf88SSatish Balay { 625ac2b2aa0SJed Brown PetscFunctionBegin; 6269566063dSJacob Faibussowitsch PetscCall((*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg)); 627ac2b2aa0SJed Brown PetscFunctionReturn(0); 628ac2b2aa0SJed Brown } 629ac2b2aa0SJed Brown 630d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 631ac2b2aa0SJed Brown { 632533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 633fe09c992SBarry Smith PetscInt bs2; 634a2d1c673SSatish Balay MPI_Status recv_status; 635ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 636bc5ccf88SSatish Balay 637bc5ccf88SSatish Balay PetscFunctionBegin; 638a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 639a2d1c673SSatish Balay /* Return if no more messages to process */ 6408865f1eaSKarl Rupp if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 641a2d1c673SSatish Balay 6424c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 64367318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 644a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 645a2d1c673SSatish Balay while (!match_found) { 64667318a8aSJed Brown if (stash->reproduce) { 64767318a8aSJed Brown i = stash->reproduce_count++; 6489566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(stash->recv_waits+i,&recv_status)); 64967318a8aSJed Brown } else { 6509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status)); 65167318a8aSJed Brown } 65208401ef6SPierre Jolivet PetscCheck(recv_status.MPI_SOURCE >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 653533163c2SBarry Smith 65467318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 655a2d1c673SSatish Balay if (i % 2) { 6569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status,MPIU_SCALAR,nvals)); 657c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 658a2d1c673SSatish Balay *nvals = *nvals/bs2; 659563fb871SSatish Balay } else { 6609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status,MPIU_INT,nvals)); 661563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 662563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 663bc5ccf88SSatish Balay } 664a2d1c673SSatish Balay 665cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 666c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 667c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 668a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 669563fb871SSatish Balay *rows = stash->rindices[i2]; 670a2d1c673SSatish Balay *cols = *rows + *nvals; 671563fb871SSatish Balay *vals = stash->rvalues[i1]; 672a2d1c673SSatish Balay *flg = 1; 673a2d1c673SSatish Balay stash->nprocessed++; 67435d8aa7fSBarry Smith match_found = PETSC_TRUE; 675bc5ccf88SSatish Balay } 676bc5ccf88SSatish Balay } 677bc5ccf88SSatish Balay PetscFunctionReturn(0); 678bc5ccf88SSatish Balay } 679d7d60843SJed Brown 680e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI) 681d7d60843SJed Brown typedef struct { 682d7d60843SJed Brown PetscInt row; 683d7d60843SJed Brown PetscInt col; 684d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 685d7d60843SJed Brown } MatStashBlock; 686d7d60843SJed Brown 687d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 688d7d60843SJed Brown { 689d7d60843SJed Brown PetscMatStashSpace space; 690d7d60843SJed Brown PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 691d7d60843SJed Brown PetscScalar **valptr; 692d7d60843SJed Brown 693d7d60843SJed Brown PetscFunctionBegin; 6949566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm)); 695d7d60843SJed Brown for (space=stash->space_head,cnt=0; space; space=space->next) { 696d7d60843SJed Brown for (i=0; i<space->local_used; i++) { 697d7d60843SJed Brown row[cnt] = space->idx[i]; 698d7d60843SJed Brown col[cnt] = space->idy[i]; 699d7d60843SJed Brown valptr[cnt] = &space->val[i*bs2]; 700d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 701d7d60843SJed Brown cnt++; 702d7d60843SJed Brown } 703d7d60843SJed Brown } 70408401ef6SPierre Jolivet PetscCheck(cnt == n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries",n,cnt); 7059566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArrayPair(n,row,col,perm)); 706d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 707d7d60843SJed Brown for (rowstart=0,cnt=0,i=1; i<=n; i++) { 708d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 709d7d60843SJed Brown PetscInt colstart; 7109566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart])); 711d7d60843SJed Brown for (colstart=rowstart; colstart<i;) { /* Compress multiple insertions to the same location */ 712d7d60843SJed Brown PetscInt j,l; 713d7d60843SJed Brown MatStashBlock *block; 7149566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segsendblocks,1,&block)); 715d7d60843SJed Brown block->row = row[rowstart]; 716d7d60843SJed Brown block->col = col[colstart]; 7179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals,valptr[perm[colstart]],bs2)); 718d7d60843SJed Brown for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 719d7d60843SJed Brown if (insertmode == ADD_VALUES) { 720d7d60843SJed Brown for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 721d7d60843SJed Brown } else { 7229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals,valptr[perm[j]],bs2)); 723d7d60843SJed Brown } 724d7d60843SJed Brown } 725d7d60843SJed Brown colstart = j; 726d7d60843SJed Brown } 727d7d60843SJed Brown rowstart = i; 728d7d60843SJed Brown } 729d7d60843SJed Brown } 7309566063dSJacob Faibussowitsch PetscCall(PetscFree4(row,col,valptr,perm)); 731d7d60843SJed Brown PetscFunctionReturn(0); 732d7d60843SJed Brown } 733d7d60843SJed Brown 734d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 735d7d60843SJed Brown { 736d7d60843SJed Brown PetscFunctionBegin; 737d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 738d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 739d7d60843SJed Brown PetscMPIInt blocklens[2]; 740d7d60843SJed Brown MPI_Aint displs[2]; 741d7d60843SJed Brown MPI_Datatype types[2],stype; 742223b4c07SBarry Smith /* 743223b4c07SBarry Smith DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex. 744f41aa5efSJunchao Zhang std::complex itself has standard layout, so does DummyBlock, recursively. 745a5b23f4aSJose E. Roman To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout, 746f41aa5efSJunchao Zhang though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++. 747f41aa5efSJunchao Zhang offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof. 748f41aa5efSJunchao Zhang 749f41aa5efSJunchao Zhang We can test if std::complex has standard layout with the following code: 750f41aa5efSJunchao Zhang #include <iostream> 751f41aa5efSJunchao Zhang #include <type_traits> 752f41aa5efSJunchao Zhang #include <complex> 753f41aa5efSJunchao Zhang int main() { 754f41aa5efSJunchao Zhang std::cout << std::boolalpha; 755f41aa5efSJunchao Zhang std::cout << std::is_standard_layout<std::complex<double> >::value << '\n'; 756f41aa5efSJunchao Zhang } 757f41aa5efSJunchao Zhang Output: true 7589503c6c6SJed Brown */ 759f41aa5efSJunchao Zhang struct DummyBlock {PetscInt row,col; PetscScalar vals;}; 760d7d60843SJed Brown 7619503c6c6SJed Brown stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar); 762d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 763d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 764d7d60843SJed Brown } 7659566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks)); 7669566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks)); 7679566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe)); 768d7d60843SJed Brown blocklens[0] = 2; 769d7d60843SJed Brown blocklens[1] = bs2; 7709503c6c6SJed Brown displs[0] = offsetof(struct DummyBlock,row); 7719503c6c6SJed Brown displs[1] = offsetof(struct DummyBlock,vals); 772d7d60843SJed Brown types[0] = MPIU_INT; 773d7d60843SJed Brown types[1] = MPIU_SCALAR; 7749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_struct(2,blocklens,displs,types,&stype)); 7759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stype)); 7769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype)); 7779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stash->blocktype)); 7789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&stype)); 779d7d60843SJed Brown } 780d7d60843SJed Brown PetscFunctionReturn(0); 781d7d60843SJed Brown } 782d7d60843SJed Brown 783d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message. 784d7d60843SJed Brown * Here we post the main sends. 785d7d60843SJed Brown */ 786d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 787d7d60843SJed Brown { 788d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 789d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)sdata; 790d7d60843SJed Brown 791d7d60843SJed Brown PetscFunctionBegin; 79208401ef6SPierre Jolivet PetscCheck(rank == stash->sendranks[rankid],comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 7939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0])); 794d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 795d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 796d7d60843SJed Brown PetscFunctionReturn(0); 797d7d60843SJed Brown } 798d7d60843SJed Brown 799223b4c07SBarry Smith /* 800223b4c07SBarry Smith Callback invoked by target after receiving rendezvous message. 801223b4c07SBarry Smith Here we post the main recvs. 802d7d60843SJed Brown */ 803d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 804d7d60843SJed Brown { 805d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 806d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)rdata; 807d7d60843SJed Brown MatStashFrame *frame; 808d7d60843SJed Brown 809d7d60843SJed Brown PetscFunctionBegin; 8109566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvframe,1,&frame)); 8119566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer)); 8129566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0])); 813d7d60843SJed Brown frame->count = hdr->count; 814d7d60843SJed Brown frame->pending = 1; 815d7d60843SJed Brown PetscFunctionReturn(0); 816d7d60843SJed Brown } 817d7d60843SJed Brown 818d7d60843SJed Brown /* 819d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 820d7d60843SJed Brown */ 821d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 822d7d60843SJed Brown { 823d7d60843SJed Brown size_t nblocks; 824d7d60843SJed Brown char *sendblocks; 825d7d60843SJed Brown 826d7d60843SJed Brown PetscFunctionBegin; 82776bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */ 8284b4eb8d3SJed Brown InsertMode addv; 8291c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); 83008401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 8314b4eb8d3SJed Brown } 8324b4eb8d3SJed Brown 8339566063dSJacob Faibussowitsch PetscCall(MatStashBlockTypeSetUp(stash)); 8349566063dSJacob Faibussowitsch PetscCall(MatStashSortCompress_Private(stash,mat->insertmode)); 8359566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetSize(stash->segsendblocks,&nblocks)); 8369566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks)); 83760f34548SJunchao Zhang if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 83823b7d1baSJed Brown PetscInt i; 83923b7d1baSJed Brown size_t b; 84097da8949SJed Brown for (i=0,b=0; i<stash->nsendranks; i++) { 84197da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 84297da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 84397da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 84497da8949SJed Brown for (; b<nblocks; b++) { 84597da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 846*aed4548fSBarry Smith PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]],stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 84797da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 84897da8949SJed Brown stash->sendhdr[i].count++; 84997da8949SJed Brown } 85097da8949SJed Brown } 85197da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 85223b7d1baSJed Brown PetscInt sendno; 85323b7d1baSJed Brown size_t i,rowstart; 854d7d60843SJed Brown 855d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 856d7d60843SJed Brown stash->nsendranks = 0; 857d7d60843SJed Brown for (rowstart=0; rowstart<nblocks;) { 8587e2ea869SJed Brown PetscInt owner; 859d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 8609566063dSJacob Faibussowitsch PetscCall(PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner)); 861d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 862d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 863d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8647e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 865d7d60843SJed Brown } 866d7d60843SJed Brown stash->nsendranks++; 867d7d60843SJed Brown rowstart = i; 868d7d60843SJed Brown } 8699566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes)); 870d7d60843SJed Brown 871d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 872d7d60843SJed Brown sendno = 0; 873d7d60843SJed Brown for (rowstart=0; rowstart<nblocks;) { 874d7d60843SJed Brown PetscInt owner; 875d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 8769566063dSJacob Faibussowitsch PetscCall(PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner)); 877d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 878d7d60843SJed Brown stash->sendranks[sendno] = owner; 879d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 880d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8817e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 882d7d60843SJed Brown } 883d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 884d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 885d7d60843SJed Brown stash->sendhdr[sendno].count = i - rowstart; 886d7d60843SJed Brown sendno++; 887d7d60843SJed Brown rowstart = i; 888d7d60843SJed Brown } 88908401ef6SPierre Jolivet PetscCheck(sendno == stash->nsendranks,stash->comm,PETSC_ERR_PLIB,"BTS counted %d sendranks, but %" PetscInt_FMT " sends",stash->nsendranks,sendno); 890d7d60843SJed Brown } 891d7d60843SJed Brown 8924b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 8934b4eb8d3SJed Brown * message or a dummy entry of some sort. */ 8944b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) { 89523b7d1baSJed Brown size_t i; 8964b4eb8d3SJed Brown for (i=0; i<nblocks; i++) { 8974b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8984b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row+1); 8994b4eb8d3SJed Brown } 9004b4eb8d3SJed Brown } 9014b4eb8d3SJed Brown 90260f34548SJunchao Zhang if (stash->first_assembly_done) { 90397da8949SJed Brown PetscMPIInt i,tag; 9049566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm,&tag)); 90597da8949SJed Brown for (i=0; i<stash->nrecvranks; i++) { 9069566063dSJacob Faibussowitsch PetscCall(MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash)); 90797da8949SJed Brown } 90897da8949SJed Brown for (i=0; i<stash->nsendranks; i++) { 9099566063dSJacob Faibussowitsch PetscCall(MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash)); 91097da8949SJed Brown } 91197da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 91297da8949SJed Brown } else { 913d0609cedSBarry Smith PetscCall(PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 914e0ddb6e8SJed Brown &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 915d0609cedSBarry Smith MatStashBTSSend_Private,MatStashBTSRecv_Private,stash)); 9169566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses)); 91797da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 91897da8949SJed Brown } 919d7d60843SJed Brown 9209566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes)); 921d7d60843SJed Brown stash->recvframe_active = NULL; 922d7d60843SJed Brown stash->recvframe_i = 0; 923d7d60843SJed Brown stash->some_i = 0; 924d7d60843SJed Brown stash->some_count = 0; 925d7d60843SJed Brown stash->recvcount = 0; 92660f34548SJunchao Zhang stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */ 9274b4eb8d3SJed Brown stash->insertmode = &mat->insertmode; 928d7d60843SJed Brown PetscFunctionReturn(0); 929d7d60843SJed Brown } 930d7d60843SJed Brown 931d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 932d7d60843SJed Brown { 933d7d60843SJed Brown MatStashBlock *block; 934d7d60843SJed Brown 935d7d60843SJed Brown PetscFunctionBegin; 936d7d60843SJed Brown *flg = 0; 937d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 938d7d60843SJed Brown if (stash->some_i == stash->some_count) { 939d7d60843SJed Brown if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 9409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE)); 941d7d60843SJed Brown stash->some_i = 0; 942d7d60843SJed Brown } 943d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 944d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 945d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 9469566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count)); 947d7d60843SJed Brown } 9484b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 9494b4eb8d3SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 9504b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 951*aed4548fSBarry Smith PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 952*aed4548fSBarry Smith PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 9534b4eb8d3SJed Brown } 954d7d60843SJed Brown stash->some_i++; 955d7d60843SJed Brown stash->recvcount++; 956d7d60843SJed Brown stash->recvframe_i = 0; 957d7d60843SJed Brown } 958d7d60843SJed Brown *n = 1; 959d7d60843SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 9604b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1); 961d7d60843SJed Brown *row = &block->row; 962d7d60843SJed Brown *col = &block->col; 963d7d60843SJed Brown *val = block->vals; 964d7d60843SJed Brown stash->recvframe_i++; 965d7d60843SJed Brown *flg = 1; 966d7d60843SJed Brown PetscFunctionReturn(0); 967d7d60843SJed Brown } 968d7d60843SJed Brown 969d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 970d7d60843SJed Brown { 971d7d60843SJed Brown PetscFunctionBegin; 9729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE)); 97360f34548SJunchao Zhang if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 974d2b3fd65SBarry Smith PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks,NULL)); 9753575f486SJed Brown } else { /* No reuse, so collect everything. */ 9769566063dSJacob Faibussowitsch PetscCall(MatStashScatterDestroy_BTS(stash)); 97797da8949SJed Brown } 978d7d60843SJed Brown 979d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 980d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 981d7d60843SJed Brown Also update the oldmax, only if it increases */ 982d7d60843SJed Brown if (stash->n) { 983d7d60843SJed Brown PetscInt bs2 = stash->bs*stash->bs; 984d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 985d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 986d7d60843SJed Brown } 987d7d60843SJed Brown 988d7d60843SJed Brown stash->nmax = 0; 989d7d60843SJed Brown stash->n = 0; 990d7d60843SJed Brown stash->reallocs = -1; 991d7d60843SJed Brown stash->nprocessed = 0; 992d7d60843SJed Brown 9939566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 994d7d60843SJed Brown 995f4259b30SLisandro Dalcin stash->space = NULL; 996d7d60843SJed Brown 997d7d60843SJed Brown PetscFunctionReturn(0); 998d7d60843SJed Brown } 999d7d60843SJed Brown 100060f34548SJunchao Zhang PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1001d7d60843SJed Brown { 1002d7d60843SJed Brown PetscFunctionBegin; 10039566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segsendblocks)); 10049566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvframe)); 1005d7d60843SJed Brown stash->recvframes = NULL; 10069566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks)); 1007d7d60843SJed Brown if (stash->blocktype != MPI_DATATYPE_NULL) { 10089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&stash->blocktype)); 1009d7d60843SJed Brown } 1010d7d60843SJed Brown stash->nsendranks = 0; 1011d7d60843SJed Brown stash->nrecvranks = 0; 10129566063dSJacob Faibussowitsch PetscCall(PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes)); 10139566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->sendreqs)); 10149566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvreqs)); 10159566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvranks)); 10169566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvhdr)); 10179566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->some_indices,stash->some_statuses)); 1018d7d60843SJed Brown PetscFunctionReturn(0); 1019d7d60843SJed Brown } 10201667be42SBarry Smith #endif 1021