12d5177cdSBarry Smith 2af0996ceSBarry Smith #include <petsc/private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *); 7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *); 1375d39b6aSBarry Smith #endif 14d7d60843SJed Brown 159417f4adSLois Curfman McInnes /* 168798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 174c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 184c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 199417f4adSLois Curfman McInnes 204c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 214c1ff481SSatish Balay 224c1ff481SSatish Balay Input Parameters: 234c1ff481SSatish Balay comm - communicator, required for scatters. 244c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 254c1ff481SSatish Balay 264c1ff481SSatish Balay Output Parameters: 274c1ff481SSatish Balay stash - the newly created stash 289417f4adSLois Curfman McInnes */ 299371c9d4SSatish Balay PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash) { 30533163c2SBarry Smith PetscInt max, *opt, nopt, i; 31ace3abfcSBarry Smith PetscBool flg; 32bc5ccf88SSatish Balay 333a40ed3dSBarry Smith PetscFunctionBegin; 34bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 35752ec6e0SSatish Balay stash->comm = comm; 368865f1eaSKarl Rupp 379566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1)); 389566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2)); 399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size)); 409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank)); 419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v)); 42533163c2SBarry Smith for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1; 43533163c2SBarry Smith 44434d7ff9SSatish Balay nopt = stash->size; 459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nopt, &opt)); 469566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg)); 47434d7ff9SSatish Balay if (flg) { 48434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 49434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 50434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 51f4ab19daSSatish Balay else max = 0; /* Use default */ 52434d7ff9SSatish Balay stash->umax = max; 53434d7ff9SSatish Balay } else { 54434d7ff9SSatish Balay stash->umax = 0; 55434d7ff9SSatish Balay } 569566063dSJacob Faibussowitsch PetscCall(PetscFree(opt)); 574c1ff481SSatish Balay if (bs <= 0) bs = 1; 58a2d1c673SSatish Balay 594c1ff481SSatish Balay stash->bs = bs; 609417f4adSLois Curfman McInnes stash->nmax = 0; 61434d7ff9SSatish Balay stash->oldnmax = 0; 629417f4adSLois Curfman McInnes stash->n = 0; 634c1ff481SSatish Balay stash->reallocs = -1; 64f4259b30SLisandro Dalcin stash->space_head = NULL; 65f4259b30SLisandro Dalcin stash->space = NULL; 669417f4adSLois Curfman McInnes 67f4259b30SLisandro Dalcin stash->send_waits = NULL; 68f4259b30SLisandro Dalcin stash->recv_waits = NULL; 69f4259b30SLisandro Dalcin stash->send_status = NULL; 70bc5ccf88SSatish Balay stash->nsends = 0; 71bc5ccf88SSatish Balay stash->nrecvs = 0; 72f4259b30SLisandro Dalcin stash->svalues = NULL; 73f4259b30SLisandro Dalcin stash->rvalues = NULL; 74f4259b30SLisandro Dalcin stash->rindices = NULL; 75a2d1c673SSatish Balay stash->nprocessed = 0; 7667318a8aSJed Brown stash->reproduce = PETSC_FALSE; 77d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 788865f1eaSKarl Rupp 799566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL)); 801667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 81ca723aa4SStefano Zampini flg = PETSC_FALSE; 829566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL)); 83b30fb036SBarry Smith if (!flg) { 84d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 85d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 86d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 87d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 88ac2b2aa0SJed Brown } else { 891667be42SBarry Smith #endif 90ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 91ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 92ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 93ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 941667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 95ac2b2aa0SJed Brown } 961667be42SBarry Smith #endif 973a40ed3dSBarry Smith PetscFunctionReturn(0); 989417f4adSLois Curfman McInnes } 999417f4adSLois Curfman McInnes 1004c1ff481SSatish Balay /* 1018798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 1024c1ff481SSatish Balay */ 1039371c9d4SSatish Balay PetscErrorCode MatStashDestroy_Private(MatStash *stash) { 104bc5ccf88SSatish Balay PetscFunctionBegin; 1059566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 1069566063dSJacob Faibussowitsch if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash)); 1078865f1eaSKarl Rupp 108f4259b30SLisandro Dalcin stash->space = NULL; 1098865f1eaSKarl Rupp 1109566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->flg_v)); 111bc5ccf88SSatish Balay PetscFunctionReturn(0); 112bc5ccf88SSatish Balay } 113bc5ccf88SSatish Balay 1144c1ff481SSatish Balay /* 11567318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1164c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 11767318a8aSJed Brown all the memory used for message passing is cleaned up. This 1184c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1194c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1204c1ff481SSatish Balay so that the same value can be used the next time through. 1214c1ff481SSatish Balay */ 1229371c9d4SSatish Balay PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) { 123ac2b2aa0SJed Brown PetscFunctionBegin; 1249566063dSJacob Faibussowitsch PetscCall((*stash->ScatterEnd)(stash)); 125ac2b2aa0SJed Brown PetscFunctionReturn(0); 126ac2b2aa0SJed Brown } 127ac2b2aa0SJed Brown 1289371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) { 129533163c2SBarry Smith PetscInt nsends = stash->nsends, bs2, oldnmax, i; 130a2d1c673SSatish Balay MPI_Status *send_status; 131a2d1c673SSatish Balay 1323a40ed3dSBarry Smith PetscFunctionBegin; 133533163c2SBarry Smith for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1; 134a2d1c673SSatish Balay /* wait on sends */ 135a2d1c673SSatish Balay if (nsends) { 1369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_status)); 1379566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status)); 1389566063dSJacob Faibussowitsch PetscCall(PetscFree(send_status)); 139a2d1c673SSatish Balay } 140a2d1c673SSatish Balay 141c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 142434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 143434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 144b9b97703SBarry Smith if (stash->n) { 14594b769a5SSatish Balay bs2 = stash->bs * stash->bs; 1468a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5) * bs2; 147434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 148b9b97703SBarry Smith } 149434d7ff9SSatish Balay 150d07ff455SSatish Balay stash->nmax = 0; 151d07ff455SSatish Balay stash->n = 0; 1524c1ff481SSatish Balay stash->reallocs = -1; 153a2d1c673SSatish Balay stash->nprocessed = 0; 1548865f1eaSKarl Rupp 1559566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 1568865f1eaSKarl Rupp 157f4259b30SLisandro Dalcin stash->space = NULL; 1588865f1eaSKarl Rupp 1599566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->send_waits)); 1609566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recv_waits)); 1619566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->svalues, stash->sindices)); 1629566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues[0])); 1639566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues)); 1649566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices[0])); 1659566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices)); 1663a40ed3dSBarry Smith PetscFunctionReturn(0); 1679417f4adSLois Curfman McInnes } 1689417f4adSLois Curfman McInnes 1694c1ff481SSatish Balay /* 1706aad120cSJose E. Roman MatStashGetInfo_Private - Gets the relevant statistics of the stash 1714c1ff481SSatish Balay 1724c1ff481SSatish Balay Input Parameters: 1734c1ff481SSatish Balay stash - the stash 17494b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1754c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1764c1ff481SSatish Balay 1774c1ff481SSatish Balay */ 1789371c9d4SSatish Balay PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs) { 179c1ac3661SBarry Smith PetscInt bs2 = stash->bs * stash->bs; 18094b769a5SSatish Balay 1813a40ed3dSBarry Smith PetscFunctionBegin; 1821ecfd215SBarry Smith if (nstash) *nstash = stash->n * bs2; 1831ecfd215SBarry Smith if (reallocs) { 184434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 185434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1861ecfd215SBarry Smith } 187bc5ccf88SSatish Balay PetscFunctionReturn(0); 188bc5ccf88SSatish Balay } 1894c1ff481SSatish Balay 1904c1ff481SSatish Balay /* 1918798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1924c1ff481SSatish Balay 1934c1ff481SSatish Balay Input Parameters: 1944c1ff481SSatish Balay stash - the stash 1954c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1964c1ff481SSatish Balay this value is used while allocating memory. 1974c1ff481SSatish Balay */ 1989371c9d4SSatish Balay PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max) { 199bc5ccf88SSatish Balay PetscFunctionBegin; 200434d7ff9SSatish Balay stash->umax = max; 2013a40ed3dSBarry Smith PetscFunctionReturn(0); 20297530c3fSBarry Smith } 20397530c3fSBarry Smith 2048798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2054c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2064c1ff481SSatish Balay being inserted into the stash. 2074c1ff481SSatish Balay 2084c1ff481SSatish Balay Input Parameters: 2094c1ff481SSatish Balay stash - the stash 2104c1ff481SSatish Balay incr - the minimum increase requested 2114c1ff481SSatish Balay 2124c1ff481SSatish Balay Notes: 2134c1ff481SSatish Balay This routine doubles the currently used memory. 2144c1ff481SSatish Balay */ 2159371c9d4SSatish Balay static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr) { 2165bd3b8fbSHong Zhang PetscInt newnmax, bs2 = stash->bs * stash->bs; 2179417f4adSLois Curfman McInnes 2183a40ed3dSBarry Smith PetscFunctionBegin; 2199417f4adSLois Curfman McInnes /* allocate a larger stash */ 220c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 221434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax / bs2; 222434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE / bs2; 223c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 224434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2; 225434d7ff9SSatish Balay else newnmax = stash->oldnmax / bs2; 226434d7ff9SSatish Balay } else newnmax = stash->nmax * 2; 2274c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr; 228d07ff455SSatish Balay 22975cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 2309566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space)); 231b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 232b087b6d6SSatish Balay stash->space_head = stash->space; 23375cae7c1SHong Zhang } 234b087b6d6SSatish Balay 235bc5ccf88SSatish Balay stash->reallocs++; 23675cae7c1SHong Zhang stash->nmax = newnmax; 237bc5ccf88SSatish Balay PetscFunctionReturn(0); 238bc5ccf88SSatish Balay } 239bc5ccf88SSatish Balay /* 2408798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2414c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2424c1ff481SSatish Balay can be inserted with a single call to this function. 2434c1ff481SSatish Balay 2444c1ff481SSatish Balay Input Parameters: 2454c1ff481SSatish Balay stash - the stash 2464c1ff481SSatish Balay row - the global row correspoiding to the values 2474c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2484c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2494c1ff481SSatish Balay values - the values inserted 250bc5ccf88SSatish Balay */ 2519371c9d4SSatish Balay PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries) { 252b400d20cSBarry Smith PetscInt i, k, cnt = 0; 25375cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 254bc5ccf88SSatish Balay 255bc5ccf88SSatish Balay PetscFunctionBegin; 2564c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 257*48a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 25875cae7c1SHong Zhang space = stash->space; 25975cae7c1SHong Zhang k = space->local_used; 2604c1ff481SSatish Balay for (i = 0; i < n; i++) { 26114069ce9SStefano Zampini if (ignorezeroentries && values && values[i] == 0.0) continue; 26275cae7c1SHong Zhang space->idx[k] = row; 26375cae7c1SHong Zhang space->idy[k] = idxn[i]; 26414069ce9SStefano Zampini space->val[k] = values ? values[i] : 0.0; 26575cae7c1SHong Zhang k++; 266b400d20cSBarry Smith cnt++; 2679417f4adSLois Curfman McInnes } 268b400d20cSBarry Smith stash->n += cnt; 269b400d20cSBarry Smith space->local_used += cnt; 270b400d20cSBarry Smith space->local_remaining -= cnt; 271a2d1c673SSatish Balay PetscFunctionReturn(0); 272a2d1c673SSatish Balay } 27375cae7c1SHong Zhang 2744c1ff481SSatish Balay /* 2758798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2764c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2774c1ff481SSatish Balay can be inserted with a single call to this function. 278a2d1c673SSatish Balay 2794c1ff481SSatish Balay Input Parameters: 2804c1ff481SSatish Balay stash - the stash 2814c1ff481SSatish Balay row - the global row correspoiding to the values 2824c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2834c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2844c1ff481SSatish Balay values - the values inserted 2854c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2864c1ff481SSatish Balay this happens because the input is columnoriented. 2874c1ff481SSatish Balay */ 2889371c9d4SSatish Balay PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries) { 28950e9ab7cSBarry Smith PetscInt i, k, cnt = 0; 29075cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 291a2d1c673SSatish Balay 2924c1ff481SSatish Balay PetscFunctionBegin; 2934c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 294*48a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 29575cae7c1SHong Zhang space = stash->space; 29675cae7c1SHong Zhang k = space->local_used; 2974c1ff481SSatish Balay for (i = 0; i < n; i++) { 29814069ce9SStefano Zampini if (ignorezeroentries && values && values[i * stepval] == 0.0) continue; 29975cae7c1SHong Zhang space->idx[k] = row; 30075cae7c1SHong Zhang space->idy[k] = idxn[i]; 30114069ce9SStefano Zampini space->val[k] = values ? values[i * stepval] : 0.0; 30275cae7c1SHong Zhang k++; 303b400d20cSBarry Smith cnt++; 3044c1ff481SSatish Balay } 305b400d20cSBarry Smith stash->n += cnt; 306b400d20cSBarry Smith space->local_used += cnt; 307b400d20cSBarry Smith space->local_remaining -= cnt; 3084c1ff481SSatish Balay PetscFunctionReturn(0); 3094c1ff481SSatish Balay } 3104c1ff481SSatish Balay 3114c1ff481SSatish Balay /* 3128798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3134c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3144c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3154c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3164c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3174c1ff481SSatish Balay 3184c1ff481SSatish Balay Input Parameters: 3194c1ff481SSatish Balay stash - the stash 3204c1ff481SSatish Balay row - the global block-row correspoiding to the values 3214c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3224c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3234c1ff481SSatish Balay values. Each block is of size bs*bs. 3244c1ff481SSatish Balay values - the values inserted 3254c1ff481SSatish Balay rmax - the number of block-rows in the original block. 326a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 3274c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3284c1ff481SSatish Balay */ 3299371c9d4SSatish Balay PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx) { 33075cae7c1SHong Zhang PetscInt i, j, k, bs2, bs = stash->bs, l; 33154f21887SBarry Smith const PetscScalar *vals; 33254f21887SBarry Smith PetscScalar *array; 33375cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 334a2d1c673SSatish Balay 335a2d1c673SSatish Balay PetscFunctionBegin; 336*48a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 33775cae7c1SHong Zhang space = stash->space; 33875cae7c1SHong Zhang l = space->local_used; 33975cae7c1SHong Zhang bs2 = bs * bs; 3404c1ff481SSatish Balay for (i = 0; i < n; i++) { 34175cae7c1SHong Zhang space->idx[l] = row; 34275cae7c1SHong Zhang space->idy[l] = idxn[i]; 34375cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 34475cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 34575cae7c1SHong Zhang funtion call */ 34675cae7c1SHong Zhang array = space->val + bs2 * l; 34775cae7c1SHong Zhang vals = values + idx * bs2 * n + bs * i; 34875cae7c1SHong Zhang for (j = 0; j < bs; j++) { 34914069ce9SStefano Zampini for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0; 35075cae7c1SHong Zhang array++; 35175cae7c1SHong Zhang vals += cmax * bs; 35275cae7c1SHong Zhang } 35375cae7c1SHong Zhang l++; 354a2d1c673SSatish Balay } 3555bd3b8fbSHong Zhang stash->n += n; 35675cae7c1SHong Zhang space->local_used += n; 35775cae7c1SHong Zhang space->local_remaining -= n; 3584c1ff481SSatish Balay PetscFunctionReturn(0); 3594c1ff481SSatish Balay } 3604c1ff481SSatish Balay 3614c1ff481SSatish Balay /* 3628798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3634c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3644c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3654c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3664c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3674c1ff481SSatish Balay 3684c1ff481SSatish Balay Input Parameters: 3694c1ff481SSatish Balay stash - the stash 3704c1ff481SSatish Balay row - the global block-row correspoiding to the values 3714c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3724c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3734c1ff481SSatish Balay values. Each block is of size bs*bs. 3744c1ff481SSatish Balay values - the values inserted 3754c1ff481SSatish Balay rmax - the number of block-rows in the original block. 376a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 3774c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3784c1ff481SSatish Balay */ 3799371c9d4SSatish Balay PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx) { 38075cae7c1SHong Zhang PetscInt i, j, k, bs2, bs = stash->bs, l; 38154f21887SBarry Smith const PetscScalar *vals; 38254f21887SBarry Smith PetscScalar *array; 38375cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 3844c1ff481SSatish Balay 3854c1ff481SSatish Balay PetscFunctionBegin; 386*48a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 38775cae7c1SHong Zhang space = stash->space; 38875cae7c1SHong Zhang l = space->local_used; 38975cae7c1SHong Zhang bs2 = bs * bs; 3904c1ff481SSatish Balay for (i = 0; i < n; i++) { 39175cae7c1SHong Zhang space->idx[l] = row; 39275cae7c1SHong Zhang space->idy[l] = idxn[i]; 39375cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 39475cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 39575cae7c1SHong Zhang funtion call */ 39675cae7c1SHong Zhang array = space->val + bs2 * l; 39775cae7c1SHong Zhang vals = values + idx * bs2 * n + bs * i; 39875cae7c1SHong Zhang for (j = 0; j < bs; j++) { 39914069ce9SStefano Zampini for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0; 40075cae7c1SHong Zhang array += bs; 40175cae7c1SHong Zhang vals += rmax * bs; 40275cae7c1SHong Zhang } 4035bd3b8fbSHong Zhang l++; 404a2d1c673SSatish Balay } 4055bd3b8fbSHong Zhang stash->n += n; 40675cae7c1SHong Zhang space->local_used += n; 40775cae7c1SHong Zhang space->local_remaining -= n; 4083a40ed3dSBarry Smith PetscFunctionReturn(0); 4099417f4adSLois Curfman McInnes } 4104c1ff481SSatish Balay /* 4118798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4124c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4134c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4144c1ff481SSatish Balay processors. 415bc5ccf88SSatish Balay 4164c1ff481SSatish Balay Input Parameters: 4174c1ff481SSatish Balay stash - the stash 4184c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4194c1ff481SSatish Balay for each node. 4204c1ff481SSatish Balay 42195452b02SPatrick Sanan Notes: 42295452b02SPatrick Sanan The 'owners' array in the cased of the blocked-stash has the 4234c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4244c1ff481SSatish Balay the proper global indices. 4254c1ff481SSatish Balay */ 4269371c9d4SSatish Balay PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners) { 427ac2b2aa0SJed Brown PetscFunctionBegin; 4289566063dSJacob Faibussowitsch PetscCall((*stash->ScatterBegin)(mat, stash, owners)); 429ac2b2aa0SJed Brown PetscFunctionReturn(0); 430ac2b2aa0SJed Brown } 431ac2b2aa0SJed Brown 4329371c9d4SSatish Balay static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners) { 433c1ac3661SBarry Smith PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2; 434fe09c992SBarry Smith PetscInt size = stash->size, nsends; 43575cae7c1SHong Zhang PetscInt count, *sindices, **rindices, i, j, idx, lastidx, l; 43654f21887SBarry Smith PetscScalar **rvalues, *svalues; 437bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 438563fb871SSatish Balay MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 43976ec1555SBarry Smith PetscMPIInt *sizes, *nlengths, nreceives; 4405bd3b8fbSHong Zhang PetscInt *sp_idx, *sp_idy; 44154f21887SBarry Smith PetscScalar *sp_val; 4425bd3b8fbSHong Zhang PetscMatStashSpace space, space_next; 443bc5ccf88SSatish Balay 444bc5ccf88SSatish Balay PetscFunctionBegin; 4454b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 4464b4eb8d3SJed Brown InsertMode addv; 4471c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 44808401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 4494b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */ 4504b4eb8d3SJed Brown } 4514b4eb8d3SJed Brown 4524c1ff481SSatish Balay bs2 = stash->bs * stash->bs; 45375cae7c1SHong Zhang 454bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 4559566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 4569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 457a2d1c673SSatish Balay 45875cae7c1SHong Zhang i = j = 0; 4597357eb19SBarry Smith lastidx = -1; 4605bd3b8fbSHong Zhang space = stash->space_head; 4616c4ed002SBarry Smith while (space) { 46275cae7c1SHong Zhang space_next = space->next; 4635bd3b8fbSHong Zhang sp_idx = space->idx; 46475cae7c1SHong Zhang for (l = 0; l < space->local_used; l++) { 4657357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4665bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4677357eb19SBarry Smith lastidx = idx; 4687357eb19SBarry Smith for (; j < size; j++) { 4694c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j + 1]) { 4709371c9d4SSatish Balay nlengths[j]++; 4719371c9d4SSatish Balay owner[i] = j; 4729371c9d4SSatish Balay break; 473bc5ccf88SSatish Balay } 474bc5ccf88SSatish Balay } 47575cae7c1SHong Zhang i++; 47675cae7c1SHong Zhang } 47775cae7c1SHong Zhang space = space_next; 478bc5ccf88SSatish Balay } 479071fcb05SBarry Smith 480563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 4819566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 482563fb871SSatish Balay for (i = 0, nsends = 0; i < size; i++) { 4838865f1eaSKarl Rupp if (nlengths[i]) { 4849371c9d4SSatish Balay sizes[i] = 1; 4859371c9d4SSatish Balay nsends++; 4868865f1eaSKarl Rupp } 487563fb871SSatish Balay } 488bc5ccf88SSatish Balay 4899371c9d4SSatish Balay { 4909371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 491563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 4929566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 4939566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 494563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 495563fb871SSatish Balay for (i = 0; i < nreceives; i++) olengths[i] *= 2; 4969566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 497563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 498563fb871SSatish Balay for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2; 4999566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 5009566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 5019371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 5029371c9d4SSatish Balay } 503bc5ccf88SSatish Balay 504bc5ccf88SSatish Balay /* do sends: 505bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 506bc5ccf88SSatish Balay the ith processor 507bc5ccf88SSatish Balay */ 5089566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 5099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 5109566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 511a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 5129371c9d4SSatish Balay startv[0] = 0; 5139371c9d4SSatish Balay starti[0] = 0; 514bc5ccf88SSatish Balay for (i = 1; i < size; i++) { 515563fb871SSatish Balay startv[i] = startv[i - 1] + nlengths[i - 1]; 516533163c2SBarry Smith starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 517bc5ccf88SSatish Balay } 51875cae7c1SHong Zhang 51975cae7c1SHong Zhang i = 0; 5205bd3b8fbSHong Zhang space = stash->space_head; 5216c4ed002SBarry Smith while (space) { 52275cae7c1SHong Zhang space_next = space->next; 5235bd3b8fbSHong Zhang sp_idx = space->idx; 5245bd3b8fbSHong Zhang sp_idy = space->idy; 5255bd3b8fbSHong Zhang sp_val = space->val; 52675cae7c1SHong Zhang for (l = 0; l < space->local_used; l++) { 527bc5ccf88SSatish Balay j = owner[i]; 528a2d1c673SSatish Balay if (bs2 == 1) { 5295bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 530a2d1c673SSatish Balay } else { 531c1ac3661SBarry Smith PetscInt k; 53254f21887SBarry Smith PetscScalar *buf1, *buf2; 5334c1ff481SSatish Balay buf1 = svalues + bs2 * startv[j]; 534b087b6d6SSatish Balay buf2 = space->val + bs2 * l; 5358865f1eaSKarl Rupp for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 536a2d1c673SSatish Balay } 5375bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5385bd3b8fbSHong Zhang sindices[starti[j] + nlengths[j]] = sp_idy[l]; 539bc5ccf88SSatish Balay startv[j]++; 540bc5ccf88SSatish Balay starti[j]++; 54175cae7c1SHong Zhang i++; 54275cae7c1SHong Zhang } 54375cae7c1SHong Zhang space = space_next; 544bc5ccf88SSatish Balay } 545bc5ccf88SSatish Balay startv[0] = 0; 5468865f1eaSKarl Rupp for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 547e5d0e772SSatish Balay 548bc5ccf88SSatish Balay for (i = 0, count = 0; i < size; i++) { 54976ec1555SBarry Smith if (sizes[i]) { 5509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 5519566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 552bc5ccf88SSatish Balay } 553b85c94c3SSatish Balay } 5546cf91177SBarry Smith #if defined(PETSC_USE_INFO) 5559566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends)); 556e5d0e772SSatish Balay for (i = 0; i < size; i++) { 557*48a46eb9SPierre Jolivet if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 558e5d0e772SSatish Balay } 559e5d0e772SSatish Balay #endif 5609566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 5619566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 5629566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 5639566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 564a2d1c673SSatish Balay 565563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 5669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 567563fb871SSatish Balay 568563fb871SSatish Balay for (i = 0; i < nreceives; i++) { 569563fb871SSatish Balay recv_waits[2 * i] = recv_waits1[i]; 570563fb871SSatish Balay recv_waits[2 * i + 1] = recv_waits2[i]; 571563fb871SSatish Balay } 572563fb871SSatish Balay stash->recv_waits = recv_waits; 5738865f1eaSKarl Rupp 5749566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 5759566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 576563fb871SSatish Balay 577c05d87d6SBarry Smith stash->svalues = svalues; 578c05d87d6SBarry Smith stash->sindices = sindices; 579c05d87d6SBarry Smith stash->rvalues = rvalues; 580c05d87d6SBarry Smith stash->rindices = rindices; 581c05d87d6SBarry Smith stash->send_waits = send_waits; 582c05d87d6SBarry Smith stash->nsends = nsends; 583c05d87d6SBarry Smith stash->nrecvs = nreceives; 58467318a8aSJed Brown stash->reproduce_count = 0; 585bc5ccf88SSatish Balay PetscFunctionReturn(0); 586bc5ccf88SSatish Balay } 587bc5ccf88SSatish Balay 588a2d1c673SSatish Balay /* 5898798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5908798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5914c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5924c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5934c1ff481SSatish Balay 5944c1ff481SSatish Balay Input Parameters: 5954c1ff481SSatish Balay stash - the stash 5964c1ff481SSatish Balay 5974c1ff481SSatish Balay Output Parameters: 5984c1ff481SSatish Balay nvals - the number of entries in the current message. 5994c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6004c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6014c1ff481SSatish Balay vals - the values 6024c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6034c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6044c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 605a2d1c673SSatish Balay */ 6069371c9d4SSatish Balay PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg) { 607ac2b2aa0SJed Brown PetscFunctionBegin; 6089566063dSJacob Faibussowitsch PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg)); 609ac2b2aa0SJed Brown PetscFunctionReturn(0); 610ac2b2aa0SJed Brown } 611ac2b2aa0SJed Brown 6129371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg) { 613533163c2SBarry Smith PetscMPIInt i, *flg_v = stash->flg_v, i1, i2; 614fe09c992SBarry Smith PetscInt bs2; 615a2d1c673SSatish Balay MPI_Status recv_status; 616ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 617bc5ccf88SSatish Balay 618bc5ccf88SSatish Balay PetscFunctionBegin; 619a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 620a2d1c673SSatish Balay /* Return if no more messages to process */ 6218865f1eaSKarl Rupp if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 622a2d1c673SSatish Balay 6234c1ff481SSatish Balay bs2 = stash->bs * stash->bs; 62467318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 625a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 626a2d1c673SSatish Balay while (!match_found) { 62767318a8aSJed Brown if (stash->reproduce) { 62867318a8aSJed Brown i = stash->reproduce_count++; 6299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status)); 63067318a8aSJed Brown } else { 6319566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status)); 63267318a8aSJed Brown } 63308401ef6SPierre Jolivet PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!"); 634533163c2SBarry Smith 63567318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 636a2d1c673SSatish Balay if (i % 2) { 6379566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals)); 638c1dc657dSBarry Smith flg_v[2 * recv_status.MPI_SOURCE] = i / 2; 639a2d1c673SSatish Balay *nvals = *nvals / bs2; 640563fb871SSatish Balay } else { 6419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals)); 642563fb871SSatish Balay flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2; 643563fb871SSatish Balay *nvals = *nvals / 2; /* This message has both row indices and col indices */ 644bc5ccf88SSatish Balay } 645a2d1c673SSatish Balay 646cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 647c1dc657dSBarry Smith i1 = flg_v[2 * recv_status.MPI_SOURCE]; 648c1dc657dSBarry Smith i2 = flg_v[2 * recv_status.MPI_SOURCE + 1]; 649a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 650563fb871SSatish Balay *rows = stash->rindices[i2]; 651a2d1c673SSatish Balay *cols = *rows + *nvals; 652563fb871SSatish Balay *vals = stash->rvalues[i1]; 653a2d1c673SSatish Balay *flg = 1; 654a2d1c673SSatish Balay stash->nprocessed++; 65535d8aa7fSBarry Smith match_found = PETSC_TRUE; 656bc5ccf88SSatish Balay } 657bc5ccf88SSatish Balay } 658bc5ccf88SSatish Balay PetscFunctionReturn(0); 659bc5ccf88SSatish Balay } 660d7d60843SJed Brown 661e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI) 662d7d60843SJed Brown typedef struct { 663d7d60843SJed Brown PetscInt row; 664d7d60843SJed Brown PetscInt col; 665d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 666d7d60843SJed Brown } MatStashBlock; 667d7d60843SJed Brown 6689371c9d4SSatish Balay static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode) { 669d7d60843SJed Brown PetscMatStashSpace space; 670d7d60843SJed Brown PetscInt n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i; 671d7d60843SJed Brown PetscScalar **valptr; 672d7d60843SJed Brown 673d7d60843SJed Brown PetscFunctionBegin; 6749566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm)); 675d7d60843SJed Brown for (space = stash->space_head, cnt = 0; space; space = space->next) { 676d7d60843SJed Brown for (i = 0; i < space->local_used; i++) { 677d7d60843SJed Brown row[cnt] = space->idx[i]; 678d7d60843SJed Brown col[cnt] = space->idy[i]; 679d7d60843SJed Brown valptr[cnt] = &space->val[i * bs2]; 680d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 681d7d60843SJed Brown cnt++; 682d7d60843SJed Brown } 683d7d60843SJed Brown } 68408401ef6SPierre Jolivet PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt); 6859566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArrayPair(n, row, col, perm)); 686d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 687d7d60843SJed Brown for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) { 688d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 689d7d60843SJed Brown PetscInt colstart; 6909566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart])); 691d7d60843SJed Brown for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */ 692d7d60843SJed Brown PetscInt j, l; 693d7d60843SJed Brown MatStashBlock *block; 6949566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block)); 695d7d60843SJed Brown block->row = row[rowstart]; 696d7d60843SJed Brown block->col = col[colstart]; 6979566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2)); 698d7d60843SJed Brown for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 699d7d60843SJed Brown if (insertmode == ADD_VALUES) { 700d7d60843SJed Brown for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l]; 701d7d60843SJed Brown } else { 7029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2)); 703d7d60843SJed Brown } 704d7d60843SJed Brown } 705d7d60843SJed Brown colstart = j; 706d7d60843SJed Brown } 707d7d60843SJed Brown rowstart = i; 708d7d60843SJed Brown } 709d7d60843SJed Brown } 7109566063dSJacob Faibussowitsch PetscCall(PetscFree4(row, col, valptr, perm)); 711d7d60843SJed Brown PetscFunctionReturn(0); 712d7d60843SJed Brown } 713d7d60843SJed Brown 7149371c9d4SSatish Balay static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) { 715d7d60843SJed Brown PetscFunctionBegin; 716d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 717d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 718d7d60843SJed Brown PetscMPIInt blocklens[2]; 719d7d60843SJed Brown MPI_Aint displs[2]; 720d7d60843SJed Brown MPI_Datatype types[2], stype; 721223b4c07SBarry Smith /* 722223b4c07SBarry Smith DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex. 723f41aa5efSJunchao Zhang std::complex itself has standard layout, so does DummyBlock, recursively. 724a5b23f4aSJose E. Roman To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout, 725f41aa5efSJunchao Zhang though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++. 726f41aa5efSJunchao Zhang offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof. 727f41aa5efSJunchao Zhang 728f41aa5efSJunchao Zhang We can test if std::complex has standard layout with the following code: 729f41aa5efSJunchao Zhang #include <iostream> 730f41aa5efSJunchao Zhang #include <type_traits> 731f41aa5efSJunchao Zhang #include <complex> 732f41aa5efSJunchao Zhang int main() { 733f41aa5efSJunchao Zhang std::cout << std::boolalpha; 734f41aa5efSJunchao Zhang std::cout << std::is_standard_layout<std::complex<double> >::value << '\n'; 735f41aa5efSJunchao Zhang } 736f41aa5efSJunchao Zhang Output: true 7379503c6c6SJed Brown */ 7389371c9d4SSatish Balay struct DummyBlock { 7399371c9d4SSatish Balay PetscInt row, col; 7409371c9d4SSatish Balay PetscScalar vals; 7419371c9d4SSatish Balay }; 742d7d60843SJed Brown 7439503c6c6SJed Brown stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar); 744d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 745d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 746d7d60843SJed Brown } 7479566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks)); 7489566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks)); 7499566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe)); 750d7d60843SJed Brown blocklens[0] = 2; 751d7d60843SJed Brown blocklens[1] = bs2; 7529503c6c6SJed Brown displs[0] = offsetof(struct DummyBlock, row); 7539503c6c6SJed Brown displs[1] = offsetof(struct DummyBlock, vals); 754d7d60843SJed Brown types[0] = MPIU_INT; 755d7d60843SJed Brown types[1] = MPIU_SCALAR; 7569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype)); 7579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stype)); 7589566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype)); 7599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stash->blocktype)); 7609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&stype)); 761d7d60843SJed Brown } 762d7d60843SJed Brown PetscFunctionReturn(0); 763d7d60843SJed Brown } 764d7d60843SJed Brown 765d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message. 766d7d60843SJed Brown * Here we post the main sends. 767d7d60843SJed Brown */ 7689371c9d4SSatish Balay static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx) { 769d7d60843SJed Brown MatStash *stash = (MatStash *)ctx; 770d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader *)sdata; 771d7d60843SJed Brown 772d7d60843SJed Brown PetscFunctionBegin; 77308401ef6SPierre Jolivet PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]); 7749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0])); 775d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 776d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 777d7d60843SJed Brown PetscFunctionReturn(0); 778d7d60843SJed Brown } 779d7d60843SJed Brown 780223b4c07SBarry Smith /* 781223b4c07SBarry Smith Callback invoked by target after receiving rendezvous message. 782223b4c07SBarry Smith Here we post the main recvs. 783d7d60843SJed Brown */ 7849371c9d4SSatish Balay static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx) { 785d7d60843SJed Brown MatStash *stash = (MatStash *)ctx; 786d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader *)rdata; 787d7d60843SJed Brown MatStashFrame *frame; 788d7d60843SJed Brown 789d7d60843SJed Brown PetscFunctionBegin; 7909566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame)); 7919566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer)); 7929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0])); 793d7d60843SJed Brown frame->count = hdr->count; 794d7d60843SJed Brown frame->pending = 1; 795d7d60843SJed Brown PetscFunctionReturn(0); 796d7d60843SJed Brown } 797d7d60843SJed Brown 798d7d60843SJed Brown /* 799d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 800d7d60843SJed Brown */ 8019371c9d4SSatish Balay static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[]) { 802d7d60843SJed Brown size_t nblocks; 803d7d60843SJed Brown char *sendblocks; 804d7d60843SJed Brown 805d7d60843SJed Brown PetscFunctionBegin; 80676bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */ 8074b4eb8d3SJed Brown InsertMode addv; 8081c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 80908401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 8104b4eb8d3SJed Brown } 8114b4eb8d3SJed Brown 8129566063dSJacob Faibussowitsch PetscCall(MatStashBlockTypeSetUp(stash)); 8139566063dSJacob Faibussowitsch PetscCall(MatStashSortCompress_Private(stash, mat->insertmode)); 8149566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks)); 8159566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks)); 81660f34548SJunchao Zhang if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 81723b7d1baSJed Brown PetscInt i; 81823b7d1baSJed Brown size_t b; 81997da8949SJed Brown for (i = 0, b = 0; i < stash->nsendranks; i++) { 82097da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size]; 82197da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 82297da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 82397da8949SJed Brown for (; b < nblocks; b++) { 82497da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size]; 825aed4548fSBarry Smith PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]); 82697da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break; 82797da8949SJed Brown stash->sendhdr[i].count++; 82897da8949SJed Brown } 82997da8949SJed Brown } 83097da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 83123b7d1baSJed Brown PetscInt sendno; 83223b7d1baSJed Brown size_t i, rowstart; 833d7d60843SJed Brown 834d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 835d7d60843SJed Brown stash->nsendranks = 0; 836d7d60843SJed Brown for (rowstart = 0; rowstart < nblocks;) { 8377e2ea869SJed Brown PetscInt owner; 838d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size]; 8399566063dSJacob Faibussowitsch PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner)); 840d7d60843SJed Brown if (owner < 0) owner = -(owner + 2); 841d7d60843SJed Brown for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 842d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size]; 8437e2ea869SJed Brown if (sendblock_i->row >= owners[owner + 1]) break; 844d7d60843SJed Brown } 845d7d60843SJed Brown stash->nsendranks++; 846d7d60843SJed Brown rowstart = i; 847d7d60843SJed Brown } 8489566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes)); 849d7d60843SJed Brown 850d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 851d7d60843SJed Brown sendno = 0; 852d7d60843SJed Brown for (rowstart = 0; rowstart < nblocks;) { 853d7d60843SJed Brown PetscInt owner; 854d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size]; 8559566063dSJacob Faibussowitsch PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner)); 856d7d60843SJed Brown if (owner < 0) owner = -(owner + 2); 857d7d60843SJed Brown stash->sendranks[sendno] = owner; 858d7d60843SJed Brown for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 859d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size]; 8607e2ea869SJed Brown if (sendblock_i->row >= owners[owner + 1]) break; 861d7d60843SJed Brown } 862d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 863d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 864d7d60843SJed Brown stash->sendhdr[sendno].count = i - rowstart; 865d7d60843SJed Brown sendno++; 866d7d60843SJed Brown rowstart = i; 867d7d60843SJed Brown } 86808401ef6SPierre Jolivet PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno); 869d7d60843SJed Brown } 870d7d60843SJed Brown 8714b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 8724b4eb8d3SJed Brown * message or a dummy entry of some sort. */ 8734b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) { 87423b7d1baSJed Brown size_t i; 8754b4eb8d3SJed Brown for (i = 0; i < nblocks; i++) { 8764b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size]; 8774b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row + 1); 8784b4eb8d3SJed Brown } 8794b4eb8d3SJed Brown } 8804b4eb8d3SJed Brown 88160f34548SJunchao Zhang if (stash->first_assembly_done) { 88297da8949SJed Brown PetscMPIInt i, tag; 8839566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &tag)); 884*48a46eb9SPierre Jolivet for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash)); 885*48a46eb9SPierre Jolivet for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash)); 88697da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 88797da8949SJed Brown } else { 8889371c9d4SSatish Balay PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash)); 8899566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses)); 89097da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 89197da8949SJed Brown } 892d7d60843SJed Brown 8939566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes)); 894d7d60843SJed Brown stash->recvframe_active = NULL; 895d7d60843SJed Brown stash->recvframe_i = 0; 896d7d60843SJed Brown stash->some_i = 0; 897d7d60843SJed Brown stash->some_count = 0; 898d7d60843SJed Brown stash->recvcount = 0; 89960f34548SJunchao Zhang stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */ 9004b4eb8d3SJed Brown stash->insertmode = &mat->insertmode; 901d7d60843SJed Brown PetscFunctionReturn(0); 902d7d60843SJed Brown } 903d7d60843SJed Brown 9049371c9d4SSatish Balay static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg) { 905d7d60843SJed Brown MatStashBlock *block; 906d7d60843SJed Brown 907d7d60843SJed Brown PetscFunctionBegin; 908d7d60843SJed Brown *flg = 0; 909d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 910d7d60843SJed Brown if (stash->some_i == stash->some_count) { 911d7d60843SJed Brown if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 9129566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE)); 913d7d60843SJed Brown stash->some_i = 0; 914d7d60843SJed Brown } 915d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 916d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 917d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 9189566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count)); 919d7d60843SJed Brown } 9204b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 9214b4eb8d3SJed Brown block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0]; 9224b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 923aed4548fSBarry Smith PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]); 924aed4548fSBarry Smith PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]); 9254b4eb8d3SJed Brown } 926d7d60843SJed Brown stash->some_i++; 927d7d60843SJed Brown stash->recvcount++; 928d7d60843SJed Brown stash->recvframe_i = 0; 929d7d60843SJed Brown } 930d7d60843SJed Brown *n = 1; 931d7d60843SJed Brown block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size]; 9324b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1); 933d7d60843SJed Brown *row = &block->row; 934d7d60843SJed Brown *col = &block->col; 935d7d60843SJed Brown *val = block->vals; 936d7d60843SJed Brown stash->recvframe_i++; 937d7d60843SJed Brown *flg = 1; 938d7d60843SJed Brown PetscFunctionReturn(0); 939d7d60843SJed Brown } 940d7d60843SJed Brown 9419371c9d4SSatish Balay static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) { 942d7d60843SJed Brown PetscFunctionBegin; 9439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE)); 94460f34548SJunchao Zhang if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 945d2b3fd65SBarry Smith PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL)); 9463575f486SJed Brown } else { /* No reuse, so collect everything. */ 9479566063dSJacob Faibussowitsch PetscCall(MatStashScatterDestroy_BTS(stash)); 94897da8949SJed Brown } 949d7d60843SJed Brown 950d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 951d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 952d7d60843SJed Brown Also update the oldmax, only if it increases */ 953d7d60843SJed Brown if (stash->n) { 954d7d60843SJed Brown PetscInt bs2 = stash->bs * stash->bs; 955d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2; 956d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 957d7d60843SJed Brown } 958d7d60843SJed Brown 959d7d60843SJed Brown stash->nmax = 0; 960d7d60843SJed Brown stash->n = 0; 961d7d60843SJed Brown stash->reallocs = -1; 962d7d60843SJed Brown stash->nprocessed = 0; 963d7d60843SJed Brown 9649566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 965d7d60843SJed Brown 966f4259b30SLisandro Dalcin stash->space = NULL; 967d7d60843SJed Brown 968d7d60843SJed Brown PetscFunctionReturn(0); 969d7d60843SJed Brown } 970d7d60843SJed Brown 9719371c9d4SSatish Balay PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) { 972d7d60843SJed Brown PetscFunctionBegin; 9739566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segsendblocks)); 9749566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvframe)); 975d7d60843SJed Brown stash->recvframes = NULL; 9769566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks)); 977*48a46eb9SPierre Jolivet if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype)); 978d7d60843SJed Brown stash->nsendranks = 0; 979d7d60843SJed Brown stash->nrecvranks = 0; 9809566063dSJacob Faibussowitsch PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes)); 9819566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->sendreqs)); 9829566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvreqs)); 9839566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvranks)); 9849566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvhdr)); 9859566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->some_indices, stash->some_statuses)); 986d7d60843SJed Brown PetscFunctionReturn(0); 987d7d60843SJed Brown } 9881667be42SBarry Smith #endif 989