1af0996ceSBarry Smith #include <petsc/private/matimpl.h> 25bd3b8fbSHong Zhang 3bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 44c1ff481SSatish Balay 5ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *); 6d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *); 875d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *); 10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *); 1275d39b6aSBarry Smith #endif 13d7d60843SJed Brown 149417f4adSLois Curfman McInnes /* 158798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 164c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 174c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 189417f4adSLois Curfman McInnes 194c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 204c1ff481SSatish Balay 214c1ff481SSatish Balay Input Parameters: 224c1ff481SSatish Balay comm - communicator, required for scatters. 234c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 244c1ff481SSatish Balay 252fe279fdSBarry Smith Output Parameter: 264c1ff481SSatish Balay stash - the newly created stash 279417f4adSLois Curfman McInnes */ 28d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash) 29d71ae5a4SJacob Faibussowitsch { 30533163c2SBarry Smith PetscInt max, *opt, nopt, i; 31ace3abfcSBarry Smith PetscBool flg; 32bc5ccf88SSatish Balay 333a40ed3dSBarry Smith PetscFunctionBegin; 34bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 35752ec6e0SSatish Balay stash->comm = comm; 368865f1eaSKarl Rupp 379566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1)); 389566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2)); 399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size)); 409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank)); 419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v)); 42533163c2SBarry Smith for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1; 43533163c2SBarry Smith 44434d7ff9SSatish Balay nopt = stash->size; 459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nopt, &opt)); 469566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg)); 47434d7ff9SSatish Balay if (flg) { 48434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 49434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 50434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 51f4ab19daSSatish Balay else max = 0; /* Use default */ 52434d7ff9SSatish Balay stash->umax = max; 53434d7ff9SSatish Balay } else { 54434d7ff9SSatish Balay stash->umax = 0; 55434d7ff9SSatish Balay } 569566063dSJacob Faibussowitsch PetscCall(PetscFree(opt)); 574c1ff481SSatish Balay if (bs <= 0) bs = 1; 58a2d1c673SSatish Balay 594c1ff481SSatish Balay stash->bs = bs; 609417f4adSLois Curfman McInnes stash->nmax = 0; 61434d7ff9SSatish Balay stash->oldnmax = 0; 629417f4adSLois Curfman McInnes stash->n = 0; 634c1ff481SSatish Balay stash->reallocs = -1; 64f4259b30SLisandro Dalcin stash->space_head = NULL; 65f4259b30SLisandro Dalcin stash->space = NULL; 669417f4adSLois Curfman McInnes 67f4259b30SLisandro Dalcin stash->send_waits = NULL; 68f4259b30SLisandro Dalcin stash->recv_waits = NULL; 69f4259b30SLisandro Dalcin stash->send_status = NULL; 70bc5ccf88SSatish Balay stash->nsends = 0; 71bc5ccf88SSatish Balay stash->nrecvs = 0; 72f4259b30SLisandro Dalcin stash->svalues = NULL; 73f4259b30SLisandro Dalcin stash->rvalues = NULL; 74f4259b30SLisandro Dalcin stash->rindices = NULL; 75a2d1c673SSatish Balay stash->nprocessed = 0; 7667318a8aSJed Brown stash->reproduce = PETSC_FALSE; 77d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 788865f1eaSKarl Rupp 799566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL)); 801667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 81ca723aa4SStefano Zampini flg = PETSC_FALSE; 829566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL)); 83b30fb036SBarry Smith if (!flg) { 84d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 85d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 86d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 87d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 88ac2b2aa0SJed Brown } else { 891667be42SBarry Smith #endif 90ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 91ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 92ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 93ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 941667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 95ac2b2aa0SJed Brown } 961667be42SBarry Smith #endif 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 989417f4adSLois Curfman McInnes } 999417f4adSLois Curfman McInnes 1004c1ff481SSatish Balay /* 1018798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 1024c1ff481SSatish Balay */ 103d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashDestroy_Private(MatStash *stash) 104d71ae5a4SJacob Faibussowitsch { 105bc5ccf88SSatish Balay PetscFunctionBegin; 1069566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 1079566063dSJacob Faibussowitsch if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash)); 108f4259b30SLisandro Dalcin stash->space = NULL; 1099566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->flg_v)); 1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 111bc5ccf88SSatish Balay } 112bc5ccf88SSatish Balay 1134c1ff481SSatish Balay /* 11467318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1154c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 11667318a8aSJed Brown all the memory used for message passing is cleaned up. This 1174c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1184c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1194c1ff481SSatish Balay so that the same value can be used the next time through. 1204c1ff481SSatish Balay */ 121d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 122d71ae5a4SJacob Faibussowitsch { 123ac2b2aa0SJed Brown PetscFunctionBegin; 1249566063dSJacob Faibussowitsch PetscCall((*stash->ScatterEnd)(stash)); 1253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 126ac2b2aa0SJed Brown } 127ac2b2aa0SJed Brown 128d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 129d71ae5a4SJacob Faibussowitsch { 130*6497c311SBarry Smith PetscMPIInt nsends = stash->nsends; 131*6497c311SBarry Smith PetscInt bs2, oldnmax; 132a2d1c673SSatish Balay MPI_Status *send_status; 133a2d1c673SSatish Balay 1343a40ed3dSBarry Smith PetscFunctionBegin; 135*6497c311SBarry Smith for (PetscMPIInt i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1; 136a2d1c673SSatish Balay /* wait on sends */ 137a2d1c673SSatish Balay if (nsends) { 1389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_status)); 1399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status)); 1409566063dSJacob Faibussowitsch PetscCall(PetscFree(send_status)); 141a2d1c673SSatish Balay } 142a2d1c673SSatish Balay 143c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 144434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 145434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 146b9b97703SBarry Smith if (stash->n) { 14794b769a5SSatish Balay bs2 = stash->bs * stash->bs; 1488a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5) * bs2; 149434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 150b9b97703SBarry Smith } 151434d7ff9SSatish Balay 152d07ff455SSatish Balay stash->nmax = 0; 153d07ff455SSatish Balay stash->n = 0; 1544c1ff481SSatish Balay stash->reallocs = -1; 155a2d1c673SSatish Balay stash->nprocessed = 0; 1568865f1eaSKarl Rupp 1579566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 1588865f1eaSKarl Rupp 159f4259b30SLisandro Dalcin stash->space = NULL; 1608865f1eaSKarl Rupp 1619566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->send_waits)); 1629566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recv_waits)); 1639566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->svalues, stash->sindices)); 1649566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues[0])); 1659566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rvalues)); 1669566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices[0])); 1679566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->rindices)); 1683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1699417f4adSLois Curfman McInnes } 1709417f4adSLois Curfman McInnes 1714c1ff481SSatish Balay /* 1726aad120cSJose E. Roman MatStashGetInfo_Private - Gets the relevant statistics of the stash 1734c1ff481SSatish Balay 1744c1ff481SSatish Balay Input Parameters: 1754c1ff481SSatish Balay stash - the stash 17694b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1774c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1784c1ff481SSatish Balay 1794c1ff481SSatish Balay */ 180d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs) 181d71ae5a4SJacob Faibussowitsch { 182c1ac3661SBarry Smith PetscInt bs2 = stash->bs * stash->bs; 18394b769a5SSatish Balay 1843a40ed3dSBarry Smith PetscFunctionBegin; 1851ecfd215SBarry Smith if (nstash) *nstash = stash->n * bs2; 1861ecfd215SBarry Smith if (reallocs) { 187434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 188434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1891ecfd215SBarry Smith } 1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 191bc5ccf88SSatish Balay } 1924c1ff481SSatish Balay 1934c1ff481SSatish Balay /* 1948798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1954c1ff481SSatish Balay 1964c1ff481SSatish Balay Input Parameters: 1974c1ff481SSatish Balay stash - the stash 1984c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1994c1ff481SSatish Balay this value is used while allocating memory. 2004c1ff481SSatish Balay */ 201d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max) 202d71ae5a4SJacob Faibussowitsch { 203bc5ccf88SSatish Balay PetscFunctionBegin; 204434d7ff9SSatish Balay stash->umax = max; 2053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20697530c3fSBarry Smith } 20797530c3fSBarry Smith 2088798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2094c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2104c1ff481SSatish Balay being inserted into the stash. 2114c1ff481SSatish Balay 2124c1ff481SSatish Balay Input Parameters: 2134c1ff481SSatish Balay stash - the stash 2144c1ff481SSatish Balay incr - the minimum increase requested 2154c1ff481SSatish Balay 21611a5261eSBarry Smith Note: 2174c1ff481SSatish Balay This routine doubles the currently used memory. 2184c1ff481SSatish Balay */ 219d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr) 220d71ae5a4SJacob Faibussowitsch { 2215bd3b8fbSHong Zhang PetscInt newnmax, bs2 = stash->bs * stash->bs; 2229417f4adSLois Curfman McInnes 2233a40ed3dSBarry Smith PetscFunctionBegin; 2249417f4adSLois Curfman McInnes /* allocate a larger stash */ 225c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 226434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax / bs2; 227434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE / bs2; 228a678f235SPierre Jolivet } else if (!stash->nmax) { /* reusing stash */ 229434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2; 230434d7ff9SSatish Balay else newnmax = stash->oldnmax / bs2; 231434d7ff9SSatish Balay } else newnmax = stash->nmax * 2; 2324c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr; 233d07ff455SSatish Balay 23475cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 2359566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space)); 236a678f235SPierre Jolivet if (!stash->space_head) { /* new stash or reusing stash->oldnmax */ 237b087b6d6SSatish Balay stash->space_head = stash->space; 23875cae7c1SHong Zhang } 239b087b6d6SSatish Balay 240bc5ccf88SSatish Balay stash->reallocs++; 24175cae7c1SHong Zhang stash->nmax = newnmax; 2423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 243bc5ccf88SSatish Balay } 244bc5ccf88SSatish Balay /* 2458798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2465e116b59SBarry Smith expects the values to be row-oriented. Multiple columns belong to the same row 2474c1ff481SSatish Balay can be inserted with a single call to this function. 2484c1ff481SSatish Balay 2494c1ff481SSatish Balay Input Parameters: 2504c1ff481SSatish Balay stash - the stash 2515e116b59SBarry Smith row - the global row corresponding to the values 2524c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2534c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2544c1ff481SSatish Balay values - the values inserted 255bc5ccf88SSatish Balay */ 256d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries) 257d71ae5a4SJacob Faibussowitsch { 258b400d20cSBarry Smith PetscInt i, k, cnt = 0; 25975cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 260bc5ccf88SSatish Balay 261bc5ccf88SSatish Balay PetscFunctionBegin; 2624c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 26348a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 26475cae7c1SHong Zhang space = stash->space; 26575cae7c1SHong Zhang k = space->local_used; 2664c1ff481SSatish Balay for (i = 0; i < n; i++) { 26714069ce9SStefano Zampini if (ignorezeroentries && values && values[i] == 0.0) continue; 26875cae7c1SHong Zhang space->idx[k] = row; 26975cae7c1SHong Zhang space->idy[k] = idxn[i]; 27014069ce9SStefano Zampini space->val[k] = values ? values[i] : 0.0; 27175cae7c1SHong Zhang k++; 272b400d20cSBarry Smith cnt++; 2739417f4adSLois Curfman McInnes } 274b400d20cSBarry Smith stash->n += cnt; 275b400d20cSBarry Smith space->local_used += cnt; 276b400d20cSBarry Smith space->local_remaining -= cnt; 2773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 278a2d1c673SSatish Balay } 27975cae7c1SHong Zhang 2804c1ff481SSatish Balay /* 2818798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2825e116b59SBarry Smith expects the values to be column-oriented. Multiple columns belong to the same row 2834c1ff481SSatish Balay can be inserted with a single call to this function. 284a2d1c673SSatish Balay 2854c1ff481SSatish Balay Input Parameters: 2864c1ff481SSatish Balay stash - the stash 2875e116b59SBarry Smith row - the global row corresponding to the values 2884c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2894c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2904c1ff481SSatish Balay values - the values inserted 2914c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2925e116b59SBarry Smith this happens because the input is column-oriented. 2934c1ff481SSatish Balay */ 294d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries) 295d71ae5a4SJacob Faibussowitsch { 29650e9ab7cSBarry Smith PetscInt i, k, cnt = 0; 29775cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 298a2d1c673SSatish Balay 2994c1ff481SSatish Balay PetscFunctionBegin; 3004c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 30148a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 30275cae7c1SHong Zhang space = stash->space; 30375cae7c1SHong Zhang k = space->local_used; 3044c1ff481SSatish Balay for (i = 0; i < n; i++) { 30514069ce9SStefano Zampini if (ignorezeroentries && values && values[i * stepval] == 0.0) continue; 30675cae7c1SHong Zhang space->idx[k] = row; 30775cae7c1SHong Zhang space->idy[k] = idxn[i]; 30814069ce9SStefano Zampini space->val[k] = values ? values[i * stepval] : 0.0; 30975cae7c1SHong Zhang k++; 310b400d20cSBarry Smith cnt++; 3114c1ff481SSatish Balay } 312b400d20cSBarry Smith stash->n += cnt; 313b400d20cSBarry Smith space->local_used += cnt; 314b400d20cSBarry Smith space->local_remaining -= cnt; 3153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3164c1ff481SSatish Balay } 3174c1ff481SSatish Balay 3184c1ff481SSatish Balay /* 3198798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3205e116b59SBarry Smith This function expects the values to be row-oriented. Multiple columns belong 3214c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3224c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3234c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3244c1ff481SSatish Balay 3254c1ff481SSatish Balay Input Parameters: 3264c1ff481SSatish Balay stash - the stash 3275e116b59SBarry Smith row - the global block-row corresponding to the values 3284c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3294c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3304c1ff481SSatish Balay values. Each block is of size bs*bs. 3314c1ff481SSatish Balay values - the values inserted 3324c1ff481SSatish Balay rmax - the number of block-rows in the original block. 333a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 3344c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3354c1ff481SSatish Balay */ 336d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx) 337d71ae5a4SJacob Faibussowitsch { 33875cae7c1SHong Zhang PetscInt i, j, k, bs2, bs = stash->bs, l; 33954f21887SBarry Smith const PetscScalar *vals; 34054f21887SBarry Smith PetscScalar *array; 34175cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 342a2d1c673SSatish Balay 343a2d1c673SSatish Balay PetscFunctionBegin; 34448a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 34575cae7c1SHong Zhang space = stash->space; 34675cae7c1SHong Zhang l = space->local_used; 34775cae7c1SHong Zhang bs2 = bs * bs; 3484c1ff481SSatish Balay for (i = 0; i < n; i++) { 34975cae7c1SHong Zhang space->idx[l] = row; 35075cae7c1SHong Zhang space->idy[l] = idxn[i]; 3515e116b59SBarry Smith /* Now copy over the block of values. Store the values column-oriented. 35275cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 353da81f932SPierre Jolivet function call */ 35475cae7c1SHong Zhang array = space->val + bs2 * l; 35575cae7c1SHong Zhang vals = values + idx * bs2 * n + bs * i; 35675cae7c1SHong Zhang for (j = 0; j < bs; j++) { 35714069ce9SStefano Zampini for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0; 35875cae7c1SHong Zhang array++; 35975cae7c1SHong Zhang vals += cmax * bs; 36075cae7c1SHong Zhang } 36175cae7c1SHong Zhang l++; 362a2d1c673SSatish Balay } 3635bd3b8fbSHong Zhang stash->n += n; 36475cae7c1SHong Zhang space->local_used += n; 36575cae7c1SHong Zhang space->local_remaining -= n; 3663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3674c1ff481SSatish Balay } 3684c1ff481SSatish Balay 3694c1ff481SSatish Balay /* 3708798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3715e116b59SBarry Smith This function expects the values to be column-oriented. Multiple columns belong 3724c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3734c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3744c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3754c1ff481SSatish Balay 3764c1ff481SSatish Balay Input Parameters: 3774c1ff481SSatish Balay stash - the stash 3785e116b59SBarry Smith row - the global block-row corresponding to the values 3794c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3804c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3814c1ff481SSatish Balay values. Each block is of size bs*bs. 3824c1ff481SSatish Balay values - the values inserted 3834c1ff481SSatish Balay rmax - the number of block-rows in the original block. 384a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 3854c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3864c1ff481SSatish Balay */ 387d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx) 388d71ae5a4SJacob Faibussowitsch { 38975cae7c1SHong Zhang PetscInt i, j, k, bs2, bs = stash->bs, l; 39054f21887SBarry Smith const PetscScalar *vals; 39154f21887SBarry Smith PetscScalar *array; 39275cae7c1SHong Zhang PetscMatStashSpace space = stash->space; 3934c1ff481SSatish Balay 3944c1ff481SSatish Balay PetscFunctionBegin; 39548a46eb9SPierre Jolivet if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n)); 39675cae7c1SHong Zhang space = stash->space; 39775cae7c1SHong Zhang l = space->local_used; 39875cae7c1SHong Zhang bs2 = bs * bs; 3994c1ff481SSatish Balay for (i = 0; i < n; i++) { 40075cae7c1SHong Zhang space->idx[l] = row; 40175cae7c1SHong Zhang space->idy[l] = idxn[i]; 4025e116b59SBarry Smith /* Now copy over the block of values. Store the values column-oriented. 40375cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 404da81f932SPierre Jolivet function call */ 40575cae7c1SHong Zhang array = space->val + bs2 * l; 40675cae7c1SHong Zhang vals = values + idx * bs2 * n + bs * i; 40775cae7c1SHong Zhang for (j = 0; j < bs; j++) { 40814069ce9SStefano Zampini for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0; 40975cae7c1SHong Zhang array += bs; 41075cae7c1SHong Zhang vals += rmax * bs; 41175cae7c1SHong Zhang } 4125bd3b8fbSHong Zhang l++; 413a2d1c673SSatish Balay } 4145bd3b8fbSHong Zhang stash->n += n; 41575cae7c1SHong Zhang space->local_used += n; 41675cae7c1SHong Zhang space->local_remaining -= n; 4173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4189417f4adSLois Curfman McInnes } 4194c1ff481SSatish Balay /* 4208798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4214c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4224c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4234c1ff481SSatish Balay processors. 424bc5ccf88SSatish Balay 4254c1ff481SSatish Balay Input Parameters: 4264c1ff481SSatish Balay stash - the stash 4274c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4284c1ff481SSatish Balay for each node. 4294c1ff481SSatish Balay 43011a5261eSBarry Smith Note: 43195452b02SPatrick Sanan The 'owners' array in the cased of the blocked-stash has the 4324c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4334c1ff481SSatish Balay the proper global indices. 4344c1ff481SSatish Balay */ 435d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners) 436d71ae5a4SJacob Faibussowitsch { 437ac2b2aa0SJed Brown PetscFunctionBegin; 4389566063dSJacob Faibussowitsch PetscCall((*stash->ScatterBegin)(mat, stash, owners)); 4393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 440ac2b2aa0SJed Brown } 441ac2b2aa0SJed Brown 442d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners) 443d71ae5a4SJacob Faibussowitsch { 444*6497c311SBarry Smith PetscInt *owner, *startv, *starti, bs2; 445*6497c311SBarry Smith PetscInt size = stash->size; 446*6497c311SBarry Smith PetscInt *sindices, **rindices, j, ii, idx, lastidx, l; 44754f21887SBarry Smith PetscScalar **rvalues, *svalues; 448bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 449563fb871SSatish Balay MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2; 450*6497c311SBarry Smith PetscMPIInt *sizes, *nlengths, nreceives, nsends, tag1 = stash->tag1, tag2 = stash->tag2; 4515bd3b8fbSHong Zhang PetscInt *sp_idx, *sp_idy; 45254f21887SBarry Smith PetscScalar *sp_val; 4535bd3b8fbSHong Zhang PetscMatStashSpace space, space_next; 454bc5ccf88SSatish Balay 455bc5ccf88SSatish Balay PetscFunctionBegin; 4564b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 4574b4eb8d3SJed Brown InsertMode addv; 4581c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 45908401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 4604b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */ 4614b4eb8d3SJed Brown } 4624b4eb8d3SJed Brown 4634c1ff481SSatish Balay bs2 = stash->bs * stash->bs; 46475cae7c1SHong Zhang 465bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 4669566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &nlengths)); 4679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(stash->n + 1, &owner)); 468a2d1c673SSatish Balay 469*6497c311SBarry Smith ii = j = 0; 4707357eb19SBarry Smith lastidx = -1; 4715bd3b8fbSHong Zhang space = stash->space_head; 4726c4ed002SBarry Smith while (space) { 47375cae7c1SHong Zhang space_next = space->next; 4745bd3b8fbSHong Zhang sp_idx = space->idx; 47575cae7c1SHong Zhang for (l = 0; l < space->local_used; l++) { 4767357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4775bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4787357eb19SBarry Smith lastidx = idx; 4797357eb19SBarry Smith for (; j < size; j++) { 4804c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j + 1]) { 4819371c9d4SSatish Balay nlengths[j]++; 482*6497c311SBarry Smith owner[ii] = j; 4839371c9d4SSatish Balay break; 484bc5ccf88SSatish Balay } 485bc5ccf88SSatish Balay } 486*6497c311SBarry Smith ii++; 48775cae7c1SHong Zhang } 48875cae7c1SHong Zhang space = space_next; 489bc5ccf88SSatish Balay } 490071fcb05SBarry Smith 491563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 4929566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &sizes)); 493*6497c311SBarry Smith nsends = 0; 494*6497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) { 4958865f1eaSKarl Rupp if (nlengths[i]) { 4969371c9d4SSatish Balay sizes[i] = 1; 4979371c9d4SSatish Balay nsends++; 4988865f1eaSKarl Rupp } 499563fb871SSatish Balay } 500bc5ccf88SSatish Balay 5019371c9d4SSatish Balay { 5029371c9d4SSatish Balay PetscMPIInt *onodes, *olengths; 503563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 5049566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives)); 5059566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths)); 506563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 507*6497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2; 5089566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1)); 509563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 510*6497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) PetscCall(PetscMPIIntCast(olengths[i] * bs2 / 2, &olengths[i])); 5119566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2)); 5129566063dSJacob Faibussowitsch PetscCall(PetscFree(onodes)); 5139371c9d4SSatish Balay PetscCall(PetscFree(olengths)); 5149371c9d4SSatish Balay } 515bc5ccf88SSatish Balay 516bc5ccf88SSatish Balay /* do sends: 517bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 518bc5ccf88SSatish Balay the ith processor 519bc5ccf88SSatish Balay */ 5209566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices)); 5219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nsends, &send_waits)); 5229566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &startv, size, &starti)); 523a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 5249371c9d4SSatish Balay startv[0] = 0; 5259371c9d4SSatish Balay starti[0] = 0; 526*6497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) { 527563fb871SSatish Balay startv[i] = startv[i - 1] + nlengths[i - 1]; 528533163c2SBarry Smith starti[i] = starti[i - 1] + 2 * nlengths[i - 1]; 529bc5ccf88SSatish Balay } 53075cae7c1SHong Zhang 531*6497c311SBarry Smith ii = 0; 5325bd3b8fbSHong Zhang space = stash->space_head; 5336c4ed002SBarry Smith while (space) { 53475cae7c1SHong Zhang space_next = space->next; 5355bd3b8fbSHong Zhang sp_idx = space->idx; 5365bd3b8fbSHong Zhang sp_idy = space->idy; 5375bd3b8fbSHong Zhang sp_val = space->val; 53875cae7c1SHong Zhang for (l = 0; l < space->local_used; l++) { 539*6497c311SBarry Smith j = owner[ii]; 540a2d1c673SSatish Balay if (bs2 == 1) { 5415bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 542a2d1c673SSatish Balay } else { 543c1ac3661SBarry Smith PetscInt k; 54454f21887SBarry Smith PetscScalar *buf1, *buf2; 5454c1ff481SSatish Balay buf1 = svalues + bs2 * startv[j]; 546b087b6d6SSatish Balay buf2 = space->val + bs2 * l; 5478865f1eaSKarl Rupp for (k = 0; k < bs2; k++) buf1[k] = buf2[k]; 548a2d1c673SSatish Balay } 5495bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5505bd3b8fbSHong Zhang sindices[starti[j] + nlengths[j]] = sp_idy[l]; 551bc5ccf88SSatish Balay startv[j]++; 552bc5ccf88SSatish Balay starti[j]++; 553*6497c311SBarry Smith ii++; 55475cae7c1SHong Zhang } 55575cae7c1SHong Zhang space = space_next; 556bc5ccf88SSatish Balay } 557bc5ccf88SSatish Balay startv[0] = 0; 558*6497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1]; 559e5d0e772SSatish Balay 560*6497c311SBarry Smith for (PetscMPIInt i = 0, count = 0; i < size; i++) { 56176ec1555SBarry Smith if (sizes[i]) { 562*6497c311SBarry Smith PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++)); 563*6497c311SBarry Smith PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++)); 564bc5ccf88SSatish Balay } 565b85c94c3SSatish Balay } 5666cf91177SBarry Smith #if defined(PETSC_USE_INFO) 567*6497c311SBarry Smith PetscCall(PetscInfo(NULL, "No of messages: %d \n", nsends)); 568*6497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) { 569*6497c311SBarry Smith if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))))); 570e5d0e772SSatish Balay } 571e5d0e772SSatish Balay #endif 5729566063dSJacob Faibussowitsch PetscCall(PetscFree(nlengths)); 5739566063dSJacob Faibussowitsch PetscCall(PetscFree(owner)); 5749566063dSJacob Faibussowitsch PetscCall(PetscFree2(startv, starti)); 5759566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes)); 576a2d1c673SSatish Balay 577563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 5789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * nreceives, &recv_waits)); 579563fb871SSatish Balay 580*6497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) { 581563fb871SSatish Balay recv_waits[2 * i] = recv_waits1[i]; 582563fb871SSatish Balay recv_waits[2 * i + 1] = recv_waits2[i]; 583563fb871SSatish Balay } 584563fb871SSatish Balay stash->recv_waits = recv_waits; 5858865f1eaSKarl Rupp 5869566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits1)); 5879566063dSJacob Faibussowitsch PetscCall(PetscFree(recv_waits2)); 588563fb871SSatish Balay 589c05d87d6SBarry Smith stash->svalues = svalues; 590c05d87d6SBarry Smith stash->sindices = sindices; 591c05d87d6SBarry Smith stash->rvalues = rvalues; 592c05d87d6SBarry Smith stash->rindices = rindices; 593c05d87d6SBarry Smith stash->send_waits = send_waits; 594c05d87d6SBarry Smith stash->nsends = nsends; 595c05d87d6SBarry Smith stash->nrecvs = nreceives; 59667318a8aSJed Brown stash->reproduce_count = 0; 5973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 598bc5ccf88SSatish Balay } 599bc5ccf88SSatish Balay 600a2d1c673SSatish Balay /* 6018798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 6028798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6034c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6044c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6054c1ff481SSatish Balay 6062fe279fdSBarry Smith Input Parameter: 6074c1ff481SSatish Balay stash - the stash 6084c1ff481SSatish Balay 6094c1ff481SSatish Balay Output Parameters: 6104c1ff481SSatish Balay nvals - the number of entries in the current message. 6114c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6124c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6134c1ff481SSatish Balay vals - the values 6144c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6154c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6164c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 617a2d1c673SSatish Balay */ 618d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg) 619d71ae5a4SJacob Faibussowitsch { 620ac2b2aa0SJed Brown PetscFunctionBegin; 6219566063dSJacob Faibussowitsch PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg)); 6223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 623ac2b2aa0SJed Brown } 624ac2b2aa0SJed Brown 625d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg) 626d71ae5a4SJacob Faibussowitsch { 627533163c2SBarry Smith PetscMPIInt i, *flg_v = stash->flg_v, i1, i2; 628fe09c992SBarry Smith PetscInt bs2; 629a2d1c673SSatish Balay MPI_Status recv_status; 630ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 631bc5ccf88SSatish Balay 632bc5ccf88SSatish Balay PetscFunctionBegin; 633a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 634a2d1c673SSatish Balay /* Return if no more messages to process */ 6353ba16761SJacob Faibussowitsch if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS); 636a2d1c673SSatish Balay 6374c1ff481SSatish Balay bs2 = stash->bs * stash->bs; 63867318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 639a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 640a2d1c673SSatish Balay while (!match_found) { 64167318a8aSJed Brown if (stash->reproduce) { 64267318a8aSJed Brown i = stash->reproduce_count++; 6439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status)); 64467318a8aSJed Brown } else { 6459566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status)); 64667318a8aSJed Brown } 64708401ef6SPierre Jolivet PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!"); 648533163c2SBarry Smith 64967318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 650a2d1c673SSatish Balay if (i % 2) { 6519566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals)); 652c1dc657dSBarry Smith flg_v[2 * recv_status.MPI_SOURCE] = i / 2; 653a2d1c673SSatish Balay *nvals = *nvals / bs2; 654563fb871SSatish Balay } else { 6559566063dSJacob Faibussowitsch PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals)); 656563fb871SSatish Balay flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2; 657563fb871SSatish Balay *nvals = *nvals / 2; /* This message has both row indices and col indices */ 658bc5ccf88SSatish Balay } 659a2d1c673SSatish Balay 660cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 661c1dc657dSBarry Smith i1 = flg_v[2 * recv_status.MPI_SOURCE]; 662c1dc657dSBarry Smith i2 = flg_v[2 * recv_status.MPI_SOURCE + 1]; 663a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 664563fb871SSatish Balay *rows = stash->rindices[i2]; 665a2d1c673SSatish Balay *cols = *rows + *nvals; 666563fb871SSatish Balay *vals = stash->rvalues[i1]; 667a2d1c673SSatish Balay *flg = 1; 668a2d1c673SSatish Balay stash->nprocessed++; 66935d8aa7fSBarry Smith match_found = PETSC_TRUE; 670bc5ccf88SSatish Balay } 671bc5ccf88SSatish Balay } 6723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 673bc5ccf88SSatish Balay } 674d7d60843SJed Brown 675e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI) 676d7d60843SJed Brown typedef struct { 677d7d60843SJed Brown PetscInt row; 678d7d60843SJed Brown PetscInt col; 679d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 680d7d60843SJed Brown } MatStashBlock; 681d7d60843SJed Brown 682d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode) 683d71ae5a4SJacob Faibussowitsch { 684d7d60843SJed Brown PetscMatStashSpace space; 685d7d60843SJed Brown PetscInt n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i; 686d7d60843SJed Brown PetscScalar **valptr; 687d7d60843SJed Brown 688d7d60843SJed Brown PetscFunctionBegin; 6899566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm)); 690d7d60843SJed Brown for (space = stash->space_head, cnt = 0; space; space = space->next) { 691d7d60843SJed Brown for (i = 0; i < space->local_used; i++) { 692d7d60843SJed Brown row[cnt] = space->idx[i]; 693d7d60843SJed Brown col[cnt] = space->idy[i]; 694d7d60843SJed Brown valptr[cnt] = &space->val[i * bs2]; 695d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 696d7d60843SJed Brown cnt++; 697d7d60843SJed Brown } 698d7d60843SJed Brown } 69908401ef6SPierre Jolivet PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt); 7009566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArrayPair(n, row, col, perm)); 701d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 702d7d60843SJed Brown for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) { 703d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 704d7d60843SJed Brown PetscInt colstart; 7059566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart])); 706d7d60843SJed Brown for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */ 707d7d60843SJed Brown PetscInt j, l; 708d7d60843SJed Brown MatStashBlock *block; 7099566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block)); 710d7d60843SJed Brown block->row = row[rowstart]; 711d7d60843SJed Brown block->col = col[colstart]; 7129566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2)); 713d7d60843SJed Brown for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 714d7d60843SJed Brown if (insertmode == ADD_VALUES) { 715d7d60843SJed Brown for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l]; 716d7d60843SJed Brown } else { 7179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2)); 718d7d60843SJed Brown } 719d7d60843SJed Brown } 720d7d60843SJed Brown colstart = j; 721d7d60843SJed Brown } 722d7d60843SJed Brown rowstart = i; 723d7d60843SJed Brown } 724d7d60843SJed Brown } 7259566063dSJacob Faibussowitsch PetscCall(PetscFree4(row, col, valptr, perm)); 7263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 727d7d60843SJed Brown } 728d7d60843SJed Brown 729d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 730d71ae5a4SJacob Faibussowitsch { 731d7d60843SJed Brown PetscFunctionBegin; 732d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 733d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 734d7d60843SJed Brown PetscMPIInt blocklens[2]; 735d7d60843SJed Brown MPI_Aint displs[2]; 736d7d60843SJed Brown MPI_Datatype types[2], stype; 737223b4c07SBarry Smith /* 738223b4c07SBarry Smith DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex. 739f41aa5efSJunchao Zhang std::complex itself has standard layout, so does DummyBlock, recursively. 740a5b23f4aSJose E. Roman To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout, 741f41aa5efSJunchao Zhang though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++. 742f41aa5efSJunchao Zhang offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof. 743f41aa5efSJunchao Zhang 744f41aa5efSJunchao Zhang We can test if std::complex has standard layout with the following code: 745f41aa5efSJunchao Zhang #include <iostream> 746f41aa5efSJunchao Zhang #include <type_traits> 747f41aa5efSJunchao Zhang #include <complex> 748f41aa5efSJunchao Zhang int main() { 749f41aa5efSJunchao Zhang std::cout << std::boolalpha; 750f41aa5efSJunchao Zhang std::cout << std::is_standard_layout<std::complex<double> >::value << '\n'; 751f41aa5efSJunchao Zhang } 752f41aa5efSJunchao Zhang Output: true 7539503c6c6SJed Brown */ 7549371c9d4SSatish Balay struct DummyBlock { 7559371c9d4SSatish Balay PetscInt row, col; 7569371c9d4SSatish Balay PetscScalar vals; 7579371c9d4SSatish Balay }; 758d7d60843SJed Brown 7599503c6c6SJed Brown stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar); 760d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 761d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 762d7d60843SJed Brown } 7639566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks)); 7649566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks)); 7659566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe)); 766d7d60843SJed Brown blocklens[0] = 2; 767*6497c311SBarry Smith blocklens[1] = (PetscMPIInt)bs2; 7689503c6c6SJed Brown displs[0] = offsetof(struct DummyBlock, row); 7699503c6c6SJed Brown displs[1] = offsetof(struct DummyBlock, vals); 770d7d60843SJed Brown types[0] = MPIU_INT; 771d7d60843SJed Brown types[1] = MPIU_SCALAR; 7729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype)); 7739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stype)); 7749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype)); 7759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&stash->blocktype)); 7769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&stype)); 777d7d60843SJed Brown } 7783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 779d7d60843SJed Brown } 780d7d60843SJed Brown 781da81f932SPierre Jolivet /* Callback invoked after target rank has initiated receive of rendezvous message. 782d7d60843SJed Brown * Here we post the main sends. 783d7d60843SJed Brown */ 784d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx) 785d71ae5a4SJacob Faibussowitsch { 786d7d60843SJed Brown MatStash *stash = (MatStash *)ctx; 787d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader *)sdata; 788d7d60843SJed Brown 789d7d60843SJed Brown PetscFunctionBegin; 79008401ef6SPierre Jolivet PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]); 791*6497c311SBarry Smith PetscCallMPI(MPIU_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0])); 792d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 793d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 7943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 795d7d60843SJed Brown } 796d7d60843SJed Brown 797223b4c07SBarry Smith /* 798223b4c07SBarry Smith Callback invoked by target after receiving rendezvous message. 799223b4c07SBarry Smith Here we post the main recvs. 800d7d60843SJed Brown */ 801d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx) 802d71ae5a4SJacob Faibussowitsch { 803d7d60843SJed Brown MatStash *stash = (MatStash *)ctx; 804d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader *)rdata; 805d7d60843SJed Brown MatStashFrame *frame; 806d7d60843SJed Brown 807d7d60843SJed Brown PetscFunctionBegin; 8089566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame)); 8099566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer)); 810*6497c311SBarry Smith PetscCallMPI(MPIU_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0])); 811d7d60843SJed Brown frame->count = hdr->count; 812d7d60843SJed Brown frame->pending = 1; 8133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 814d7d60843SJed Brown } 815d7d60843SJed Brown 816d7d60843SJed Brown /* 817d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 818d7d60843SJed Brown */ 819d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[]) 820d71ae5a4SJacob Faibussowitsch { 821*6497c311SBarry Smith PetscCount nblocks; 822d7d60843SJed Brown char *sendblocks; 823d7d60843SJed Brown 824d7d60843SJed Brown PetscFunctionBegin; 82576bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */ 8264b4eb8d3SJed Brown InsertMode addv; 8271c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat))); 82808401ef6SPierre Jolivet PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added"); 8294b4eb8d3SJed Brown } 8304b4eb8d3SJed Brown 8319566063dSJacob Faibussowitsch PetscCall(MatStashBlockTypeSetUp(stash)); 8329566063dSJacob Faibussowitsch PetscCall(MatStashSortCompress_Private(stash, mat->insertmode)); 8339566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks)); 8349566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks)); 83560f34548SJunchao Zhang if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 83623b7d1baSJed Brown PetscInt i; 837*6497c311SBarry Smith PetscCount b; 83897da8949SJed Brown for (i = 0, b = 0; i < stash->nsendranks; i++) { 83997da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size]; 84097da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 84197da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 84297da8949SJed Brown for (; b < nblocks; b++) { 84397da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size]; 844aed4548fSBarry Smith PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]); 84597da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break; 84697da8949SJed Brown stash->sendhdr[i].count++; 84797da8949SJed Brown } 84897da8949SJed Brown } 84997da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 85023b7d1baSJed Brown PetscInt sendno; 851*6497c311SBarry Smith PetscCount i, rowstart; 852d7d60843SJed Brown 853d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 854d7d60843SJed Brown stash->nsendranks = 0; 855d7d60843SJed Brown for (rowstart = 0; rowstart < nblocks;) { 8567e2ea869SJed Brown PetscInt owner; 857d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size]; 858*6497c311SBarry Smith 8599566063dSJacob Faibussowitsch PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner)); 860d7d60843SJed Brown if (owner < 0) owner = -(owner + 2); 861d7d60843SJed Brown for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 862d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size]; 863*6497c311SBarry Smith 8647e2ea869SJed Brown if (sendblock_i->row >= owners[owner + 1]) break; 865d7d60843SJed Brown } 866d7d60843SJed Brown stash->nsendranks++; 867d7d60843SJed Brown rowstart = i; 868d7d60843SJed Brown } 8699566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes)); 870d7d60843SJed Brown 871d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 872d7d60843SJed Brown sendno = 0; 873d7d60843SJed Brown for (rowstart = 0; rowstart < nblocks;) { 874*6497c311SBarry Smith PetscInt iowner; 875*6497c311SBarry Smith PetscMPIInt owner; 876d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size]; 877*6497c311SBarry Smith 878*6497c311SBarry Smith PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &iowner)); 879*6497c311SBarry Smith PetscCall(PetscMPIIntCast(iowner, &owner)); 880d7d60843SJed Brown if (owner < 0) owner = -(owner + 2); 881d7d60843SJed Brown stash->sendranks[sendno] = owner; 882d7d60843SJed Brown for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 883d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size]; 884*6497c311SBarry Smith 8857e2ea869SJed Brown if (sendblock_i->row >= owners[owner + 1]) break; 886d7d60843SJed Brown } 887d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 888d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 889*6497c311SBarry Smith PetscCall(PetscIntCast(i - rowstart, &stash->sendhdr[sendno].count)); 890d7d60843SJed Brown sendno++; 891d7d60843SJed Brown rowstart = i; 892d7d60843SJed Brown } 89308401ef6SPierre Jolivet PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno); 894d7d60843SJed Brown } 895d7d60843SJed Brown 8964b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 8974b4eb8d3SJed Brown * message or a dummy entry of some sort. */ 8984b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) { 899*6497c311SBarry Smith for (PetscCount i = 0; i < nblocks; i++) { 9004b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size]; 9014b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row + 1); 9024b4eb8d3SJed Brown } 9034b4eb8d3SJed Brown } 9044b4eb8d3SJed Brown 90560f34548SJunchao Zhang if (stash->first_assembly_done) { 90697da8949SJed Brown PetscMPIInt i, tag; 907*6497c311SBarry Smith 9089566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(stash->comm, &tag)); 90948a46eb9SPierre Jolivet for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash)); 91048a46eb9SPierre Jolivet for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash)); 91197da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 91297da8949SJed Brown } else { 9139371c9d4SSatish Balay PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash)); 9149566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses)); 91597da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 91697da8949SJed Brown } 917d7d60843SJed Brown 9189566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes)); 919d7d60843SJed Brown stash->recvframe_active = NULL; 920d7d60843SJed Brown stash->recvframe_i = 0; 921d7d60843SJed Brown stash->some_i = 0; 922d7d60843SJed Brown stash->some_count = 0; 923d7d60843SJed Brown stash->recvcount = 0; 92460f34548SJunchao Zhang stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */ 9254b4eb8d3SJed Brown stash->insertmode = &mat->insertmode; 9263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 927d7d60843SJed Brown } 928d7d60843SJed Brown 929d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg) 930d71ae5a4SJacob Faibussowitsch { 931d7d60843SJed Brown MatStashBlock *block; 932d7d60843SJed Brown 933d7d60843SJed Brown PetscFunctionBegin; 934d7d60843SJed Brown *flg = 0; 935d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 936d7d60843SJed Brown if (stash->some_i == stash->some_count) { 9373ba16761SJacob Faibussowitsch if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */ 9389566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE)); 939d7d60843SJed Brown stash->some_i = 0; 940d7d60843SJed Brown } 941d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 942d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 943d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 944*6497c311SBarry Smith PetscMPIInt ic; 945*6497c311SBarry Smith 946*6497c311SBarry Smith PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &ic)); 947*6497c311SBarry Smith stash->recvframe_count = ic; 948d7d60843SJed Brown } 9494b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 9504b4eb8d3SJed Brown block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0]; 9514b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 952aed4548fSBarry Smith PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]); 953aed4548fSBarry Smith PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]); 9544b4eb8d3SJed Brown } 955d7d60843SJed Brown stash->some_i++; 956d7d60843SJed Brown stash->recvcount++; 957d7d60843SJed Brown stash->recvframe_i = 0; 958d7d60843SJed Brown } 959d7d60843SJed Brown *n = 1; 960d7d60843SJed Brown block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size]; 9614b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1); 962d7d60843SJed Brown *row = &block->row; 963d7d60843SJed Brown *col = &block->col; 964d7d60843SJed Brown *val = block->vals; 965d7d60843SJed Brown stash->recvframe_i++; 966d7d60843SJed Brown *flg = 1; 9673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 968d7d60843SJed Brown } 969d7d60843SJed Brown 970d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 971d71ae5a4SJacob Faibussowitsch { 972d7d60843SJed Brown PetscFunctionBegin; 9739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE)); 97460f34548SJunchao Zhang if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 975d2b3fd65SBarry Smith PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL)); 9763575f486SJed Brown } else { /* No reuse, so collect everything. */ 9779566063dSJacob Faibussowitsch PetscCall(MatStashScatterDestroy_BTS(stash)); 97897da8949SJed Brown } 979d7d60843SJed Brown 980d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 981d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 982d7d60843SJed Brown Also update the oldmax, only if it increases */ 983d7d60843SJed Brown if (stash->n) { 984d7d60843SJed Brown PetscInt bs2 = stash->bs * stash->bs; 985d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2; 986d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 987d7d60843SJed Brown } 988d7d60843SJed Brown 989d7d60843SJed Brown stash->nmax = 0; 990d7d60843SJed Brown stash->n = 0; 991d7d60843SJed Brown stash->reallocs = -1; 992d7d60843SJed Brown stash->nprocessed = 0; 993d7d60843SJed Brown 9949566063dSJacob Faibussowitsch PetscCall(PetscMatStashSpaceDestroy(&stash->space_head)); 995d7d60843SJed Brown 996f4259b30SLisandro Dalcin stash->space = NULL; 9973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 998d7d60843SJed Brown } 999d7d60843SJed Brown 1000d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1001d71ae5a4SJacob Faibussowitsch { 1002d7d60843SJed Brown PetscFunctionBegin; 10039566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segsendblocks)); 10049566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvframe)); 1005d7d60843SJed Brown stash->recvframes = NULL; 10069566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks)); 100748a46eb9SPierre Jolivet if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype)); 1008d7d60843SJed Brown stash->nsendranks = 0; 1009d7d60843SJed Brown stash->nrecvranks = 0; 10109566063dSJacob Faibussowitsch PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes)); 10119566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->sendreqs)); 10129566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvreqs)); 10139566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvranks)); 10149566063dSJacob Faibussowitsch PetscCall(PetscFree(stash->recvhdr)); 10159566063dSJacob Faibussowitsch PetscCall(PetscFree2(stash->some_indices, stash->some_statuses)); 10163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1017d7d60843SJed Brown } 10181667be42SBarry Smith #endif 1019