12d5177cdSBarry Smith 2af0996ceSBarry Smith #include <petsc/private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 1375d39b6aSBarry Smith #endif 14d7d60843SJed Brown 159417f4adSLois Curfman McInnes /* 168798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 174c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 184c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 199417f4adSLois Curfman McInnes 204c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 214c1ff481SSatish Balay 224c1ff481SSatish Balay Input Parameters: 234c1ff481SSatish Balay comm - communicator, required for scatters. 244c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 254c1ff481SSatish Balay 264c1ff481SSatish Balay Output Parameters: 274c1ff481SSatish Balay stash - the newly created stash 289417f4adSLois Curfman McInnes */ 29c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 309417f4adSLois Curfman McInnes { 31dfbe8321SBarry Smith PetscErrorCode ierr; 32533163c2SBarry Smith PetscInt max,*opt,nopt,i; 33ace3abfcSBarry Smith PetscBool flg; 34bc5ccf88SSatish Balay 353a40ed3dSBarry Smith PetscFunctionBegin; 36bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 37752ec6e0SSatish Balay stash->comm = comm; 388865f1eaSKarl Rupp 39752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 40a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 41ffc4695bSBarry Smith ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRMPI(ierr); 42ffc4695bSBarry Smith ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRMPI(ierr); 43785e854fSJed Brown ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 44533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 45533163c2SBarry Smith 46434d7ff9SSatish Balay nopt = stash->size; 47785e854fSJed Brown ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 48c5929fdfSBarry Smith ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 49434d7ff9SSatish Balay if (flg) { 50434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 51434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 52434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 53f4ab19daSSatish Balay else max = 0; /* Use default */ 54434d7ff9SSatish Balay stash->umax = max; 55434d7ff9SSatish Balay } else { 56434d7ff9SSatish Balay stash->umax = 0; 57434d7ff9SSatish Balay } 58606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 594c1ff481SSatish Balay if (bs <= 0) bs = 1; 60a2d1c673SSatish Balay 614c1ff481SSatish Balay stash->bs = bs; 629417f4adSLois Curfman McInnes stash->nmax = 0; 63434d7ff9SSatish Balay stash->oldnmax = 0; 649417f4adSLois Curfman McInnes stash->n = 0; 654c1ff481SSatish Balay stash->reallocs = -1; 66f4259b30SLisandro Dalcin stash->space_head = NULL; 67f4259b30SLisandro Dalcin stash->space = NULL; 689417f4adSLois Curfman McInnes 69f4259b30SLisandro Dalcin stash->send_waits = NULL; 70f4259b30SLisandro Dalcin stash->recv_waits = NULL; 71f4259b30SLisandro Dalcin stash->send_status = NULL; 72bc5ccf88SSatish Balay stash->nsends = 0; 73bc5ccf88SSatish Balay stash->nrecvs = 0; 74f4259b30SLisandro Dalcin stash->svalues = NULL; 75f4259b30SLisandro Dalcin stash->rvalues = NULL; 76f4259b30SLisandro Dalcin stash->rindices = NULL; 77a2d1c673SSatish Balay stash->nprocessed = 0; 7867318a8aSJed Brown stash->reproduce = PETSC_FALSE; 79d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 808865f1eaSKarl Rupp 81c5929fdfSBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 821667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 83ca723aa4SStefano Zampini flg = PETSC_FALSE; 84b30fb036SBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr); 85b30fb036SBarry Smith if (!flg) { 86d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 87d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 88d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 89d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 90ac2b2aa0SJed Brown } else { 911667be42SBarry Smith #endif 92ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 93ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 94ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 95ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 961667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 97ac2b2aa0SJed Brown } 981667be42SBarry Smith #endif 993a40ed3dSBarry Smith PetscFunctionReturn(0); 1009417f4adSLois Curfman McInnes } 1019417f4adSLois Curfman McInnes 1024c1ff481SSatish Balay /* 1038798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 1044c1ff481SSatish Balay */ 105dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 1069417f4adSLois Curfman McInnes { 107dfbe8321SBarry Smith PetscErrorCode ierr; 108a2d1c673SSatish Balay 109bc5ccf88SSatish Balay PetscFunctionBegin; 1106bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 111ac2b2aa0SJed Brown if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 1128865f1eaSKarl Rupp 113f4259b30SLisandro Dalcin stash->space = NULL; 1148865f1eaSKarl Rupp 115533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 116bc5ccf88SSatish Balay PetscFunctionReturn(0); 117bc5ccf88SSatish Balay } 118bc5ccf88SSatish Balay 1194c1ff481SSatish Balay /* 12067318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1214c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 12267318a8aSJed Brown all the memory used for message passing is cleaned up. This 1234c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1244c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1254c1ff481SSatish Balay so that the same value can be used the next time through. 1264c1ff481SSatish Balay */ 127dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 128bc5ccf88SSatish Balay { 1296849ba73SBarry Smith PetscErrorCode ierr; 130ac2b2aa0SJed Brown 131ac2b2aa0SJed Brown PetscFunctionBegin; 132ac2b2aa0SJed Brown ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 133ac2b2aa0SJed Brown PetscFunctionReturn(0); 134ac2b2aa0SJed Brown } 135ac2b2aa0SJed Brown 136d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 137ac2b2aa0SJed Brown { 138ac2b2aa0SJed Brown PetscErrorCode ierr; 139533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 140a2d1c673SSatish Balay MPI_Status *send_status; 141a2d1c673SSatish Balay 1423a40ed3dSBarry Smith PetscFunctionBegin; 143533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 144a2d1c673SSatish Balay /* wait on sends */ 145a2d1c673SSatish Balay if (nsends) { 146785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 147ffc4695bSBarry Smith ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRMPI(ierr); 148606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 149a2d1c673SSatish Balay } 150a2d1c673SSatish Balay 151c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 152434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 153434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 154b9b97703SBarry Smith if (stash->n) { 15594b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1568a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 157434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 158b9b97703SBarry Smith } 159434d7ff9SSatish Balay 160d07ff455SSatish Balay stash->nmax = 0; 161d07ff455SSatish Balay stash->n = 0; 1624c1ff481SSatish Balay stash->reallocs = -1; 163a2d1c673SSatish Balay stash->nprocessed = 0; 1648865f1eaSKarl Rupp 1656bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1668865f1eaSKarl Rupp 167f4259b30SLisandro Dalcin stash->space = NULL; 1688865f1eaSKarl Rupp 169606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 170606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 171c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 172c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 173606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 174c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 175563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 1763a40ed3dSBarry Smith PetscFunctionReturn(0); 1779417f4adSLois Curfman McInnes } 1789417f4adSLois Curfman McInnes 1794c1ff481SSatish Balay /* 1808798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1814c1ff481SSatish Balay 1824c1ff481SSatish Balay Input Parameters: 1834c1ff481SSatish Balay stash - the stash 18494b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1854c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1864c1ff481SSatish Balay 1874c1ff481SSatish Balay */ 188c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 18997530c3fSBarry Smith { 190c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 19194b769a5SSatish Balay 1923a40ed3dSBarry Smith PetscFunctionBegin; 1931ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1941ecfd215SBarry Smith if (reallocs) { 195434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 196434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1971ecfd215SBarry Smith } 198bc5ccf88SSatish Balay PetscFunctionReturn(0); 199bc5ccf88SSatish Balay } 2004c1ff481SSatish Balay 2014c1ff481SSatish Balay /* 2028798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 2034c1ff481SSatish Balay 2044c1ff481SSatish Balay Input Parameters: 2054c1ff481SSatish Balay stash - the stash 2064c1ff481SSatish Balay max - the value that is used as the max size of the stash. 2074c1ff481SSatish Balay this value is used while allocating memory. 2084c1ff481SSatish Balay */ 209c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 210bc5ccf88SSatish Balay { 211bc5ccf88SSatish Balay PetscFunctionBegin; 212434d7ff9SSatish Balay stash->umax = max; 2133a40ed3dSBarry Smith PetscFunctionReturn(0); 21497530c3fSBarry Smith } 21597530c3fSBarry Smith 2168798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2174c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2184c1ff481SSatish Balay being inserted into the stash. 2194c1ff481SSatish Balay 2204c1ff481SSatish Balay Input Parameters: 2214c1ff481SSatish Balay stash - the stash 2224c1ff481SSatish Balay incr - the minimum increase requested 2234c1ff481SSatish Balay 2244c1ff481SSatish Balay Notes: 2254c1ff481SSatish Balay This routine doubles the currently used memory. 2264c1ff481SSatish Balay */ 227c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2289417f4adSLois Curfman McInnes { 2296849ba73SBarry Smith PetscErrorCode ierr; 2305bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2319417f4adSLois Curfman McInnes 2323a40ed3dSBarry Smith PetscFunctionBegin; 2339417f4adSLois Curfman McInnes /* allocate a larger stash */ 234c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 235434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 236434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 237c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 238434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 239434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 240434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2414c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 242d07ff455SSatish Balay 24375cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 24475cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 245b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 246b087b6d6SSatish Balay stash->space_head = stash->space; 24775cae7c1SHong Zhang } 248b087b6d6SSatish Balay 249bc5ccf88SSatish Balay stash->reallocs++; 25075cae7c1SHong Zhang stash->nmax = newnmax; 251bc5ccf88SSatish Balay PetscFunctionReturn(0); 252bc5ccf88SSatish Balay } 253bc5ccf88SSatish Balay /* 2548798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2554c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2564c1ff481SSatish Balay can be inserted with a single call to this function. 2574c1ff481SSatish Balay 2584c1ff481SSatish Balay Input Parameters: 2594c1ff481SSatish Balay stash - the stash 2604c1ff481SSatish Balay row - the global row correspoiding to the values 2614c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2624c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2634c1ff481SSatish Balay values - the values inserted 264bc5ccf88SSatish Balay */ 265ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 266bc5ccf88SSatish Balay { 267dfbe8321SBarry Smith PetscErrorCode ierr; 268b400d20cSBarry Smith PetscInt i,k,cnt = 0; 26975cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 270bc5ccf88SSatish Balay 271bc5ccf88SSatish Balay PetscFunctionBegin; 2724c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 27375cae7c1SHong Zhang if (!space || space->local_remaining < n) { 2748798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2759417f4adSLois Curfman McInnes } 27675cae7c1SHong Zhang space = stash->space; 27775cae7c1SHong Zhang k = space->local_used; 2784c1ff481SSatish Balay for (i=0; i<n; i++) { 27914069ce9SStefano Zampini if (ignorezeroentries && values && values[i] == 0.0) continue; 28075cae7c1SHong Zhang space->idx[k] = row; 28175cae7c1SHong Zhang space->idy[k] = idxn[i]; 28214069ce9SStefano Zampini space->val[k] = values ? values[i] : 0.0; 28375cae7c1SHong Zhang k++; 284b400d20cSBarry Smith cnt++; 2859417f4adSLois Curfman McInnes } 286b400d20cSBarry Smith stash->n += cnt; 287b400d20cSBarry Smith space->local_used += cnt; 288b400d20cSBarry Smith space->local_remaining -= cnt; 289a2d1c673SSatish Balay PetscFunctionReturn(0); 290a2d1c673SSatish Balay } 29175cae7c1SHong Zhang 2924c1ff481SSatish Balay /* 2938798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2944c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2954c1ff481SSatish Balay can be inserted with a single call to this function. 296a2d1c673SSatish Balay 2974c1ff481SSatish Balay Input Parameters: 2984c1ff481SSatish Balay stash - the stash 2994c1ff481SSatish Balay row - the global row correspoiding to the values 3004c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3014c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 3024c1ff481SSatish Balay values - the values inserted 3034c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 3044c1ff481SSatish Balay this happens because the input is columnoriented. 3054c1ff481SSatish Balay */ 306ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 307a2d1c673SSatish Balay { 308dfbe8321SBarry Smith PetscErrorCode ierr; 30950e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 31075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 311a2d1c673SSatish Balay 3124c1ff481SSatish Balay PetscFunctionBegin; 3134c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 31475cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3158798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3164c1ff481SSatish Balay } 31775cae7c1SHong Zhang space = stash->space; 31875cae7c1SHong Zhang k = space->local_used; 3194c1ff481SSatish Balay for (i=0; i<n; i++) { 32014069ce9SStefano Zampini if (ignorezeroentries && values && values[i*stepval] == 0.0) continue; 32175cae7c1SHong Zhang space->idx[k] = row; 32275cae7c1SHong Zhang space->idy[k] = idxn[i]; 32314069ce9SStefano Zampini space->val[k] = values ? values[i*stepval] : 0.0; 32475cae7c1SHong Zhang k++; 325b400d20cSBarry Smith cnt++; 3264c1ff481SSatish Balay } 327b400d20cSBarry Smith stash->n += cnt; 328b400d20cSBarry Smith space->local_used += cnt; 329b400d20cSBarry Smith space->local_remaining -= cnt; 3304c1ff481SSatish Balay PetscFunctionReturn(0); 3314c1ff481SSatish Balay } 3324c1ff481SSatish Balay 3334c1ff481SSatish Balay /* 3348798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3354c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3364c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3374c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3384c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3394c1ff481SSatish Balay 3404c1ff481SSatish Balay Input Parameters: 3414c1ff481SSatish Balay stash - the stash 3424c1ff481SSatish Balay row - the global block-row correspoiding to the values 3434c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3444c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3454c1ff481SSatish Balay values. Each block is of size bs*bs. 3464c1ff481SSatish Balay values - the values inserted 3474c1ff481SSatish Balay rmax - the number of block-rows in the original block. 348a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 3494c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3504c1ff481SSatish Balay */ 35154f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3524c1ff481SSatish Balay { 353dfbe8321SBarry Smith PetscErrorCode ierr; 35475cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 35554f21887SBarry Smith const PetscScalar *vals; 35654f21887SBarry Smith PetscScalar *array; 35775cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 358a2d1c673SSatish Balay 359a2d1c673SSatish Balay PetscFunctionBegin; 36075cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3618798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 362a2d1c673SSatish Balay } 36375cae7c1SHong Zhang space = stash->space; 36475cae7c1SHong Zhang l = space->local_used; 36575cae7c1SHong Zhang bs2 = bs*bs; 3664c1ff481SSatish Balay for (i=0; i<n; i++) { 36775cae7c1SHong Zhang space->idx[l] = row; 36875cae7c1SHong Zhang space->idy[l] = idxn[i]; 36975cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 37075cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 37175cae7c1SHong Zhang funtion call */ 37275cae7c1SHong Zhang array = space->val + bs2*l; 37375cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 37475cae7c1SHong Zhang for (j=0; j<bs; j++) { 37514069ce9SStefano Zampini for (k=0; k<bs; k++) array[k*bs] = values ? vals[k] : 0.0; 37675cae7c1SHong Zhang array++; 37775cae7c1SHong Zhang vals += cmax*bs; 37875cae7c1SHong Zhang } 37975cae7c1SHong Zhang l++; 380a2d1c673SSatish Balay } 3815bd3b8fbSHong Zhang stash->n += n; 38275cae7c1SHong Zhang space->local_used += n; 38375cae7c1SHong Zhang space->local_remaining -= n; 3844c1ff481SSatish Balay PetscFunctionReturn(0); 3854c1ff481SSatish Balay } 3864c1ff481SSatish Balay 3874c1ff481SSatish Balay /* 3888798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3894c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3904c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3914c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3924c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3934c1ff481SSatish Balay 3944c1ff481SSatish Balay Input Parameters: 3954c1ff481SSatish Balay stash - the stash 3964c1ff481SSatish Balay row - the global block-row correspoiding to the values 3974c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3984c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3994c1ff481SSatish Balay values. Each block is of size bs*bs. 4004c1ff481SSatish Balay values - the values inserted 4014c1ff481SSatish Balay rmax - the number of block-rows in the original block. 402a5b23f4aSJose E. Roman cmax - the number of block-columns on the original block. 4034c1ff481SSatish Balay idx - the index of the current block-row in the original block. 4044c1ff481SSatish Balay */ 40554f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 4064c1ff481SSatish Balay { 407dfbe8321SBarry Smith PetscErrorCode ierr; 40875cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 40954f21887SBarry Smith const PetscScalar *vals; 41054f21887SBarry Smith PetscScalar *array; 41175cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4124c1ff481SSatish Balay 4134c1ff481SSatish Balay PetscFunctionBegin; 41475cae7c1SHong Zhang if (!space || space->local_remaining < n) { 4158798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4164c1ff481SSatish Balay } 41775cae7c1SHong Zhang space = stash->space; 41875cae7c1SHong Zhang l = space->local_used; 41975cae7c1SHong Zhang bs2 = bs*bs; 4204c1ff481SSatish Balay for (i=0; i<n; i++) { 42175cae7c1SHong Zhang space->idx[l] = row; 42275cae7c1SHong Zhang space->idy[l] = idxn[i]; 42375cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 42475cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 42575cae7c1SHong Zhang funtion call */ 42675cae7c1SHong Zhang array = space->val + bs2*l; 42775cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 42875cae7c1SHong Zhang for (j=0; j<bs; j++) { 42914069ce9SStefano Zampini for (k=0; k<bs; k++) array[k] = values ? vals[k] : 0.0; 43075cae7c1SHong Zhang array += bs; 43175cae7c1SHong Zhang vals += rmax*bs; 43275cae7c1SHong Zhang } 4335bd3b8fbSHong Zhang l++; 434a2d1c673SSatish Balay } 4355bd3b8fbSHong Zhang stash->n += n; 43675cae7c1SHong Zhang space->local_used += n; 43775cae7c1SHong Zhang space->local_remaining -= n; 4383a40ed3dSBarry Smith PetscFunctionReturn(0); 4399417f4adSLois Curfman McInnes } 4404c1ff481SSatish Balay /* 4418798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4424c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4434c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4444c1ff481SSatish Balay processors. 445bc5ccf88SSatish Balay 4464c1ff481SSatish Balay Input Parameters: 4474c1ff481SSatish Balay stash - the stash 4484c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4494c1ff481SSatish Balay for each node. 4504c1ff481SSatish Balay 45195452b02SPatrick Sanan Notes: 45295452b02SPatrick Sanan The 'owners' array in the cased of the blocked-stash has the 4534c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4544c1ff481SSatish Balay the proper global indices. 4554c1ff481SSatish Balay */ 4561e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 457bc5ccf88SSatish Balay { 458ac2b2aa0SJed Brown PetscErrorCode ierr; 459ac2b2aa0SJed Brown 460ac2b2aa0SJed Brown PetscFunctionBegin; 461ac2b2aa0SJed Brown ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 462ac2b2aa0SJed Brown PetscFunctionReturn(0); 463ac2b2aa0SJed Brown } 464ac2b2aa0SJed Brown 465ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 466ac2b2aa0SJed Brown { 467c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 468fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4696849ba73SBarry Smith PetscErrorCode ierr; 47075cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 47154f21887SBarry Smith PetscScalar **rvalues,*svalues; 472bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 473563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 47476ec1555SBarry Smith PetscMPIInt *sizes,*nlengths,nreceives; 4755bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 47654f21887SBarry Smith PetscScalar *sp_val; 4775bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 478bc5ccf88SSatish Balay 479bc5ccf88SSatish Balay PetscFunctionBegin; 4804b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 4814b4eb8d3SJed Brown InsertMode addv; 482820f2d46SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 483*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 4844b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */ 4854b4eb8d3SJed Brown } 4864b4eb8d3SJed Brown 4874c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 48875cae7c1SHong Zhang 489bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 4901795a4d1SJed Brown ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 491037dbc42SBarry Smith ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 492a2d1c673SSatish Balay 49375cae7c1SHong Zhang i = j = 0; 4947357eb19SBarry Smith lastidx = -1; 4955bd3b8fbSHong Zhang space = stash->space_head; 4966c4ed002SBarry Smith while (space) { 49775cae7c1SHong Zhang space_next = space->next; 4985bd3b8fbSHong Zhang sp_idx = space->idx; 49975cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 5007357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 5015bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 5027357eb19SBarry Smith lastidx = idx; 5037357eb19SBarry Smith for (; j<size; j++) { 5044c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 505563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 506bc5ccf88SSatish Balay } 507bc5ccf88SSatish Balay } 50875cae7c1SHong Zhang i++; 50975cae7c1SHong Zhang } 51075cae7c1SHong Zhang space = space_next; 511bc5ccf88SSatish Balay } 512071fcb05SBarry Smith 513563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 514071fcb05SBarry Smith ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 515563fb871SSatish Balay for (i=0, nsends=0; i<size; i++) { 5168865f1eaSKarl Rupp if (nlengths[i]) { 51776ec1555SBarry Smith sizes[i] = 1; nsends++; 5188865f1eaSKarl Rupp } 519563fb871SSatish Balay } 520bc5ccf88SSatish Balay 52154f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 522563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 52376ec1555SBarry Smith ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 524563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 525563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 526563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 527563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 528563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 529563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 530563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 531563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 5328865f1eaSKarl Rupp ierr = PetscFree(olengths);CHKERRQ(ierr);} 533bc5ccf88SSatish Balay 534bc5ccf88SSatish Balay /* do sends: 535bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 536bc5ccf88SSatish Balay the ith processor 537bc5ccf88SSatish Balay */ 538dcca6d9dSJed Brown ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 539785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 540dcca6d9dSJed Brown ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 541a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 542bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 543bc5ccf88SSatish Balay for (i=1; i<size; i++) { 544563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 545533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 546bc5ccf88SSatish Balay } 54775cae7c1SHong Zhang 54875cae7c1SHong Zhang i = 0; 5495bd3b8fbSHong Zhang space = stash->space_head; 5506c4ed002SBarry Smith while (space) { 55175cae7c1SHong Zhang space_next = space->next; 5525bd3b8fbSHong Zhang sp_idx = space->idx; 5535bd3b8fbSHong Zhang sp_idy = space->idy; 5545bd3b8fbSHong Zhang sp_val = space->val; 55575cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 556bc5ccf88SSatish Balay j = owner[i]; 557a2d1c673SSatish Balay if (bs2 == 1) { 5585bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 559a2d1c673SSatish Balay } else { 560c1ac3661SBarry Smith PetscInt k; 56154f21887SBarry Smith PetscScalar *buf1,*buf2; 5624c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 563b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5648865f1eaSKarl Rupp for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 565a2d1c673SSatish Balay } 5665bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5675bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 568bc5ccf88SSatish Balay startv[j]++; 569bc5ccf88SSatish Balay starti[j]++; 57075cae7c1SHong Zhang i++; 57175cae7c1SHong Zhang } 57275cae7c1SHong Zhang space = space_next; 573bc5ccf88SSatish Balay } 574bc5ccf88SSatish Balay startv[0] = 0; 5758865f1eaSKarl Rupp for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 576e5d0e772SSatish Balay 577bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 57876ec1555SBarry Smith if (sizes[i]) { 579ffc4695bSBarry Smith ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr); 580ffc4695bSBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr); 581bc5ccf88SSatish Balay } 582b85c94c3SSatish Balay } 5836cf91177SBarry Smith #if defined(PETSC_USE_INFO) 5847d3de750SJacob Faibussowitsch ierr = PetscInfo(NULL,"No of messages: %" PetscInt_FMT " \n",nsends);CHKERRQ(ierr); 585e5d0e772SSatish Balay for (i=0; i<size; i++) { 58676ec1555SBarry Smith if (sizes[i]) { 5877d3de750SJacob Faibussowitsch ierr = PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))));CHKERRQ(ierr); 588e5d0e772SSatish Balay } 589e5d0e772SSatish Balay } 590e5d0e772SSatish Balay #endif 591c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 592606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 593c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 59476ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 595a2d1c673SSatish Balay 596563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 597785e854fSJed Brown ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 598563fb871SSatish Balay 599563fb871SSatish Balay for (i=0; i<nreceives; i++) { 600563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 601563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 602563fb871SSatish Balay } 603563fb871SSatish Balay stash->recv_waits = recv_waits; 6048865f1eaSKarl Rupp 605563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 606563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 607563fb871SSatish Balay 608c05d87d6SBarry Smith stash->svalues = svalues; 609c05d87d6SBarry Smith stash->sindices = sindices; 610c05d87d6SBarry Smith stash->rvalues = rvalues; 611c05d87d6SBarry Smith stash->rindices = rindices; 612c05d87d6SBarry Smith stash->send_waits = send_waits; 613c05d87d6SBarry Smith stash->nsends = nsends; 614c05d87d6SBarry Smith stash->nrecvs = nreceives; 61567318a8aSJed Brown stash->reproduce_count = 0; 616bc5ccf88SSatish Balay PetscFunctionReturn(0); 617bc5ccf88SSatish Balay } 618bc5ccf88SSatish Balay 619a2d1c673SSatish Balay /* 6208798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 6218798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6224c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6234c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6244c1ff481SSatish Balay 6254c1ff481SSatish Balay Input Parameters: 6264c1ff481SSatish Balay stash - the stash 6274c1ff481SSatish Balay 6284c1ff481SSatish Balay Output Parameters: 6294c1ff481SSatish Balay nvals - the number of entries in the current message. 6304c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6314c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6324c1ff481SSatish Balay vals - the values 6334c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6344c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6354c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 636a2d1c673SSatish Balay */ 63754f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 638bc5ccf88SSatish Balay { 6396849ba73SBarry Smith PetscErrorCode ierr; 640ac2b2aa0SJed Brown 641ac2b2aa0SJed Brown PetscFunctionBegin; 642ac2b2aa0SJed Brown ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 643ac2b2aa0SJed Brown PetscFunctionReturn(0); 644ac2b2aa0SJed Brown } 645ac2b2aa0SJed Brown 646d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 647ac2b2aa0SJed Brown { 648ac2b2aa0SJed Brown PetscErrorCode ierr; 649533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 650fe09c992SBarry Smith PetscInt bs2; 651a2d1c673SSatish Balay MPI_Status recv_status; 652ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 653bc5ccf88SSatish Balay 654bc5ccf88SSatish Balay PetscFunctionBegin; 655a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 656a2d1c673SSatish Balay /* Return if no more messages to process */ 6578865f1eaSKarl Rupp if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 658a2d1c673SSatish Balay 6594c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 66067318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 661a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 662a2d1c673SSatish Balay while (!match_found) { 66367318a8aSJed Brown if (stash->reproduce) { 66467318a8aSJed Brown i = stash->reproduce_count++; 665ffc4695bSBarry Smith ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRMPI(ierr); 66667318a8aSJed Brown } else { 667ffc4695bSBarry Smith ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRMPI(ierr); 66867318a8aSJed Brown } 669*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(recv_status.MPI_SOURCE < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 670533163c2SBarry Smith 67167318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 672a2d1c673SSatish Balay if (i % 2) { 673ffc4695bSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRMPI(ierr); 6748865f1eaSKarl Rupp 675c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 6768865f1eaSKarl Rupp 677a2d1c673SSatish Balay *nvals = *nvals/bs2; 678563fb871SSatish Balay } else { 679ffc4695bSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRMPI(ierr); 6808865f1eaSKarl Rupp 681563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 6828865f1eaSKarl Rupp 683563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 684bc5ccf88SSatish Balay } 685a2d1c673SSatish Balay 686cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 687c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 688c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 689a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 690563fb871SSatish Balay *rows = stash->rindices[i2]; 691a2d1c673SSatish Balay *cols = *rows + *nvals; 692563fb871SSatish Balay *vals = stash->rvalues[i1]; 693a2d1c673SSatish Balay *flg = 1; 694a2d1c673SSatish Balay stash->nprocessed++; 69535d8aa7fSBarry Smith match_found = PETSC_TRUE; 696bc5ccf88SSatish Balay } 697bc5ccf88SSatish Balay } 698bc5ccf88SSatish Balay PetscFunctionReturn(0); 699bc5ccf88SSatish Balay } 700d7d60843SJed Brown 701e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI) 702d7d60843SJed Brown typedef struct { 703d7d60843SJed Brown PetscInt row; 704d7d60843SJed Brown PetscInt col; 705d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 706d7d60843SJed Brown } MatStashBlock; 707d7d60843SJed Brown 708d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 709d7d60843SJed Brown { 710d7d60843SJed Brown PetscErrorCode ierr; 711d7d60843SJed Brown PetscMatStashSpace space; 712d7d60843SJed Brown PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 713d7d60843SJed Brown PetscScalar **valptr; 714d7d60843SJed Brown 715d7d60843SJed Brown PetscFunctionBegin; 716d7d60843SJed Brown ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 717d7d60843SJed Brown for (space=stash->space_head,cnt=0; space; space=space->next) { 718d7d60843SJed Brown for (i=0; i<space->local_used; i++) { 719d7d60843SJed Brown row[cnt] = space->idx[i]; 720d7d60843SJed Brown col[cnt] = space->idy[i]; 721d7d60843SJed Brown valptr[cnt] = &space->val[i*bs2]; 722d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 723d7d60843SJed Brown cnt++; 724d7d60843SJed Brown } 725d7d60843SJed Brown } 726*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(cnt != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries",n,cnt); 727d7d60843SJed Brown ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 728d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 729d7d60843SJed Brown for (rowstart=0,cnt=0,i=1; i<=n; i++) { 730d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 731d7d60843SJed Brown PetscInt colstart; 732d7d60843SJed Brown ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 733d7d60843SJed Brown for (colstart=rowstart; colstart<i;) { /* Compress multiple insertions to the same location */ 734d7d60843SJed Brown PetscInt j,l; 735d7d60843SJed Brown MatStashBlock *block; 736d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 737d7d60843SJed Brown block->row = row[rowstart]; 738d7d60843SJed Brown block->col = col[colstart]; 739580bdb30SBarry Smith ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr); 740d7d60843SJed Brown for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 741d7d60843SJed Brown if (insertmode == ADD_VALUES) { 742d7d60843SJed Brown for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 743d7d60843SJed Brown } else { 744580bdb30SBarry Smith ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr); 745d7d60843SJed Brown } 746d7d60843SJed Brown } 747d7d60843SJed Brown colstart = j; 748d7d60843SJed Brown } 749d7d60843SJed Brown rowstart = i; 750d7d60843SJed Brown } 751d7d60843SJed Brown } 752d7d60843SJed Brown ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 753d7d60843SJed Brown PetscFunctionReturn(0); 754d7d60843SJed Brown } 755d7d60843SJed Brown 756d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 757d7d60843SJed Brown { 758d7d60843SJed Brown PetscErrorCode ierr; 759d7d60843SJed Brown 760d7d60843SJed Brown PetscFunctionBegin; 761d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 762d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 763d7d60843SJed Brown PetscMPIInt blocklens[2]; 764d7d60843SJed Brown MPI_Aint displs[2]; 765d7d60843SJed Brown MPI_Datatype types[2],stype; 766f41aa5efSJunchao Zhang /* Note that DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex. 767f41aa5efSJunchao Zhang std::complex itself has standard layout, so does DummyBlock, recursively. 768a5b23f4aSJose E. Roman To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout, 769f41aa5efSJunchao Zhang though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++. 770f41aa5efSJunchao Zhang offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof. 771f41aa5efSJunchao Zhang 772f41aa5efSJunchao Zhang We can test if std::complex has standard layout with the following code: 773f41aa5efSJunchao Zhang #include <iostream> 774f41aa5efSJunchao Zhang #include <type_traits> 775f41aa5efSJunchao Zhang #include <complex> 776f41aa5efSJunchao Zhang int main() { 777f41aa5efSJunchao Zhang std::cout << std::boolalpha; 778f41aa5efSJunchao Zhang std::cout << std::is_standard_layout<std::complex<double> >::value << '\n'; 779f41aa5efSJunchao Zhang } 780f41aa5efSJunchao Zhang Output: true 7819503c6c6SJed Brown */ 782f41aa5efSJunchao Zhang struct DummyBlock {PetscInt row,col; PetscScalar vals;}; 783d7d60843SJed Brown 7849503c6c6SJed Brown stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar); 785d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 786d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 787d7d60843SJed Brown } 788d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 789d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 790d7d60843SJed Brown ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 791d7d60843SJed Brown blocklens[0] = 2; 792d7d60843SJed Brown blocklens[1] = bs2; 7939503c6c6SJed Brown displs[0] = offsetof(struct DummyBlock,row); 7949503c6c6SJed Brown displs[1] = offsetof(struct DummyBlock,vals); 795d7d60843SJed Brown types[0] = MPIU_INT; 796d7d60843SJed Brown types[1] = MPIU_SCALAR; 797ffc4695bSBarry Smith ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRMPI(ierr); 798ffc4695bSBarry Smith ierr = MPI_Type_commit(&stype);CHKERRMPI(ierr); 799ffc4695bSBarry Smith ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRMPI(ierr); 800ffc4695bSBarry Smith ierr = MPI_Type_commit(&stash->blocktype);CHKERRMPI(ierr); 801ffc4695bSBarry Smith ierr = MPI_Type_free(&stype);CHKERRMPI(ierr); 802d7d60843SJed Brown } 803d7d60843SJed Brown PetscFunctionReturn(0); 804d7d60843SJed Brown } 805d7d60843SJed Brown 806d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message. 807d7d60843SJed Brown * Here we post the main sends. 808d7d60843SJed Brown */ 809d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 810d7d60843SJed Brown { 811d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 812d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)sdata; 813d7d60843SJed Brown PetscErrorCode ierr; 814d7d60843SJed Brown 815d7d60843SJed Brown PetscFunctionBegin; 816*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(rank != stash->sendranks[rankid],comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 817ffc4695bSBarry Smith ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRMPI(ierr); 818d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 819d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 820d7d60843SJed Brown PetscFunctionReturn(0); 821d7d60843SJed Brown } 822d7d60843SJed Brown 823d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message. 824d7d60843SJed Brown * Here we post the main recvs. 825d7d60843SJed Brown */ 826d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 827d7d60843SJed Brown { 828d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 829d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)rdata; 830d7d60843SJed Brown MatStashFrame *frame; 831d7d60843SJed Brown PetscErrorCode ierr; 832d7d60843SJed Brown 833d7d60843SJed Brown PetscFunctionBegin; 834d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 835d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 836ffc4695bSBarry Smith ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRMPI(ierr); 837d7d60843SJed Brown frame->count = hdr->count; 838d7d60843SJed Brown frame->pending = 1; 839d7d60843SJed Brown PetscFunctionReturn(0); 840d7d60843SJed Brown } 841d7d60843SJed Brown 842d7d60843SJed Brown /* 843d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 844d7d60843SJed Brown */ 845d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 846d7d60843SJed Brown { 847d7d60843SJed Brown PetscErrorCode ierr; 848d7d60843SJed Brown size_t nblocks; 849d7d60843SJed Brown char *sendblocks; 850d7d60843SJed Brown 851d7d60843SJed Brown PetscFunctionBegin; 85276bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */ 8534b4eb8d3SJed Brown InsertMode addv; 854820f2d46SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 855*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(addv == (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 8564b4eb8d3SJed Brown } 8574b4eb8d3SJed Brown 858d7d60843SJed Brown ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 859d7d60843SJed Brown ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 860d7d60843SJed Brown ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 861d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 86260f34548SJunchao Zhang if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 86323b7d1baSJed Brown PetscInt i; 86423b7d1baSJed Brown size_t b; 86597da8949SJed Brown for (i=0,b=0; i<stash->nsendranks; i++) { 86697da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 86797da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 86897da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 86997da8949SJed Brown for (; b<nblocks; b++) { 87097da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 871*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(sendblock_b->row < owners[stash->sendranks[i]],stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 87297da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 87397da8949SJed Brown stash->sendhdr[i].count++; 87497da8949SJed Brown } 87597da8949SJed Brown } 87697da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 87723b7d1baSJed Brown PetscInt sendno; 87823b7d1baSJed Brown size_t i,rowstart; 879d7d60843SJed Brown 880d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 881d7d60843SJed Brown stash->nsendranks = 0; 882d7d60843SJed Brown for (rowstart=0; rowstart<nblocks;) { 8837e2ea869SJed Brown PetscInt owner; 884d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 885d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 886d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 887d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 888d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8897e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 890d7d60843SJed Brown } 891d7d60843SJed Brown stash->nsendranks++; 892d7d60843SJed Brown rowstart = i; 893d7d60843SJed Brown } 894d7d60843SJed Brown ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 895d7d60843SJed Brown 896d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 897d7d60843SJed Brown sendno = 0; 898d7d60843SJed Brown for (rowstart=0; rowstart<nblocks;) { 899d7d60843SJed Brown PetscInt owner; 900d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 901d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 902d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 903d7d60843SJed Brown stash->sendranks[sendno] = owner; 904d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 905d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 9067e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 907d7d60843SJed Brown } 908d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 909d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 910d7d60843SJed Brown stash->sendhdr[sendno].count = i - rowstart; 911d7d60843SJed Brown sendno++; 912d7d60843SJed Brown rowstart = i; 913d7d60843SJed Brown } 914*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(sendno != stash->nsendranks,stash->comm,PETSC_ERR_PLIB,"BTS counted %d sendranks, but %" PetscInt_FMT " sends",stash->nsendranks,sendno); 915d7d60843SJed Brown } 916d7d60843SJed Brown 9174b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 9184b4eb8d3SJed Brown * message or a dummy entry of some sort. */ 9194b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) { 92023b7d1baSJed Brown size_t i; 9214b4eb8d3SJed Brown for (i=0; i<nblocks; i++) { 9224b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 9234b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row+1); 9244b4eb8d3SJed Brown } 9254b4eb8d3SJed Brown } 9264b4eb8d3SJed Brown 92760f34548SJunchao Zhang if (stash->first_assembly_done) { 92897da8949SJed Brown PetscMPIInt i,tag; 92997da8949SJed Brown ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 93097da8949SJed Brown for (i=0; i<stash->nrecvranks; i++) { 93197da8949SJed Brown ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 93297da8949SJed Brown } 93397da8949SJed Brown for (i=0; i<stash->nsendranks; i++) { 93497da8949SJed Brown ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 93597da8949SJed Brown } 93697da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 93797da8949SJed Brown } else { 938e0ddb6e8SJed Brown ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 939e0ddb6e8SJed Brown &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 940d7d60843SJed Brown MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 941b5ddc6f1SJed Brown ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 94297da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 94397da8949SJed Brown } 944d7d60843SJed Brown 945d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 946d7d60843SJed Brown stash->recvframe_active = NULL; 947d7d60843SJed Brown stash->recvframe_i = 0; 948d7d60843SJed Brown stash->some_i = 0; 949d7d60843SJed Brown stash->some_count = 0; 950d7d60843SJed Brown stash->recvcount = 0; 95160f34548SJunchao Zhang stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */ 9524b4eb8d3SJed Brown stash->insertmode = &mat->insertmode; 953d7d60843SJed Brown PetscFunctionReturn(0); 954d7d60843SJed Brown } 955d7d60843SJed Brown 956d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 957d7d60843SJed Brown { 958d7d60843SJed Brown PetscErrorCode ierr; 959d7d60843SJed Brown MatStashBlock *block; 960d7d60843SJed Brown 961d7d60843SJed Brown PetscFunctionBegin; 962d7d60843SJed Brown *flg = 0; 963d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 964d7d60843SJed Brown if (stash->some_i == stash->some_count) { 965d7d60843SJed Brown if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 966ffc4695bSBarry Smith ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 967d7d60843SJed Brown stash->some_i = 0; 968d7d60843SJed Brown } 969d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 970d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 971d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 972ffc4695bSBarry Smith ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRMPI(ierr); 973d7d60843SJed Brown } 9744b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 9754b4eb8d3SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 9764b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 977*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(*stash->insertmode == INSERT_VALUES && block->row >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 978*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(*stash->insertmode == ADD_VALUES && block->row < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 9794b4eb8d3SJed Brown } 980d7d60843SJed Brown stash->some_i++; 981d7d60843SJed Brown stash->recvcount++; 982d7d60843SJed Brown stash->recvframe_i = 0; 983d7d60843SJed Brown } 984d7d60843SJed Brown *n = 1; 985d7d60843SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 9864b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1); 987d7d60843SJed Brown *row = &block->row; 988d7d60843SJed Brown *col = &block->col; 989d7d60843SJed Brown *val = block->vals; 990d7d60843SJed Brown stash->recvframe_i++; 991d7d60843SJed Brown *flg = 1; 992d7d60843SJed Brown PetscFunctionReturn(0); 993d7d60843SJed Brown } 994d7d60843SJed Brown 995d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 996d7d60843SJed Brown { 997d7d60843SJed Brown PetscErrorCode ierr; 998d7d60843SJed Brown 999d7d60843SJed Brown PetscFunctionBegin; 1000ffc4695bSBarry Smith ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 100160f34548SJunchao Zhang if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 10023575f486SJed Brown void *dummy; 10033575f486SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 10043575f486SJed Brown } else { /* No reuse, so collect everything. */ 1005d7d60843SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 100697da8949SJed Brown } 1007d7d60843SJed Brown 1008d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 1009d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 1010d7d60843SJed Brown Also update the oldmax, only if it increases */ 1011d7d60843SJed Brown if (stash->n) { 1012d7d60843SJed Brown PetscInt bs2 = stash->bs*stash->bs; 1013d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1014d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1015d7d60843SJed Brown } 1016d7d60843SJed Brown 1017d7d60843SJed Brown stash->nmax = 0; 1018d7d60843SJed Brown stash->n = 0; 1019d7d60843SJed Brown stash->reallocs = -1; 1020d7d60843SJed Brown stash->nprocessed = 0; 1021d7d60843SJed Brown 1022d7d60843SJed Brown ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1023d7d60843SJed Brown 1024f4259b30SLisandro Dalcin stash->space = NULL; 1025d7d60843SJed Brown 1026d7d60843SJed Brown PetscFunctionReturn(0); 1027d7d60843SJed Brown } 1028d7d60843SJed Brown 102960f34548SJunchao Zhang PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1030d7d60843SJed Brown { 1031d7d60843SJed Brown PetscErrorCode ierr; 1032d7d60843SJed Brown 1033d7d60843SJed Brown PetscFunctionBegin; 1034d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1035d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1036d7d60843SJed Brown stash->recvframes = NULL; 1037d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1038d7d60843SJed Brown if (stash->blocktype != MPI_DATATYPE_NULL) { 1039ffc4695bSBarry Smith ierr = MPI_Type_free(&stash->blocktype);CHKERRMPI(ierr); 1040d7d60843SJed Brown } 1041d7d60843SJed Brown stash->nsendranks = 0; 1042d7d60843SJed Brown stash->nrecvranks = 0; 1043d7d60843SJed Brown ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1044d7d60843SJed Brown ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1045d7d60843SJed Brown ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1046d7d60843SJed Brown ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1047d7d60843SJed Brown ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1048d7d60843SJed Brown ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1049d7d60843SJed Brown PetscFunctionReturn(0); 1050d7d60843SJed Brown } 10511667be42SBarry Smith #endif 1052