12d5177cdSBarry Smith 2af0996ceSBarry Smith #include <petsc/private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 975d39b6aSBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 10d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 12d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 13d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*); 1475d39b6aSBarry Smith #endif 15d7d60843SJed Brown 169417f4adSLois Curfman McInnes /* 178798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 184c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 194c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 209417f4adSLois Curfman McInnes 214c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 224c1ff481SSatish Balay 234c1ff481SSatish Balay Input Parameters: 244c1ff481SSatish Balay comm - communicator, required for scatters. 254c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 264c1ff481SSatish Balay 274c1ff481SSatish Balay Output Parameters: 284c1ff481SSatish Balay stash - the newly created stash 299417f4adSLois Curfman McInnes */ 30c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 319417f4adSLois Curfman McInnes { 32dfbe8321SBarry Smith PetscErrorCode ierr; 33533163c2SBarry Smith PetscInt max,*opt,nopt,i; 34ace3abfcSBarry Smith PetscBool flg; 35bc5ccf88SSatish Balay 363a40ed3dSBarry Smith PetscFunctionBegin; 37bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 38752ec6e0SSatish Balay stash->comm = comm; 398865f1eaSKarl Rupp 40752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 41a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 42a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 43a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 44785e854fSJed Brown ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 45533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 46533163c2SBarry Smith 47bc5ccf88SSatish Balay 48434d7ff9SSatish Balay nopt = stash->size; 49785e854fSJed Brown ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 50c5929fdfSBarry Smith ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 51434d7ff9SSatish Balay if (flg) { 52434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 53434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 54434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 55f4ab19daSSatish Balay else max = 0; /* Use default */ 56434d7ff9SSatish Balay stash->umax = max; 57434d7ff9SSatish Balay } else { 58434d7ff9SSatish Balay stash->umax = 0; 59434d7ff9SSatish Balay } 60606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 614c1ff481SSatish Balay if (bs <= 0) bs = 1; 62a2d1c673SSatish Balay 634c1ff481SSatish Balay stash->bs = bs; 649417f4adSLois Curfman McInnes stash->nmax = 0; 65434d7ff9SSatish Balay stash->oldnmax = 0; 669417f4adSLois Curfman McInnes stash->n = 0; 674c1ff481SSatish Balay stash->reallocs = -1; 6875cae7c1SHong Zhang stash->space_head = 0; 6975cae7c1SHong Zhang stash->space = 0; 709417f4adSLois Curfman McInnes 71bc5ccf88SSatish Balay stash->send_waits = 0; 72bc5ccf88SSatish Balay stash->recv_waits = 0; 73a2d1c673SSatish Balay stash->send_status = 0; 74bc5ccf88SSatish Balay stash->nsends = 0; 75bc5ccf88SSatish Balay stash->nrecvs = 0; 76bc5ccf88SSatish Balay stash->svalues = 0; 77bc5ccf88SSatish Balay stash->rvalues = 0; 78563fb871SSatish Balay stash->rindices = 0; 79a2d1c673SSatish Balay stash->nprocessed = 0; 8067318a8aSJed Brown stash->reproduce = PETSC_FALSE; 81d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 828865f1eaSKarl Rupp 83c5929fdfSBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 841667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 85b30fb036SBarry Smith ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr); 86b30fb036SBarry Smith if (!flg) { 87d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 88d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 89d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 90d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 91ac2b2aa0SJed Brown } else { 921667be42SBarry Smith #endif 93ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 94ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 95ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 96ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 971667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI) 98ac2b2aa0SJed Brown } 991667be42SBarry Smith #endif 1003a40ed3dSBarry Smith PetscFunctionReturn(0); 1019417f4adSLois Curfman McInnes } 1029417f4adSLois Curfman McInnes 1034c1ff481SSatish Balay /* 1048798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 1054c1ff481SSatish Balay */ 106dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 1079417f4adSLois Curfman McInnes { 108dfbe8321SBarry Smith PetscErrorCode ierr; 109a2d1c673SSatish Balay 110bc5ccf88SSatish Balay PetscFunctionBegin; 1116bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 112ac2b2aa0SJed Brown if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 1138865f1eaSKarl Rupp 11482740460SHong Zhang stash->space = 0; 1158865f1eaSKarl Rupp 116533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 117bc5ccf88SSatish Balay PetscFunctionReturn(0); 118bc5ccf88SSatish Balay } 119bc5ccf88SSatish Balay 1204c1ff481SSatish Balay /* 12167318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1224c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 12367318a8aSJed Brown all the memory used for message passing is cleaned up. This 1244c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1254c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1264c1ff481SSatish Balay so that the same value can be used the next time through. 1274c1ff481SSatish Balay */ 128dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 129bc5ccf88SSatish Balay { 1306849ba73SBarry Smith PetscErrorCode ierr; 131ac2b2aa0SJed Brown 132ac2b2aa0SJed Brown PetscFunctionBegin; 133ac2b2aa0SJed Brown ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 134ac2b2aa0SJed Brown PetscFunctionReturn(0); 135ac2b2aa0SJed Brown } 136ac2b2aa0SJed Brown 137ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 138ac2b2aa0SJed Brown { 139ac2b2aa0SJed Brown PetscErrorCode ierr; 140533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 141a2d1c673SSatish Balay MPI_Status *send_status; 142a2d1c673SSatish Balay 1433a40ed3dSBarry Smith PetscFunctionBegin; 144533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 145a2d1c673SSatish Balay /* wait on sends */ 146a2d1c673SSatish Balay if (nsends) { 147785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 148a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 149606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 150a2d1c673SSatish Balay } 151a2d1c673SSatish Balay 152c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 153434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 154434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 155b9b97703SBarry Smith if (stash->n) { 15694b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1578a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 158434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 159b9b97703SBarry Smith } 160434d7ff9SSatish Balay 161d07ff455SSatish Balay stash->nmax = 0; 162d07ff455SSatish Balay stash->n = 0; 1634c1ff481SSatish Balay stash->reallocs = -1; 164a2d1c673SSatish Balay stash->nprocessed = 0; 1658865f1eaSKarl Rupp 1666bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1678865f1eaSKarl Rupp 16882740460SHong Zhang stash->space = 0; 1698865f1eaSKarl Rupp 170606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 171606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 172c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 173c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 174606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 175c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 176563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 1773a40ed3dSBarry Smith PetscFunctionReturn(0); 1789417f4adSLois Curfman McInnes } 1799417f4adSLois Curfman McInnes 1804c1ff481SSatish Balay /* 1818798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1824c1ff481SSatish Balay 1834c1ff481SSatish Balay Input Parameters: 1844c1ff481SSatish Balay stash - the stash 18594b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1864c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1874c1ff481SSatish Balay 1884c1ff481SSatish Balay */ 189c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 19097530c3fSBarry Smith { 191c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 19294b769a5SSatish Balay 1933a40ed3dSBarry Smith PetscFunctionBegin; 1941ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1951ecfd215SBarry Smith if (reallocs) { 196434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 197434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1981ecfd215SBarry Smith } 199bc5ccf88SSatish Balay PetscFunctionReturn(0); 200bc5ccf88SSatish Balay } 2014c1ff481SSatish Balay 2024c1ff481SSatish Balay /* 2038798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 2044c1ff481SSatish Balay 2054c1ff481SSatish Balay Input Parameters: 2064c1ff481SSatish Balay stash - the stash 2074c1ff481SSatish Balay max - the value that is used as the max size of the stash. 2084c1ff481SSatish Balay this value is used while allocating memory. 2094c1ff481SSatish Balay */ 210c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 211bc5ccf88SSatish Balay { 212bc5ccf88SSatish Balay PetscFunctionBegin; 213434d7ff9SSatish Balay stash->umax = max; 2143a40ed3dSBarry Smith PetscFunctionReturn(0); 21597530c3fSBarry Smith } 21697530c3fSBarry Smith 2178798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2184c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2194c1ff481SSatish Balay being inserted into the stash. 2204c1ff481SSatish Balay 2214c1ff481SSatish Balay Input Parameters: 2224c1ff481SSatish Balay stash - the stash 2234c1ff481SSatish Balay incr - the minimum increase requested 2244c1ff481SSatish Balay 2254c1ff481SSatish Balay Notes: 2264c1ff481SSatish Balay This routine doubles the currently used memory. 2274c1ff481SSatish Balay */ 228c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2299417f4adSLois Curfman McInnes { 2306849ba73SBarry Smith PetscErrorCode ierr; 2315bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2329417f4adSLois Curfman McInnes 2333a40ed3dSBarry Smith PetscFunctionBegin; 2349417f4adSLois Curfman McInnes /* allocate a larger stash */ 235c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 236434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 237434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 238c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 239434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 240434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 241434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2424c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 243d07ff455SSatish Balay 24475cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 24575cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 246b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 247b087b6d6SSatish Balay stash->space_head = stash->space; 24875cae7c1SHong Zhang } 249b087b6d6SSatish Balay 250bc5ccf88SSatish Balay stash->reallocs++; 25175cae7c1SHong Zhang stash->nmax = newnmax; 252bc5ccf88SSatish Balay PetscFunctionReturn(0); 253bc5ccf88SSatish Balay } 254bc5ccf88SSatish Balay /* 2558798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2564c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2574c1ff481SSatish Balay can be inserted with a single call to this function. 2584c1ff481SSatish Balay 2594c1ff481SSatish Balay Input Parameters: 2604c1ff481SSatish Balay stash - the stash 2614c1ff481SSatish Balay row - the global row correspoiding to the values 2624c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2634c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2644c1ff481SSatish Balay values - the values inserted 265bc5ccf88SSatish Balay */ 266ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 267bc5ccf88SSatish Balay { 268dfbe8321SBarry Smith PetscErrorCode ierr; 269b400d20cSBarry Smith PetscInt i,k,cnt = 0; 27075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 271bc5ccf88SSatish Balay 272bc5ccf88SSatish Balay PetscFunctionBegin; 2734c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 27475cae7c1SHong Zhang if (!space || space->local_remaining < n) { 2758798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2769417f4adSLois Curfman McInnes } 27775cae7c1SHong Zhang space = stash->space; 27875cae7c1SHong Zhang k = space->local_used; 2794c1ff481SSatish Balay for (i=0; i<n; i++) { 28088c3974fSBarry Smith if (ignorezeroentries && (values[i] == 0.0)) continue; 28175cae7c1SHong Zhang space->idx[k] = row; 28275cae7c1SHong Zhang space->idy[k] = idxn[i]; 28375cae7c1SHong Zhang space->val[k] = values[i]; 28475cae7c1SHong Zhang k++; 285b400d20cSBarry Smith cnt++; 2869417f4adSLois Curfman McInnes } 287b400d20cSBarry Smith stash->n += cnt; 288b400d20cSBarry Smith space->local_used += cnt; 289b400d20cSBarry Smith space->local_remaining -= cnt; 290a2d1c673SSatish Balay PetscFunctionReturn(0); 291a2d1c673SSatish Balay } 29275cae7c1SHong Zhang 2934c1ff481SSatish Balay /* 2948798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2954c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2964c1ff481SSatish Balay can be inserted with a single call to this function. 297a2d1c673SSatish Balay 2984c1ff481SSatish Balay Input Parameters: 2994c1ff481SSatish Balay stash - the stash 3004c1ff481SSatish Balay row - the global row correspoiding to the values 3014c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3024c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 3034c1ff481SSatish Balay values - the values inserted 3044c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 3054c1ff481SSatish Balay this happens because the input is columnoriented. 3064c1ff481SSatish Balay */ 307ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 308a2d1c673SSatish Balay { 309dfbe8321SBarry Smith PetscErrorCode ierr; 31050e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 31175cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 312a2d1c673SSatish Balay 3134c1ff481SSatish Balay PetscFunctionBegin; 3144c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 31575cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3168798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3174c1ff481SSatish Balay } 31875cae7c1SHong Zhang space = stash->space; 31975cae7c1SHong Zhang k = space->local_used; 3204c1ff481SSatish Balay for (i=0; i<n; i++) { 32188c3974fSBarry Smith if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 32275cae7c1SHong Zhang space->idx[k] = row; 32375cae7c1SHong Zhang space->idy[k] = idxn[i]; 32475cae7c1SHong Zhang space->val[k] = values[i*stepval]; 32575cae7c1SHong Zhang k++; 326b400d20cSBarry Smith cnt++; 3274c1ff481SSatish Balay } 328b400d20cSBarry Smith stash->n += cnt; 329b400d20cSBarry Smith space->local_used += cnt; 330b400d20cSBarry Smith space->local_remaining -= cnt; 3314c1ff481SSatish Balay PetscFunctionReturn(0); 3324c1ff481SSatish Balay } 3334c1ff481SSatish Balay 3344c1ff481SSatish Balay /* 3358798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3364c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3374c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3384c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3394c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3404c1ff481SSatish Balay 3414c1ff481SSatish Balay Input Parameters: 3424c1ff481SSatish Balay stash - the stash 3434c1ff481SSatish Balay row - the global block-row correspoiding to the values 3444c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3454c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3464c1ff481SSatish Balay values. Each block is of size bs*bs. 3474c1ff481SSatish Balay values - the values inserted 3484c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3494c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3504c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3514c1ff481SSatish Balay */ 35254f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3534c1ff481SSatish Balay { 354dfbe8321SBarry Smith PetscErrorCode ierr; 35575cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 35654f21887SBarry Smith const PetscScalar *vals; 35754f21887SBarry Smith PetscScalar *array; 35875cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 359a2d1c673SSatish Balay 360a2d1c673SSatish Balay PetscFunctionBegin; 36175cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3628798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 363a2d1c673SSatish Balay } 36475cae7c1SHong Zhang space = stash->space; 36575cae7c1SHong Zhang l = space->local_used; 36675cae7c1SHong Zhang bs2 = bs*bs; 3674c1ff481SSatish Balay for (i=0; i<n; i++) { 36875cae7c1SHong Zhang space->idx[l] = row; 36975cae7c1SHong Zhang space->idy[l] = idxn[i]; 37075cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 37175cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 37275cae7c1SHong Zhang funtion call */ 37375cae7c1SHong Zhang array = space->val + bs2*l; 37475cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 37575cae7c1SHong Zhang for (j=0; j<bs; j++) { 37675cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 37775cae7c1SHong Zhang array++; 37875cae7c1SHong Zhang vals += cmax*bs; 37975cae7c1SHong Zhang } 38075cae7c1SHong Zhang l++; 381a2d1c673SSatish Balay } 3825bd3b8fbSHong Zhang stash->n += n; 38375cae7c1SHong Zhang space->local_used += n; 38475cae7c1SHong Zhang space->local_remaining -= n; 3854c1ff481SSatish Balay PetscFunctionReturn(0); 3864c1ff481SSatish Balay } 3874c1ff481SSatish Balay 3884c1ff481SSatish Balay /* 3898798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3904c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3914c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3924c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3934c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3944c1ff481SSatish Balay 3954c1ff481SSatish Balay Input Parameters: 3964c1ff481SSatish Balay stash - the stash 3974c1ff481SSatish Balay row - the global block-row correspoiding to the values 3984c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3994c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 4004c1ff481SSatish Balay values. Each block is of size bs*bs. 4014c1ff481SSatish Balay values - the values inserted 4024c1ff481SSatish Balay rmax - the number of block-rows in the original block. 4034c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 4044c1ff481SSatish Balay idx - the index of the current block-row in the original block. 4054c1ff481SSatish Balay */ 40654f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 4074c1ff481SSatish Balay { 408dfbe8321SBarry Smith PetscErrorCode ierr; 40975cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 41054f21887SBarry Smith const PetscScalar *vals; 41154f21887SBarry Smith PetscScalar *array; 41275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4134c1ff481SSatish Balay 4144c1ff481SSatish Balay PetscFunctionBegin; 41575cae7c1SHong Zhang if (!space || space->local_remaining < n) { 4168798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4174c1ff481SSatish Balay } 41875cae7c1SHong Zhang space = stash->space; 41975cae7c1SHong Zhang l = space->local_used; 42075cae7c1SHong Zhang bs2 = bs*bs; 4214c1ff481SSatish Balay for (i=0; i<n; i++) { 42275cae7c1SHong Zhang space->idx[l] = row; 42375cae7c1SHong Zhang space->idy[l] = idxn[i]; 42475cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 42575cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 42675cae7c1SHong Zhang funtion call */ 42775cae7c1SHong Zhang array = space->val + bs2*l; 42875cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 42975cae7c1SHong Zhang for (j=0; j<bs; j++) { 4308865f1eaSKarl Rupp for (k=0; k<bs; k++) array[k] = vals[k]; 43175cae7c1SHong Zhang array += bs; 43275cae7c1SHong Zhang vals += rmax*bs; 43375cae7c1SHong Zhang } 4345bd3b8fbSHong Zhang l++; 435a2d1c673SSatish Balay } 4365bd3b8fbSHong Zhang stash->n += n; 43775cae7c1SHong Zhang space->local_used += n; 43875cae7c1SHong Zhang space->local_remaining -= n; 4393a40ed3dSBarry Smith PetscFunctionReturn(0); 4409417f4adSLois Curfman McInnes } 4414c1ff481SSatish Balay /* 4428798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4434c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4444c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4454c1ff481SSatish Balay processors. 446bc5ccf88SSatish Balay 4474c1ff481SSatish Balay Input Parameters: 4484c1ff481SSatish Balay stash - the stash 4494c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4504c1ff481SSatish Balay for each node. 4514c1ff481SSatish Balay 4524c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4534c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4544c1ff481SSatish Balay the proper global indices. 4554c1ff481SSatish Balay */ 4561e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 457bc5ccf88SSatish Balay { 458ac2b2aa0SJed Brown PetscErrorCode ierr; 459ac2b2aa0SJed Brown 460ac2b2aa0SJed Brown PetscFunctionBegin; 461ac2b2aa0SJed Brown ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 462ac2b2aa0SJed Brown PetscFunctionReturn(0); 463ac2b2aa0SJed Brown } 464ac2b2aa0SJed Brown 465ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 466ac2b2aa0SJed Brown { 467c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 468fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4696849ba73SBarry Smith PetscErrorCode ierr; 47075cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 47154f21887SBarry Smith PetscScalar **rvalues,*svalues; 472bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 473563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 47476ec1555SBarry Smith PetscMPIInt *sizes,*nlengths,nreceives; 4755bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 47654f21887SBarry Smith PetscScalar *sp_val; 4775bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 478bc5ccf88SSatish Balay 479bc5ccf88SSatish Balay PetscFunctionBegin; 4804b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 4814b4eb8d3SJed Brown InsertMode addv; 482b2566f29SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4834b4eb8d3SJed Brown if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 4844b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */ 4854b4eb8d3SJed Brown } 4864b4eb8d3SJed Brown 4874c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 48875cae7c1SHong Zhang 489bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 490037dbc42SBarry Smith ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 4911795a4d1SJed Brown ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 492037dbc42SBarry Smith ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 493a2d1c673SSatish Balay 49475cae7c1SHong Zhang i = j = 0; 4957357eb19SBarry Smith lastidx = -1; 4965bd3b8fbSHong Zhang space = stash->space_head; 4976c4ed002SBarry Smith while (space) { 49875cae7c1SHong Zhang space_next = space->next; 4995bd3b8fbSHong Zhang sp_idx = space->idx; 50075cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 5017357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 5025bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 5037357eb19SBarry Smith lastidx = idx; 5047357eb19SBarry Smith for (; j<size; j++) { 5054c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 506563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 507bc5ccf88SSatish Balay } 508bc5ccf88SSatish Balay } 50975cae7c1SHong Zhang i++; 51075cae7c1SHong Zhang } 51175cae7c1SHong Zhang space = space_next; 512bc5ccf88SSatish Balay } 513563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 514563fb871SSatish Balay for (i=0, nsends=0; i<size; i++) { 5158865f1eaSKarl Rupp if (nlengths[i]) { 51676ec1555SBarry Smith sizes[i] = 1; nsends++; 5178865f1eaSKarl Rupp } 518563fb871SSatish Balay } 519bc5ccf88SSatish Balay 52054f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 521563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 52276ec1555SBarry Smith ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 523563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 524563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 525563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 526563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 527563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 528563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 529563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 530563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 5318865f1eaSKarl Rupp ierr = PetscFree(olengths);CHKERRQ(ierr);} 532bc5ccf88SSatish Balay 533bc5ccf88SSatish Balay /* do sends: 534bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 535bc5ccf88SSatish Balay the ith processor 536bc5ccf88SSatish Balay */ 537dcca6d9dSJed Brown ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 538785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 539dcca6d9dSJed Brown ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 540a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 541bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 542bc5ccf88SSatish Balay for (i=1; i<size; i++) { 543563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 544533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 545bc5ccf88SSatish Balay } 54675cae7c1SHong Zhang 54775cae7c1SHong Zhang i = 0; 5485bd3b8fbSHong Zhang space = stash->space_head; 5496c4ed002SBarry Smith while (space) { 55075cae7c1SHong Zhang space_next = space->next; 5515bd3b8fbSHong Zhang sp_idx = space->idx; 5525bd3b8fbSHong Zhang sp_idy = space->idy; 5535bd3b8fbSHong Zhang sp_val = space->val; 55475cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 555bc5ccf88SSatish Balay j = owner[i]; 556a2d1c673SSatish Balay if (bs2 == 1) { 5575bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 558a2d1c673SSatish Balay } else { 559c1ac3661SBarry Smith PetscInt k; 56054f21887SBarry Smith PetscScalar *buf1,*buf2; 5614c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 562b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5638865f1eaSKarl Rupp for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 564a2d1c673SSatish Balay } 5655bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5665bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 567bc5ccf88SSatish Balay startv[j]++; 568bc5ccf88SSatish Balay starti[j]++; 56975cae7c1SHong Zhang i++; 57075cae7c1SHong Zhang } 57175cae7c1SHong Zhang space = space_next; 572bc5ccf88SSatish Balay } 573bc5ccf88SSatish Balay startv[0] = 0; 5748865f1eaSKarl Rupp for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 575e5d0e772SSatish Balay 576bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 57776ec1555SBarry Smith if (sizes[i]) { 578563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 579a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 580bc5ccf88SSatish Balay } 581b85c94c3SSatish Balay } 5826cf91177SBarry Smith #if defined(PETSC_USE_INFO) 58393157e10SBarry Smith ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 584e5d0e772SSatish Balay for (i=0; i<size; i++) { 58576ec1555SBarry Smith if (sizes[i]) { 58630c47e72SSatish Balay ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 587e5d0e772SSatish Balay } 588e5d0e772SSatish Balay } 589e5d0e772SSatish Balay #endif 590c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 591606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 592c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 59376ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 594a2d1c673SSatish Balay 595563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 596785e854fSJed Brown ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 597563fb871SSatish Balay 598563fb871SSatish Balay for (i=0; i<nreceives; i++) { 599563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 600563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 601563fb871SSatish Balay } 602563fb871SSatish Balay stash->recv_waits = recv_waits; 6038865f1eaSKarl Rupp 604563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 605563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 606563fb871SSatish Balay 607c05d87d6SBarry Smith stash->svalues = svalues; 608c05d87d6SBarry Smith stash->sindices = sindices; 609c05d87d6SBarry Smith stash->rvalues = rvalues; 610c05d87d6SBarry Smith stash->rindices = rindices; 611c05d87d6SBarry Smith stash->send_waits = send_waits; 612c05d87d6SBarry Smith stash->nsends = nsends; 613c05d87d6SBarry Smith stash->nrecvs = nreceives; 61467318a8aSJed Brown stash->reproduce_count = 0; 615bc5ccf88SSatish Balay PetscFunctionReturn(0); 616bc5ccf88SSatish Balay } 617bc5ccf88SSatish Balay 618a2d1c673SSatish Balay /* 6198798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 6208798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6214c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6224c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6234c1ff481SSatish Balay 6244c1ff481SSatish Balay Input Parameters: 6254c1ff481SSatish Balay stash - the stash 6264c1ff481SSatish Balay 6274c1ff481SSatish Balay Output Parameters: 6284c1ff481SSatish Balay nvals - the number of entries in the current message. 6294c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6304c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6314c1ff481SSatish Balay vals - the values 6324c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6334c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6344c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 635a2d1c673SSatish Balay */ 63654f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 637bc5ccf88SSatish Balay { 6386849ba73SBarry Smith PetscErrorCode ierr; 639ac2b2aa0SJed Brown 640ac2b2aa0SJed Brown PetscFunctionBegin; 641ac2b2aa0SJed Brown ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 642ac2b2aa0SJed Brown PetscFunctionReturn(0); 643ac2b2aa0SJed Brown } 644ac2b2aa0SJed Brown 645ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 646ac2b2aa0SJed Brown { 647ac2b2aa0SJed Brown PetscErrorCode ierr; 648533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 649fe09c992SBarry Smith PetscInt bs2; 650a2d1c673SSatish Balay MPI_Status recv_status; 651ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 652bc5ccf88SSatish Balay 653bc5ccf88SSatish Balay PetscFunctionBegin; 654a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 655a2d1c673SSatish Balay /* Return if no more messages to process */ 6568865f1eaSKarl Rupp if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 657a2d1c673SSatish Balay 6584c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 65967318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 660a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 661a2d1c673SSatish Balay while (!match_found) { 66267318a8aSJed Brown if (stash->reproduce) { 66367318a8aSJed Brown i = stash->reproduce_count++; 66467318a8aSJed Brown ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 66567318a8aSJed Brown } else { 666a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 66767318a8aSJed Brown } 668e32f2f54SBarry Smith if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 669533163c2SBarry Smith 67067318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 671a2d1c673SSatish Balay if (i % 2) { 672a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 6738865f1eaSKarl Rupp 674c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 6758865f1eaSKarl Rupp 676a2d1c673SSatish Balay *nvals = *nvals/bs2; 677563fb871SSatish Balay } else { 678563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 6798865f1eaSKarl Rupp 680563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 6818865f1eaSKarl Rupp 682563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 683bc5ccf88SSatish Balay } 684a2d1c673SSatish Balay 685cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 686c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 687c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 688a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 689563fb871SSatish Balay *rows = stash->rindices[i2]; 690a2d1c673SSatish Balay *cols = *rows + *nvals; 691563fb871SSatish Balay *vals = stash->rvalues[i1]; 692a2d1c673SSatish Balay *flg = 1; 693a2d1c673SSatish Balay stash->nprocessed++; 69435d8aa7fSBarry Smith match_found = PETSC_TRUE; 695bc5ccf88SSatish Balay } 696bc5ccf88SSatish Balay } 697bc5ccf88SSatish Balay PetscFunctionReturn(0); 698bc5ccf88SSatish Balay } 699d7d60843SJed Brown 700*e69ee1f7SSatish Balay #if !defined(PETSC_HAVE_MPIUNI) 701d7d60843SJed Brown typedef struct { 702d7d60843SJed Brown PetscInt row; 703d7d60843SJed Brown PetscInt col; 704d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 705d7d60843SJed Brown } MatStashBlock; 706d7d60843SJed Brown 707d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 708d7d60843SJed Brown { 709d7d60843SJed Brown PetscErrorCode ierr; 710d7d60843SJed Brown PetscMatStashSpace space; 711d7d60843SJed Brown PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 712d7d60843SJed Brown PetscScalar **valptr; 713d7d60843SJed Brown 714d7d60843SJed Brown PetscFunctionBegin; 715d7d60843SJed Brown ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 716d7d60843SJed Brown for (space=stash->space_head,cnt=0; space; space=space->next) { 717d7d60843SJed Brown for (i=0; i<space->local_used; i++) { 718d7d60843SJed Brown row[cnt] = space->idx[i]; 719d7d60843SJed Brown col[cnt] = space->idy[i]; 720d7d60843SJed Brown valptr[cnt] = &space->val[i*bs2]; 721d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 722d7d60843SJed Brown cnt++; 723d7d60843SJed Brown } 724d7d60843SJed Brown } 725d7d60843SJed Brown if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); 726d7d60843SJed Brown ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 727d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 728d7d60843SJed Brown for (rowstart=0,cnt=0,i=1; i<=n; i++) { 729d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 730d7d60843SJed Brown PetscInt colstart; 731d7d60843SJed Brown ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 732d7d60843SJed Brown for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */ 733d7d60843SJed Brown PetscInt j,l; 734d7d60843SJed Brown MatStashBlock *block; 735d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 736d7d60843SJed Brown block->row = row[rowstart]; 737d7d60843SJed Brown block->col = col[colstart]; 738d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 739d7d60843SJed Brown for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 740d7d60843SJed Brown if (insertmode == ADD_VALUES) { 741d7d60843SJed Brown for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 742d7d60843SJed Brown } else { 743d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 744d7d60843SJed Brown } 745d7d60843SJed Brown } 746d7d60843SJed Brown colstart = j; 747d7d60843SJed Brown } 748d7d60843SJed Brown rowstart = i; 749d7d60843SJed Brown } 750d7d60843SJed Brown } 751d7d60843SJed Brown ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 752d7d60843SJed Brown PetscFunctionReturn(0); 753d7d60843SJed Brown } 754d7d60843SJed Brown 755d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 756d7d60843SJed Brown { 757d7d60843SJed Brown PetscErrorCode ierr; 758d7d60843SJed Brown 759d7d60843SJed Brown PetscFunctionBegin; 760d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 761d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 762d7d60843SJed Brown PetscMPIInt blocklens[2]; 763d7d60843SJed Brown MPI_Aint displs[2]; 764d7d60843SJed Brown MPI_Datatype types[2],stype; 7659503c6c6SJed Brown /* C++ std::complex is not my favorite datatype. Since it is not POD, we cannot use offsetof to find the offset of 7669503c6c6SJed Brown * vals. But the layout is actually guaranteed by the standard, so we do a little dance here with struct 7679503c6c6SJed Brown * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset. 7689503c6c6SJed Brown */ 7699503c6c6SJed Brown struct DummyBlock {PetscInt row,col; PetscReal vals;}; 770d7d60843SJed Brown 7719503c6c6SJed Brown stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar); 772d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 773d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 774d7d60843SJed Brown } 775d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 776d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 777d7d60843SJed Brown ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 778d7d60843SJed Brown blocklens[0] = 2; 779d7d60843SJed Brown blocklens[1] = bs2; 7809503c6c6SJed Brown displs[0] = offsetof(struct DummyBlock,row); 7819503c6c6SJed Brown displs[1] = offsetof(struct DummyBlock,vals); 782d7d60843SJed Brown types[0] = MPIU_INT; 783d7d60843SJed Brown types[1] = MPIU_SCALAR; 784d7d60843SJed Brown ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr); 785d7d60843SJed Brown ierr = MPI_Type_commit(&stype);CHKERRQ(ierr); 786d7d60843SJed Brown ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */ 787d7d60843SJed Brown ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr); 788d7d60843SJed Brown ierr = MPI_Type_free(&stype);CHKERRQ(ierr); 789d7d60843SJed Brown } 790d7d60843SJed Brown PetscFunctionReturn(0); 791d7d60843SJed Brown } 792d7d60843SJed Brown 793d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message. 794d7d60843SJed Brown * Here we post the main sends. 795d7d60843SJed Brown */ 796d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 797d7d60843SJed Brown { 798d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 799d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)sdata; 800d7d60843SJed Brown PetscErrorCode ierr; 801d7d60843SJed Brown 802d7d60843SJed Brown PetscFunctionBegin; 803d7d60843SJed Brown if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 804d7d60843SJed Brown ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 805d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 806d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 807d7d60843SJed Brown PetscFunctionReturn(0); 808d7d60843SJed Brown } 809d7d60843SJed Brown 810d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message. 811d7d60843SJed Brown * Here we post the main recvs. 812d7d60843SJed Brown */ 813d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 814d7d60843SJed Brown { 815d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 816d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)rdata; 817d7d60843SJed Brown MatStashFrame *frame; 818d7d60843SJed Brown PetscErrorCode ierr; 819d7d60843SJed Brown 820d7d60843SJed Brown PetscFunctionBegin; 821d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 822d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 823d7d60843SJed Brown ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 824d7d60843SJed Brown frame->count = hdr->count; 825d7d60843SJed Brown frame->pending = 1; 826d7d60843SJed Brown PetscFunctionReturn(0); 827d7d60843SJed Brown } 828d7d60843SJed Brown 829d7d60843SJed Brown /* 830d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 831d7d60843SJed Brown */ 832d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 833d7d60843SJed Brown { 834d7d60843SJed Brown PetscErrorCode ierr; 835d7d60843SJed Brown size_t nblocks; 836d7d60843SJed Brown char *sendblocks; 837d7d60843SJed Brown 838d7d60843SJed Brown PetscFunctionBegin; 8394b4eb8d3SJed Brown #if defined(PETSC_USE_DEBUG) 8404b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 8414b4eb8d3SJed Brown InsertMode addv; 842b2566f29SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 8434b4eb8d3SJed Brown if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 8444b4eb8d3SJed Brown } 8454b4eb8d3SJed Brown #endif 8464b4eb8d3SJed Brown 84797da8949SJed Brown if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */ 84897da8949SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 84997da8949SJed Brown } 85097da8949SJed Brown 851d7d60843SJed Brown ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 852d7d60843SJed Brown ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 853d7d60843SJed Brown ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 854d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 85597da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 85623b7d1baSJed Brown PetscInt i; 85723b7d1baSJed Brown size_t b; 85897da8949SJed Brown for (i=0,b=0; i<stash->nsendranks; i++) { 85997da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 86097da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 86197da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 86297da8949SJed Brown for ( ; b<nblocks; b++) { 86397da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 86497da8949SJed Brown if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 86597da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 86697da8949SJed Brown stash->sendhdr[i].count++; 86797da8949SJed Brown } 86897da8949SJed Brown } 86997da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 87023b7d1baSJed Brown PetscInt sendno; 87123b7d1baSJed Brown size_t i,rowstart; 872d7d60843SJed Brown 873d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 874d7d60843SJed Brown stash->nsendranks = 0; 875d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 8767e2ea869SJed Brown PetscInt owner; 877d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 878d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 879d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 880d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 881d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8827e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 883d7d60843SJed Brown } 884d7d60843SJed Brown stash->nsendranks++; 885d7d60843SJed Brown rowstart = i; 886d7d60843SJed Brown } 887d7d60843SJed Brown ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 888d7d60843SJed Brown 889d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 890d7d60843SJed Brown sendno = 0; 891d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 892d7d60843SJed Brown PetscInt owner; 893d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 894d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 895d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 896d7d60843SJed Brown stash->sendranks[sendno] = owner; 897d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 898d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 8997e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 900d7d60843SJed Brown } 901d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 902d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 903d7d60843SJed Brown stash->sendhdr[sendno].count = i - rowstart; 904d7d60843SJed Brown sendno++; 905d7d60843SJed Brown rowstart = i; 906d7d60843SJed Brown } 907d7d60843SJed Brown if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 908d7d60843SJed Brown } 909d7d60843SJed Brown 9104b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 9114b4eb8d3SJed Brown * message or a dummy entry of some sort. */ 9124b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) { 91323b7d1baSJed Brown size_t i; 9144b4eb8d3SJed Brown for (i=0; i<nblocks; i++) { 9154b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 9164b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row+1); 9174b4eb8d3SJed Brown } 9184b4eb8d3SJed Brown } 9194b4eb8d3SJed Brown 92097da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { 92197da8949SJed Brown PetscMPIInt i,tag; 92297da8949SJed Brown ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 92397da8949SJed Brown for (i=0; i<stash->nrecvranks; i++) { 92497da8949SJed Brown ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 92597da8949SJed Brown } 92697da8949SJed Brown for (i=0; i<stash->nsendranks; i++) { 92797da8949SJed Brown ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 92897da8949SJed Brown } 92997da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 93097da8949SJed Brown } else { 931e0ddb6e8SJed Brown ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 932e0ddb6e8SJed Brown &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 933d7d60843SJed Brown MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 934b5ddc6f1SJed Brown ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 93597da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 93697da8949SJed Brown } 937d7d60843SJed Brown 938d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 939d7d60843SJed Brown stash->recvframe_active = NULL; 940d7d60843SJed Brown stash->recvframe_i = 0; 941d7d60843SJed Brown stash->some_i = 0; 942d7d60843SJed Brown stash->some_count = 0; 943d7d60843SJed Brown stash->recvcount = 0; 94497da8949SJed Brown stash->subset_off_proc = mat->subsetoffprocentries; 9454b4eb8d3SJed Brown stash->insertmode = &mat->insertmode; 946d7d60843SJed Brown PetscFunctionReturn(0); 947d7d60843SJed Brown } 948d7d60843SJed Brown 949d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 950d7d60843SJed Brown { 951d7d60843SJed Brown PetscErrorCode ierr; 952d7d60843SJed Brown MatStashBlock *block; 953d7d60843SJed Brown 954d7d60843SJed Brown PetscFunctionBegin; 955d7d60843SJed Brown *flg = 0; 956d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 957d7d60843SJed Brown if (stash->some_i == stash->some_count) { 958d7d60843SJed Brown if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 959d7d60843SJed Brown ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 960d7d60843SJed Brown stash->some_i = 0; 961d7d60843SJed Brown } 962d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 963d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 964d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 965d7d60843SJed Brown ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 966d7d60843SJed Brown } 9674b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 9684b4eb8d3SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 9694b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 9704b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 9714b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 9724b4eb8d3SJed Brown } 973d7d60843SJed Brown stash->some_i++; 974d7d60843SJed Brown stash->recvcount++; 975d7d60843SJed Brown stash->recvframe_i = 0; 976d7d60843SJed Brown } 977d7d60843SJed Brown *n = 1; 978d7d60843SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 9794b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1); 980d7d60843SJed Brown *row = &block->row; 981d7d60843SJed Brown *col = &block->col; 982d7d60843SJed Brown *val = block->vals; 983d7d60843SJed Brown stash->recvframe_i++; 984d7d60843SJed Brown *flg = 1; 985d7d60843SJed Brown PetscFunctionReturn(0); 986d7d60843SJed Brown } 987d7d60843SJed Brown 988d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 989d7d60843SJed Brown { 990d7d60843SJed Brown PetscErrorCode ierr; 991d7d60843SJed Brown 992d7d60843SJed Brown PetscFunctionBegin; 993d7d60843SJed Brown ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 9943575f486SJed Brown if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 9953575f486SJed Brown void *dummy; 9963575f486SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 9973575f486SJed Brown } else { /* No reuse, so collect everything. */ 998d7d60843SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 99997da8949SJed Brown } 1000d7d60843SJed Brown 1001d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 1002d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 1003d7d60843SJed Brown Also update the oldmax, only if it increases */ 1004d7d60843SJed Brown if (stash->n) { 1005d7d60843SJed Brown PetscInt bs2 = stash->bs*stash->bs; 1006d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1007d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1008d7d60843SJed Brown } 1009d7d60843SJed Brown 1010d7d60843SJed Brown stash->nmax = 0; 1011d7d60843SJed Brown stash->n = 0; 1012d7d60843SJed Brown stash->reallocs = -1; 1013d7d60843SJed Brown stash->nprocessed = 0; 1014d7d60843SJed Brown 1015d7d60843SJed Brown ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1016d7d60843SJed Brown 1017d7d60843SJed Brown stash->space = 0; 1018d7d60843SJed Brown 1019d7d60843SJed Brown PetscFunctionReturn(0); 1020d7d60843SJed Brown } 1021d7d60843SJed Brown 1022d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1023d7d60843SJed Brown { 1024d7d60843SJed Brown PetscErrorCode ierr; 1025d7d60843SJed Brown 1026d7d60843SJed Brown PetscFunctionBegin; 1027d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1028d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1029d7d60843SJed Brown stash->recvframes = NULL; 1030d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1031d7d60843SJed Brown if (stash->blocktype != MPI_DATATYPE_NULL) { 1032d7d60843SJed Brown ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1033d7d60843SJed Brown } 1034d7d60843SJed Brown stash->nsendranks = 0; 1035d7d60843SJed Brown stash->nrecvranks = 0; 1036d7d60843SJed Brown ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1037d7d60843SJed Brown ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1038d7d60843SJed Brown ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1039d7d60843SJed Brown ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1040d7d60843SJed Brown ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1041d7d60843SJed Brown ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1042d7d60843SJed Brown PetscFunctionReturn(0); 1043d7d60843SJed Brown } 10441667be42SBarry Smith #endif 1045