12d5177cdSBarry Smith 2b45d2f2cSJed Brown #include <petsc-private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 12d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*); 13d7d60843SJed Brown 149417f4adSLois Curfman McInnes /* 158798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 164c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 174c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 189417f4adSLois Curfman McInnes 194c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 204c1ff481SSatish Balay 214c1ff481SSatish Balay Input Parameters: 224c1ff481SSatish Balay comm - communicator, required for scatters. 234c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 244c1ff481SSatish Balay 254c1ff481SSatish Balay Output Parameters: 264c1ff481SSatish Balay stash - the newly created stash 279417f4adSLois Curfman McInnes */ 284a2ae208SSatish Balay #undef __FUNCT__ 294a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 30c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 319417f4adSLois Curfman McInnes { 32dfbe8321SBarry Smith PetscErrorCode ierr; 33533163c2SBarry Smith PetscInt max,*opt,nopt,i; 34ace3abfcSBarry Smith PetscBool flg; 35bc5ccf88SSatish Balay 363a40ed3dSBarry Smith PetscFunctionBegin; 37bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 38752ec6e0SSatish Balay stash->comm = comm; 398865f1eaSKarl Rupp 40752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 41a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 42a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 43a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 44785e854fSJed Brown ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 45533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 46533163c2SBarry Smith 47bc5ccf88SSatish Balay 48434d7ff9SSatish Balay nopt = stash->size; 49785e854fSJed Brown ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 500298fd71SBarry Smith ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 51434d7ff9SSatish Balay if (flg) { 52434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 53434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 54434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 55f4ab19daSSatish Balay else max = 0; /* Use default */ 56434d7ff9SSatish Balay stash->umax = max; 57434d7ff9SSatish Balay } else { 58434d7ff9SSatish Balay stash->umax = 0; 59434d7ff9SSatish Balay } 60606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 614c1ff481SSatish Balay if (bs <= 0) bs = 1; 62a2d1c673SSatish Balay 634c1ff481SSatish Balay stash->bs = bs; 649417f4adSLois Curfman McInnes stash->nmax = 0; 65434d7ff9SSatish Balay stash->oldnmax = 0; 669417f4adSLois Curfman McInnes stash->n = 0; 674c1ff481SSatish Balay stash->reallocs = -1; 6875cae7c1SHong Zhang stash->space_head = 0; 6975cae7c1SHong Zhang stash->space = 0; 709417f4adSLois Curfman McInnes 71bc5ccf88SSatish Balay stash->send_waits = 0; 72bc5ccf88SSatish Balay stash->recv_waits = 0; 73a2d1c673SSatish Balay stash->send_status = 0; 74bc5ccf88SSatish Balay stash->nsends = 0; 75bc5ccf88SSatish Balay stash->nrecvs = 0; 76bc5ccf88SSatish Balay stash->svalues = 0; 77bc5ccf88SSatish Balay stash->rvalues = 0; 78563fb871SSatish Balay stash->rindices = 0; 79a2d1c673SSatish Balay stash->nprocessed = 0; 8067318a8aSJed Brown stash->reproduce = PETSC_FALSE; 81d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 828865f1eaSKarl Rupp 830298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 84ac2b2aa0SJed Brown ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr); 85ac2b2aa0SJed Brown if (flg) { 86d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 87d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 88d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 89d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 90ac2b2aa0SJed Brown } else { 91ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 92ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 93ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 94ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 95ac2b2aa0SJed Brown } 963a40ed3dSBarry Smith PetscFunctionReturn(0); 979417f4adSLois Curfman McInnes } 989417f4adSLois Curfman McInnes 994c1ff481SSatish Balay /* 1008798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 1014c1ff481SSatish Balay */ 1024a2ae208SSatish Balay #undef __FUNCT__ 1034a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 1059417f4adSLois Curfman McInnes { 106dfbe8321SBarry Smith PetscErrorCode ierr; 107a2d1c673SSatish Balay 108bc5ccf88SSatish Balay PetscFunctionBegin; 1096bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 110ac2b2aa0SJed Brown if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 1118865f1eaSKarl Rupp 11282740460SHong Zhang stash->space = 0; 1138865f1eaSKarl Rupp 114533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 115bc5ccf88SSatish Balay PetscFunctionReturn(0); 116bc5ccf88SSatish Balay } 117bc5ccf88SSatish Balay 1184c1ff481SSatish Balay /* 11967318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1204c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 12167318a8aSJed Brown all the memory used for message passing is cleaned up. This 1224c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1234c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1244c1ff481SSatish Balay so that the same value can be used the next time through. 1254c1ff481SSatish Balay */ 1264a2ae208SSatish Balay #undef __FUNCT__ 1274a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 128dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 129bc5ccf88SSatish Balay { 1306849ba73SBarry Smith PetscErrorCode ierr; 131ac2b2aa0SJed Brown 132ac2b2aa0SJed Brown PetscFunctionBegin; 133ac2b2aa0SJed Brown ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 134ac2b2aa0SJed Brown PetscFunctionReturn(0); 135ac2b2aa0SJed Brown } 136ac2b2aa0SJed Brown 137ac2b2aa0SJed Brown #undef __FUNCT__ 138ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterEnd_Ref" 139ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 140ac2b2aa0SJed Brown { 141ac2b2aa0SJed Brown PetscErrorCode ierr; 142533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 143a2d1c673SSatish Balay MPI_Status *send_status; 144a2d1c673SSatish Balay 1453a40ed3dSBarry Smith PetscFunctionBegin; 146533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 147a2d1c673SSatish Balay /* wait on sends */ 148a2d1c673SSatish Balay if (nsends) { 149785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 150a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 151606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 152a2d1c673SSatish Balay } 153a2d1c673SSatish Balay 154c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 155434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 156434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 157b9b97703SBarry Smith if (stash->n) { 15894b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1598a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 160434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 161b9b97703SBarry Smith } 162434d7ff9SSatish Balay 163d07ff455SSatish Balay stash->nmax = 0; 164d07ff455SSatish Balay stash->n = 0; 1654c1ff481SSatish Balay stash->reallocs = -1; 166a2d1c673SSatish Balay stash->nprocessed = 0; 1678865f1eaSKarl Rupp 1686bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1698865f1eaSKarl Rupp 17082740460SHong Zhang stash->space = 0; 1718865f1eaSKarl Rupp 172606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 173606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 174c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 175c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 176606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 177c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 178563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 1793a40ed3dSBarry Smith PetscFunctionReturn(0); 1809417f4adSLois Curfman McInnes } 1819417f4adSLois Curfman McInnes 1824c1ff481SSatish Balay /* 1838798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1844c1ff481SSatish Balay 1854c1ff481SSatish Balay Input Parameters: 1864c1ff481SSatish Balay stash - the stash 18794b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1884c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1894c1ff481SSatish Balay 1904c1ff481SSatish Balay */ 1914a2ae208SSatish Balay #undef __FUNCT__ 1924a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 193c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 19497530c3fSBarry Smith { 195c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 19694b769a5SSatish Balay 1973a40ed3dSBarry Smith PetscFunctionBegin; 1981ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1991ecfd215SBarry Smith if (reallocs) { 200434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 201434d7ff9SSatish Balay else *reallocs = stash->reallocs; 2021ecfd215SBarry Smith } 203bc5ccf88SSatish Balay PetscFunctionReturn(0); 204bc5ccf88SSatish Balay } 2054c1ff481SSatish Balay 2064c1ff481SSatish Balay /* 2078798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 2084c1ff481SSatish Balay 2094c1ff481SSatish Balay Input Parameters: 2104c1ff481SSatish Balay stash - the stash 2114c1ff481SSatish Balay max - the value that is used as the max size of the stash. 2124c1ff481SSatish Balay this value is used while allocating memory. 2134c1ff481SSatish Balay */ 2144a2ae208SSatish Balay #undef __FUNCT__ 2154a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 216c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 217bc5ccf88SSatish Balay { 218bc5ccf88SSatish Balay PetscFunctionBegin; 219434d7ff9SSatish Balay stash->umax = max; 2203a40ed3dSBarry Smith PetscFunctionReturn(0); 22197530c3fSBarry Smith } 22297530c3fSBarry Smith 2238798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2244c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2254c1ff481SSatish Balay being inserted into the stash. 2264c1ff481SSatish Balay 2274c1ff481SSatish Balay Input Parameters: 2284c1ff481SSatish Balay stash - the stash 2294c1ff481SSatish Balay incr - the minimum increase requested 2304c1ff481SSatish Balay 2314c1ff481SSatish Balay Notes: 2324c1ff481SSatish Balay This routine doubles the currently used memory. 2334c1ff481SSatish Balay */ 2344a2ae208SSatish Balay #undef __FUNCT__ 2354a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 236c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2379417f4adSLois Curfman McInnes { 2386849ba73SBarry Smith PetscErrorCode ierr; 2395bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2409417f4adSLois Curfman McInnes 2413a40ed3dSBarry Smith PetscFunctionBegin; 2429417f4adSLois Curfman McInnes /* allocate a larger stash */ 243c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 244434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 245434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 246c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 247434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 248434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 249434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2504c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 251d07ff455SSatish Balay 25275cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 25375cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 254b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 255b087b6d6SSatish Balay stash->space_head = stash->space; 25675cae7c1SHong Zhang } 257b087b6d6SSatish Balay 258bc5ccf88SSatish Balay stash->reallocs++; 25975cae7c1SHong Zhang stash->nmax = newnmax; 260bc5ccf88SSatish Balay PetscFunctionReturn(0); 261bc5ccf88SSatish Balay } 262bc5ccf88SSatish Balay /* 2638798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2644c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2654c1ff481SSatish Balay can be inserted with a single call to this function. 2664c1ff481SSatish Balay 2674c1ff481SSatish Balay Input Parameters: 2684c1ff481SSatish Balay stash - the stash 2694c1ff481SSatish Balay row - the global row correspoiding to the values 2704c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2714c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2724c1ff481SSatish Balay values - the values inserted 273bc5ccf88SSatish Balay */ 2744a2ae208SSatish Balay #undef __FUNCT__ 2754a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 276ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 277bc5ccf88SSatish Balay { 278dfbe8321SBarry Smith PetscErrorCode ierr; 279b400d20cSBarry Smith PetscInt i,k,cnt = 0; 28075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 281bc5ccf88SSatish Balay 282bc5ccf88SSatish Balay PetscFunctionBegin; 2834c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 28475cae7c1SHong Zhang if (!space || space->local_remaining < n) { 2858798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2869417f4adSLois Curfman McInnes } 28775cae7c1SHong Zhang space = stash->space; 28875cae7c1SHong Zhang k = space->local_used; 2894c1ff481SSatish Balay for (i=0; i<n; i++) { 29088c3974fSBarry Smith if (ignorezeroentries && (values[i] == 0.0)) continue; 29175cae7c1SHong Zhang space->idx[k] = row; 29275cae7c1SHong Zhang space->idy[k] = idxn[i]; 29375cae7c1SHong Zhang space->val[k] = values[i]; 29475cae7c1SHong Zhang k++; 295b400d20cSBarry Smith cnt++; 2969417f4adSLois Curfman McInnes } 297b400d20cSBarry Smith stash->n += cnt; 298b400d20cSBarry Smith space->local_used += cnt; 299b400d20cSBarry Smith space->local_remaining -= cnt; 300a2d1c673SSatish Balay PetscFunctionReturn(0); 301a2d1c673SSatish Balay } 30275cae7c1SHong Zhang 3034c1ff481SSatish Balay /* 3048798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 3054c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 3064c1ff481SSatish Balay can be inserted with a single call to this function. 307a2d1c673SSatish Balay 3084c1ff481SSatish Balay Input Parameters: 3094c1ff481SSatish Balay stash - the stash 3104c1ff481SSatish Balay row - the global row correspoiding to the values 3114c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3124c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 3134c1ff481SSatish Balay values - the values inserted 3144c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 3154c1ff481SSatish Balay this happens because the input is columnoriented. 3164c1ff481SSatish Balay */ 3174a2ae208SSatish Balay #undef __FUNCT__ 3184a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 319ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 320a2d1c673SSatish Balay { 321dfbe8321SBarry Smith PetscErrorCode ierr; 32250e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 32375cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 324a2d1c673SSatish Balay 3254c1ff481SSatish Balay PetscFunctionBegin; 3264c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 32775cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3288798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3294c1ff481SSatish Balay } 33075cae7c1SHong Zhang space = stash->space; 33175cae7c1SHong Zhang k = space->local_used; 3324c1ff481SSatish Balay for (i=0; i<n; i++) { 33388c3974fSBarry Smith if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 33475cae7c1SHong Zhang space->idx[k] = row; 33575cae7c1SHong Zhang space->idy[k] = idxn[i]; 33675cae7c1SHong Zhang space->val[k] = values[i*stepval]; 33775cae7c1SHong Zhang k++; 338b400d20cSBarry Smith cnt++; 3394c1ff481SSatish Balay } 340b400d20cSBarry Smith stash->n += cnt; 341b400d20cSBarry Smith space->local_used += cnt; 342b400d20cSBarry Smith space->local_remaining -= cnt; 3434c1ff481SSatish Balay PetscFunctionReturn(0); 3444c1ff481SSatish Balay } 3454c1ff481SSatish Balay 3464c1ff481SSatish Balay /* 3478798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3484c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3494c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3504c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3514c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3524c1ff481SSatish Balay 3534c1ff481SSatish Balay Input Parameters: 3544c1ff481SSatish Balay stash - the stash 3554c1ff481SSatish Balay row - the global block-row correspoiding to the values 3564c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3574c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3584c1ff481SSatish Balay values. Each block is of size bs*bs. 3594c1ff481SSatish Balay values - the values inserted 3604c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3614c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3624c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3634c1ff481SSatish Balay */ 3644a2ae208SSatish Balay #undef __FUNCT__ 3654a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 36654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3674c1ff481SSatish Balay { 368dfbe8321SBarry Smith PetscErrorCode ierr; 36975cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 37054f21887SBarry Smith const PetscScalar *vals; 37154f21887SBarry Smith PetscScalar *array; 37275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 373a2d1c673SSatish Balay 374a2d1c673SSatish Balay PetscFunctionBegin; 37575cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3768798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 377a2d1c673SSatish Balay } 37875cae7c1SHong Zhang space = stash->space; 37975cae7c1SHong Zhang l = space->local_used; 38075cae7c1SHong Zhang bs2 = bs*bs; 3814c1ff481SSatish Balay for (i=0; i<n; i++) { 38275cae7c1SHong Zhang space->idx[l] = row; 38375cae7c1SHong Zhang space->idy[l] = idxn[i]; 38475cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 38575cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 38675cae7c1SHong Zhang funtion call */ 38775cae7c1SHong Zhang array = space->val + bs2*l; 38875cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 38975cae7c1SHong Zhang for (j=0; j<bs; j++) { 39075cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 39175cae7c1SHong Zhang array++; 39275cae7c1SHong Zhang vals += cmax*bs; 39375cae7c1SHong Zhang } 39475cae7c1SHong Zhang l++; 395a2d1c673SSatish Balay } 3965bd3b8fbSHong Zhang stash->n += n; 39775cae7c1SHong Zhang space->local_used += n; 39875cae7c1SHong Zhang space->local_remaining -= n; 3994c1ff481SSatish Balay PetscFunctionReturn(0); 4004c1ff481SSatish Balay } 4014c1ff481SSatish Balay 4024c1ff481SSatish Balay /* 4038798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 4044c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 4054c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 4064c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 4074c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 4084c1ff481SSatish Balay 4094c1ff481SSatish Balay Input Parameters: 4104c1ff481SSatish Balay stash - the stash 4114c1ff481SSatish Balay row - the global block-row correspoiding to the values 4124c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 4134c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 4144c1ff481SSatish Balay values. Each block is of size bs*bs. 4154c1ff481SSatish Balay values - the values inserted 4164c1ff481SSatish Balay rmax - the number of block-rows in the original block. 4174c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 4184c1ff481SSatish Balay idx - the index of the current block-row in the original block. 4194c1ff481SSatish Balay */ 4204a2ae208SSatish Balay #undef __FUNCT__ 4214a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 42254f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 4234c1ff481SSatish Balay { 424dfbe8321SBarry Smith PetscErrorCode ierr; 42575cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 42654f21887SBarry Smith const PetscScalar *vals; 42754f21887SBarry Smith PetscScalar *array; 42875cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4294c1ff481SSatish Balay 4304c1ff481SSatish Balay PetscFunctionBegin; 43175cae7c1SHong Zhang if (!space || space->local_remaining < n) { 4328798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4334c1ff481SSatish Balay } 43475cae7c1SHong Zhang space = stash->space; 43575cae7c1SHong Zhang l = space->local_used; 43675cae7c1SHong Zhang bs2 = bs*bs; 4374c1ff481SSatish Balay for (i=0; i<n; i++) { 43875cae7c1SHong Zhang space->idx[l] = row; 43975cae7c1SHong Zhang space->idy[l] = idxn[i]; 44075cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 44175cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 44275cae7c1SHong Zhang funtion call */ 44375cae7c1SHong Zhang array = space->val + bs2*l; 44475cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 44575cae7c1SHong Zhang for (j=0; j<bs; j++) { 4468865f1eaSKarl Rupp for (k=0; k<bs; k++) array[k] = vals[k]; 44775cae7c1SHong Zhang array += bs; 44875cae7c1SHong Zhang vals += rmax*bs; 44975cae7c1SHong Zhang } 4505bd3b8fbSHong Zhang l++; 451a2d1c673SSatish Balay } 4525bd3b8fbSHong Zhang stash->n += n; 45375cae7c1SHong Zhang space->local_used += n; 45475cae7c1SHong Zhang space->local_remaining -= n; 4553a40ed3dSBarry Smith PetscFunctionReturn(0); 4569417f4adSLois Curfman McInnes } 4574c1ff481SSatish Balay /* 4588798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4594c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4604c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4614c1ff481SSatish Balay processors. 462bc5ccf88SSatish Balay 4634c1ff481SSatish Balay Input Parameters: 4644c1ff481SSatish Balay stash - the stash 4654c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4664c1ff481SSatish Balay for each node. 4674c1ff481SSatish Balay 4684c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4694c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4704c1ff481SSatish Balay the proper global indices. 4714c1ff481SSatish Balay */ 4724a2ae208SSatish Balay #undef __FUNCT__ 4734a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 4741e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 475bc5ccf88SSatish Balay { 476ac2b2aa0SJed Brown PetscErrorCode ierr; 477ac2b2aa0SJed Brown 478ac2b2aa0SJed Brown PetscFunctionBegin; 479ac2b2aa0SJed Brown ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 480ac2b2aa0SJed Brown PetscFunctionReturn(0); 481ac2b2aa0SJed Brown } 482ac2b2aa0SJed Brown 483ac2b2aa0SJed Brown #undef __FUNCT__ 484ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterBegin_Ref" 485ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 486ac2b2aa0SJed Brown { 487c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 488fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4896849ba73SBarry Smith PetscErrorCode ierr; 49075cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 49154f21887SBarry Smith PetscScalar **rvalues,*svalues; 492bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 493563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 49476ec1555SBarry Smith PetscMPIInt *sizes,*nlengths,nreceives; 4955bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 49654f21887SBarry Smith PetscScalar *sp_val; 4975bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 498bc5ccf88SSatish Balay 499bc5ccf88SSatish Balay PetscFunctionBegin; 5004c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 50175cae7c1SHong Zhang 502bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 503037dbc42SBarry Smith ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 5041795a4d1SJed Brown ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 505037dbc42SBarry Smith ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 506a2d1c673SSatish Balay 50775cae7c1SHong Zhang i = j = 0; 5087357eb19SBarry Smith lastidx = -1; 5095bd3b8fbSHong Zhang space = stash->space_head; 5100298fd71SBarry Smith while (space != NULL) { 51175cae7c1SHong Zhang space_next = space->next; 5125bd3b8fbSHong Zhang sp_idx = space->idx; 51375cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 5147357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 5155bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 5167357eb19SBarry Smith lastidx = idx; 5177357eb19SBarry Smith for (; j<size; j++) { 5184c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 519563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 520bc5ccf88SSatish Balay } 521bc5ccf88SSatish Balay } 52275cae7c1SHong Zhang i++; 52375cae7c1SHong Zhang } 52475cae7c1SHong Zhang space = space_next; 525bc5ccf88SSatish Balay } 526563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 527563fb871SSatish Balay for (i=0, nsends=0; i<size; i++) { 5288865f1eaSKarl Rupp if (nlengths[i]) { 52976ec1555SBarry Smith sizes[i] = 1; nsends++; 5308865f1eaSKarl Rupp } 531563fb871SSatish Balay } 532bc5ccf88SSatish Balay 53354f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 534563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 53576ec1555SBarry Smith ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 536563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 537563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 538563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 539563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 540563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 541563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 542563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 543563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 5448865f1eaSKarl Rupp ierr = PetscFree(olengths);CHKERRQ(ierr);} 545bc5ccf88SSatish Balay 546bc5ccf88SSatish Balay /* do sends: 547bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 548bc5ccf88SSatish Balay the ith processor 549bc5ccf88SSatish Balay */ 550dcca6d9dSJed Brown ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 551785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 552dcca6d9dSJed Brown ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 553a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 554bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 555bc5ccf88SSatish Balay for (i=1; i<size; i++) { 556563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 557533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 558bc5ccf88SSatish Balay } 55975cae7c1SHong Zhang 56075cae7c1SHong Zhang i = 0; 5615bd3b8fbSHong Zhang space = stash->space_head; 5620298fd71SBarry Smith while (space != NULL) { 56375cae7c1SHong Zhang space_next = space->next; 5645bd3b8fbSHong Zhang sp_idx = space->idx; 5655bd3b8fbSHong Zhang sp_idy = space->idy; 5665bd3b8fbSHong Zhang sp_val = space->val; 56775cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 568bc5ccf88SSatish Balay j = owner[i]; 569a2d1c673SSatish Balay if (bs2 == 1) { 5705bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 571a2d1c673SSatish Balay } else { 572c1ac3661SBarry Smith PetscInt k; 57354f21887SBarry Smith PetscScalar *buf1,*buf2; 5744c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 575b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5768865f1eaSKarl Rupp for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 577a2d1c673SSatish Balay } 5785bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5795bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 580bc5ccf88SSatish Balay startv[j]++; 581bc5ccf88SSatish Balay starti[j]++; 58275cae7c1SHong Zhang i++; 58375cae7c1SHong Zhang } 58475cae7c1SHong Zhang space = space_next; 585bc5ccf88SSatish Balay } 586bc5ccf88SSatish Balay startv[0] = 0; 5878865f1eaSKarl Rupp for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 588e5d0e772SSatish Balay 589bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 59076ec1555SBarry Smith if (sizes[i]) { 591563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 592a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 593bc5ccf88SSatish Balay } 594b85c94c3SSatish Balay } 5956cf91177SBarry Smith #if defined(PETSC_USE_INFO) 59693157e10SBarry Smith ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 597e5d0e772SSatish Balay for (i=0; i<size; i++) { 59876ec1555SBarry Smith if (sizes[i]) { 59930c47e72SSatish Balay ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 600e5d0e772SSatish Balay } 601e5d0e772SSatish Balay } 602e5d0e772SSatish Balay #endif 603c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 604606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 605c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 60676ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 607a2d1c673SSatish Balay 608563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 609785e854fSJed Brown ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 610563fb871SSatish Balay 611563fb871SSatish Balay for (i=0; i<nreceives; i++) { 612563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 613563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 614563fb871SSatish Balay } 615563fb871SSatish Balay stash->recv_waits = recv_waits; 6168865f1eaSKarl Rupp 617563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 618563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 619563fb871SSatish Balay 620c05d87d6SBarry Smith stash->svalues = svalues; 621c05d87d6SBarry Smith stash->sindices = sindices; 622c05d87d6SBarry Smith stash->rvalues = rvalues; 623c05d87d6SBarry Smith stash->rindices = rindices; 624c05d87d6SBarry Smith stash->send_waits = send_waits; 625c05d87d6SBarry Smith stash->nsends = nsends; 626c05d87d6SBarry Smith stash->nrecvs = nreceives; 62767318a8aSJed Brown stash->reproduce_count = 0; 628bc5ccf88SSatish Balay PetscFunctionReturn(0); 629bc5ccf88SSatish Balay } 630bc5ccf88SSatish Balay 631a2d1c673SSatish Balay /* 6328798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 6338798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6344c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6354c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6364c1ff481SSatish Balay 6374c1ff481SSatish Balay Input Parameters: 6384c1ff481SSatish Balay stash - the stash 6394c1ff481SSatish Balay 6404c1ff481SSatish Balay Output Parameters: 6414c1ff481SSatish Balay nvals - the number of entries in the current message. 6424c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6434c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6444c1ff481SSatish Balay vals - the values 6454c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6464c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6474c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 648a2d1c673SSatish Balay */ 6494a2ae208SSatish Balay #undef __FUNCT__ 6504a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 65154f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 652bc5ccf88SSatish Balay { 6536849ba73SBarry Smith PetscErrorCode ierr; 654ac2b2aa0SJed Brown 655ac2b2aa0SJed Brown PetscFunctionBegin; 656ac2b2aa0SJed Brown ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 657ac2b2aa0SJed Brown PetscFunctionReturn(0); 658ac2b2aa0SJed Brown } 659ac2b2aa0SJed Brown 660ac2b2aa0SJed Brown #undef __FUNCT__ 661ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_Ref" 662ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 663ac2b2aa0SJed Brown { 664ac2b2aa0SJed Brown PetscErrorCode ierr; 665533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 666fe09c992SBarry Smith PetscInt bs2; 667a2d1c673SSatish Balay MPI_Status recv_status; 668ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 669bc5ccf88SSatish Balay 670bc5ccf88SSatish Balay PetscFunctionBegin; 671a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 672a2d1c673SSatish Balay /* Return if no more messages to process */ 6738865f1eaSKarl Rupp if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 674a2d1c673SSatish Balay 6754c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 67667318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 677a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 678a2d1c673SSatish Balay while (!match_found) { 67967318a8aSJed Brown if (stash->reproduce) { 68067318a8aSJed Brown i = stash->reproduce_count++; 68167318a8aSJed Brown ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 68267318a8aSJed Brown } else { 683a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 68467318a8aSJed Brown } 685e32f2f54SBarry Smith if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 686533163c2SBarry Smith 68767318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 688a2d1c673SSatish Balay if (i % 2) { 689a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 6908865f1eaSKarl Rupp 691c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 6928865f1eaSKarl Rupp 693a2d1c673SSatish Balay *nvals = *nvals/bs2; 694563fb871SSatish Balay } else { 695563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 6968865f1eaSKarl Rupp 697563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 6988865f1eaSKarl Rupp 699563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 700bc5ccf88SSatish Balay } 701a2d1c673SSatish Balay 702cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 703c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 704c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 705a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 706563fb871SSatish Balay *rows = stash->rindices[i2]; 707a2d1c673SSatish Balay *cols = *rows + *nvals; 708563fb871SSatish Balay *vals = stash->rvalues[i1]; 709a2d1c673SSatish Balay *flg = 1; 710a2d1c673SSatish Balay stash->nprocessed++; 71135d8aa7fSBarry Smith match_found = PETSC_TRUE; 712bc5ccf88SSatish Balay } 713bc5ccf88SSatish Balay } 714bc5ccf88SSatish Balay PetscFunctionReturn(0); 715bc5ccf88SSatish Balay } 716d7d60843SJed Brown 717d7d60843SJed Brown typedef struct { 718d7d60843SJed Brown PetscInt row; 719d7d60843SJed Brown PetscInt col; 720d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 721d7d60843SJed Brown } MatStashBlock; 722d7d60843SJed Brown 723d7d60843SJed Brown #undef __FUNCT__ 724d7d60843SJed Brown #define __FUNCT__ "MatStashSortCompress_Private" 725d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 726d7d60843SJed Brown { 727d7d60843SJed Brown PetscErrorCode ierr; 728d7d60843SJed Brown PetscMatStashSpace space; 729d7d60843SJed Brown PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 730d7d60843SJed Brown PetscScalar **valptr; 731d7d60843SJed Brown 732d7d60843SJed Brown PetscFunctionBegin; 733d7d60843SJed Brown ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 734d7d60843SJed Brown for (space=stash->space_head,cnt=0; space; space=space->next) { 735d7d60843SJed Brown for (i=0; i<space->local_used; i++) { 736d7d60843SJed Brown row[cnt] = space->idx[i]; 737d7d60843SJed Brown col[cnt] = space->idy[i]; 738d7d60843SJed Brown valptr[cnt] = &space->val[i*bs2]; 739d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 740d7d60843SJed Brown cnt++; 741d7d60843SJed Brown } 742d7d60843SJed Brown } 743d7d60843SJed Brown if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); 744d7d60843SJed Brown ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 745d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 746d7d60843SJed Brown for (rowstart=0,cnt=0,i=1; i<=n; i++) { 747d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 748d7d60843SJed Brown PetscInt colstart; 749d7d60843SJed Brown ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 750d7d60843SJed Brown for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */ 751d7d60843SJed Brown PetscInt j,l; 752d7d60843SJed Brown MatStashBlock *block; 753d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 754d7d60843SJed Brown block->row = row[rowstart]; 755d7d60843SJed Brown block->col = col[colstart]; 756d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 757d7d60843SJed Brown for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 758d7d60843SJed Brown if (insertmode == ADD_VALUES) { 759d7d60843SJed Brown for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 760d7d60843SJed Brown } else { 761d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 762d7d60843SJed Brown } 763d7d60843SJed Brown } 764d7d60843SJed Brown colstart = j; 765d7d60843SJed Brown } 766d7d60843SJed Brown rowstart = i; 767d7d60843SJed Brown } 768d7d60843SJed Brown } 769d7d60843SJed Brown ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 770d7d60843SJed Brown PetscFunctionReturn(0); 771d7d60843SJed Brown } 772d7d60843SJed Brown 773d7d60843SJed Brown #undef __FUNCT__ 774d7d60843SJed Brown #define __FUNCT__ "MatStashBlockTypeSetUp" 775d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 776d7d60843SJed Brown { 777d7d60843SJed Brown PetscErrorCode ierr; 778d7d60843SJed Brown 779d7d60843SJed Brown PetscFunctionBegin; 780d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 781d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 782d7d60843SJed Brown PetscMPIInt blocklens[2]; 783d7d60843SJed Brown MPI_Aint displs[2]; 784d7d60843SJed Brown MPI_Datatype types[2],stype; 785d7d60843SJed Brown 786d7d60843SJed Brown stash->blocktype_size = offsetof(MatStashBlock,vals) + bs2*sizeof(PetscScalar); 787d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 788d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 789d7d60843SJed Brown } 790d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 791d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 792d7d60843SJed Brown ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 793d7d60843SJed Brown blocklens[0] = 2; 794d7d60843SJed Brown blocklens[1] = bs2; 795d7d60843SJed Brown displs[0] = offsetof(MatStashBlock,row); 796d7d60843SJed Brown displs[1] = offsetof(MatStashBlock,vals); 797d7d60843SJed Brown types[0] = MPIU_INT; 798d7d60843SJed Brown types[1] = MPIU_SCALAR; 799d7d60843SJed Brown ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr); 800d7d60843SJed Brown ierr = MPI_Type_commit(&stype);CHKERRQ(ierr); 801d7d60843SJed Brown ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */ 802d7d60843SJed Brown ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr); 803d7d60843SJed Brown ierr = MPI_Type_free(&stype);CHKERRQ(ierr); 804d7d60843SJed Brown } 805d7d60843SJed Brown PetscFunctionReturn(0); 806d7d60843SJed Brown } 807d7d60843SJed Brown 808d7d60843SJed Brown #undef __FUNCT__ 809d7d60843SJed Brown #define __FUNCT__ "MatStashBTSSend_Private" 810d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message. 811d7d60843SJed Brown * Here we post the main sends. 812d7d60843SJed Brown */ 813d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 814d7d60843SJed Brown { 815d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 816d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)sdata; 817d7d60843SJed Brown PetscErrorCode ierr; 818d7d60843SJed Brown 819d7d60843SJed Brown PetscFunctionBegin; 820d7d60843SJed Brown if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 821d7d60843SJed Brown ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 822d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 823d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 824d7d60843SJed Brown PetscFunctionReturn(0); 825d7d60843SJed Brown } 826d7d60843SJed Brown 827d7d60843SJed Brown #undef __FUNCT__ 828d7d60843SJed Brown #define __FUNCT__ "MatStashBTSRecv_Private" 829d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message. 830d7d60843SJed Brown * Here we post the main recvs. 831d7d60843SJed Brown */ 832d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 833d7d60843SJed Brown { 834d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 835d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)rdata; 836d7d60843SJed Brown MatStashFrame *frame; 837d7d60843SJed Brown PetscErrorCode ierr; 838d7d60843SJed Brown 839d7d60843SJed Brown PetscFunctionBegin; 840d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 841d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 842d7d60843SJed Brown ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 843d7d60843SJed Brown frame->count = hdr->count; 844d7d60843SJed Brown frame->pending = 1; 845d7d60843SJed Brown PetscFunctionReturn(0); 846d7d60843SJed Brown } 847d7d60843SJed Brown 848d7d60843SJed Brown #undef __FUNCT__ 849d7d60843SJed Brown #define __FUNCT__ "MatStashScatterBegin_BTS" 850d7d60843SJed Brown /* 851d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 852d7d60843SJed Brown */ 853d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 854d7d60843SJed Brown { 855d7d60843SJed Brown PetscErrorCode ierr; 856d7d60843SJed Brown size_t nblocks; 857d7d60843SJed Brown char *sendblocks; 858d7d60843SJed Brown 859d7d60843SJed Brown PetscFunctionBegin; 86097da8949SJed Brown if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */ 86197da8949SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 86297da8949SJed Brown } 86397da8949SJed Brown 864d7d60843SJed Brown ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 865d7d60843SJed Brown ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 866d7d60843SJed Brown ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 867d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 86897da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 86997da8949SJed Brown PetscInt i,b; 87097da8949SJed Brown for (i=0,b=0; i<stash->nsendranks; i++) { 87197da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 87297da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 87397da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 87497da8949SJed Brown for ( ; b<nblocks; b++) { 87597da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 87697da8949SJed Brown if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 87797da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 87897da8949SJed Brown stash->sendhdr[i].count++; 87997da8949SJed Brown } 88097da8949SJed Brown } 88197da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 882d7d60843SJed Brown PetscInt i,rowstart,sendno; 883d7d60843SJed Brown 884d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 885d7d60843SJed Brown stash->nsendranks = 0; 886d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 887*7e2ea869SJed Brown PetscInt owner; 888d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 889d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 890d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 891d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 892d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 893*7e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 894d7d60843SJed Brown } 895d7d60843SJed Brown stash->nsendranks++; 896d7d60843SJed Brown rowstart = i; 897d7d60843SJed Brown } 898d7d60843SJed Brown ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 899d7d60843SJed Brown 900d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 901d7d60843SJed Brown sendno = 0; 902d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 903d7d60843SJed Brown PetscInt owner; 904d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 905d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 906d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 907d7d60843SJed Brown stash->sendranks[sendno] = owner; 908d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 909d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 910*7e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 911d7d60843SJed Brown } 912d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 913d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 914d7d60843SJed Brown stash->sendhdr[sendno].count = i - rowstart; 915d7d60843SJed Brown stash->sendhdr[sendno].insertmode = mat->insertmode; 916d7d60843SJed Brown sendno++; 917d7d60843SJed Brown rowstart = i; 918d7d60843SJed Brown } 919d7d60843SJed Brown if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 920d7d60843SJed Brown } 921d7d60843SJed Brown 92297da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { 92397da8949SJed Brown PetscMPIInt i,tag; 92497da8949SJed Brown ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 92597da8949SJed Brown for (i=0; i<stash->nrecvranks; i++) { 92697da8949SJed Brown ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 92797da8949SJed Brown } 92897da8949SJed Brown for (i=0; i<stash->nsendranks; i++) { 92997da8949SJed Brown ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 93097da8949SJed Brown } 93197da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 93297da8949SJed Brown } else { 933d7d60843SJed Brown ierr = PetscCommBuildTwoSidedFReq(stash->comm,2,MPIU_INT,stash->nsendranks,stash->sendranks,stash->sendhdr, 934d7d60843SJed Brown &stash->nrecvranks,&stash->recvranks,&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 935d7d60843SJed Brown MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 93697da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 93797da8949SJed Brown } 938d7d60843SJed Brown 939d7d60843SJed Brown ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 940d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 941d7d60843SJed Brown stash->recvframe_active = NULL; 942d7d60843SJed Brown stash->recvframe_i = 0; 943d7d60843SJed Brown stash->some_i = 0; 944d7d60843SJed Brown stash->some_count = 0; 945d7d60843SJed Brown stash->recvcount = 0; 94697da8949SJed Brown stash->subset_off_proc = mat->subsetoffprocentries; 947d7d60843SJed Brown PetscFunctionReturn(0); 948d7d60843SJed Brown } 949d7d60843SJed Brown 950d7d60843SJed Brown #undef __FUNCT__ 951d7d60843SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_BTS" 952d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 953d7d60843SJed Brown { 954d7d60843SJed Brown PetscErrorCode ierr; 955d7d60843SJed Brown MatStashBlock *block; 956d7d60843SJed Brown 957d7d60843SJed Brown PetscFunctionBegin; 958d7d60843SJed Brown *flg = 0; 959d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 960d7d60843SJed Brown if (stash->some_i == stash->some_count) { 961d7d60843SJed Brown if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 962d7d60843SJed Brown ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 963d7d60843SJed Brown stash->some_i = 0; 964d7d60843SJed Brown } 965d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 966d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 967d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 968d7d60843SJed Brown ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 969d7d60843SJed Brown } 970d7d60843SJed Brown stash->some_i++; 971d7d60843SJed Brown stash->recvcount++; 972d7d60843SJed Brown stash->recvframe_i = 0; 973d7d60843SJed Brown } 974d7d60843SJed Brown *n = 1; 975d7d60843SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 976d7d60843SJed Brown *row = &block->row; 977d7d60843SJed Brown *col = &block->col; 978d7d60843SJed Brown *val = block->vals; 979d7d60843SJed Brown stash->recvframe_i++; 980d7d60843SJed Brown *flg = 1; 981d7d60843SJed Brown PetscFunctionReturn(0); 982d7d60843SJed Brown } 983d7d60843SJed Brown 984d7d60843SJed Brown #undef __FUNCT__ 985d7d60843SJed Brown #define __FUNCT__ "MatStashScatterEnd_BTS" 986d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 987d7d60843SJed Brown { 988d7d60843SJed Brown PetscErrorCode ierr; 989d7d60843SJed Brown 990d7d60843SJed Brown PetscFunctionBegin; 991d7d60843SJed Brown ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 99297da8949SJed Brown if (!stash->subset_off_proc) { /* Only collect the communication contexts if it won't be reused. */ 993d7d60843SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 99497da8949SJed Brown } 995d7d60843SJed Brown 996d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 997d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 998d7d60843SJed Brown Also update the oldmax, only if it increases */ 999d7d60843SJed Brown if (stash->n) { 1000d7d60843SJed Brown PetscInt bs2 = stash->bs*stash->bs; 1001d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1002d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1003d7d60843SJed Brown } 1004d7d60843SJed Brown 1005d7d60843SJed Brown stash->nmax = 0; 1006d7d60843SJed Brown stash->n = 0; 1007d7d60843SJed Brown stash->reallocs = -1; 1008d7d60843SJed Brown stash->nprocessed = 0; 1009d7d60843SJed Brown 1010d7d60843SJed Brown ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1011d7d60843SJed Brown 1012d7d60843SJed Brown stash->space = 0; 1013d7d60843SJed Brown 1014d7d60843SJed Brown PetscFunctionReturn(0); 1015d7d60843SJed Brown } 1016d7d60843SJed Brown 1017d7d60843SJed Brown #undef __FUNCT__ 1018d7d60843SJed Brown #define __FUNCT__ "MatStashScatterDestroy_BTS" 1019d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1020d7d60843SJed Brown { 1021d7d60843SJed Brown PetscErrorCode ierr; 1022d7d60843SJed Brown 1023d7d60843SJed Brown PetscFunctionBegin; 1024d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1025d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1026d7d60843SJed Brown stash->recvframes = NULL; 1027d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1028d7d60843SJed Brown if (stash->blocktype != MPI_DATATYPE_NULL) { 1029d7d60843SJed Brown ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1030d7d60843SJed Brown } 1031d7d60843SJed Brown stash->nsendranks = 0; 1032d7d60843SJed Brown stash->nrecvranks = 0; 1033d7d60843SJed Brown ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1034d7d60843SJed Brown ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1035d7d60843SJed Brown ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1036d7d60843SJed Brown ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1037d7d60843SJed Brown ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1038d7d60843SJed Brown ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1039d7d60843SJed Brown PetscFunctionReturn(0); 1040d7d60843SJed Brown } 1041