12d5177cdSBarry Smith 2b45d2f2cSJed Brown #include <petsc-private/matimpl.h> 35bd3b8fbSHong Zhang 4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 54c1ff481SSatish Balay 6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 12d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*); 13d7d60843SJed Brown 149417f4adSLois Curfman McInnes /* 158798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 164c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 174c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 189417f4adSLois Curfman McInnes 194c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 204c1ff481SSatish Balay 214c1ff481SSatish Balay Input Parameters: 224c1ff481SSatish Balay comm - communicator, required for scatters. 234c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 244c1ff481SSatish Balay 254c1ff481SSatish Balay Output Parameters: 264c1ff481SSatish Balay stash - the newly created stash 279417f4adSLois Curfman McInnes */ 284a2ae208SSatish Balay #undef __FUNCT__ 294a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 30c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 319417f4adSLois Curfman McInnes { 32dfbe8321SBarry Smith PetscErrorCode ierr; 33533163c2SBarry Smith PetscInt max,*opt,nopt,i; 34ace3abfcSBarry Smith PetscBool flg; 35bc5ccf88SSatish Balay 363a40ed3dSBarry Smith PetscFunctionBegin; 37bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 38752ec6e0SSatish Balay stash->comm = comm; 398865f1eaSKarl Rupp 40752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 41a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 42a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 43a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 44785e854fSJed Brown ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 45533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 46533163c2SBarry Smith 47bc5ccf88SSatish Balay 48434d7ff9SSatish Balay nopt = stash->size; 49785e854fSJed Brown ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 500298fd71SBarry Smith ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 51434d7ff9SSatish Balay if (flg) { 52434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 53434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 54434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 55f4ab19daSSatish Balay else max = 0; /* Use default */ 56434d7ff9SSatish Balay stash->umax = max; 57434d7ff9SSatish Balay } else { 58434d7ff9SSatish Balay stash->umax = 0; 59434d7ff9SSatish Balay } 60606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 614c1ff481SSatish Balay if (bs <= 0) bs = 1; 62a2d1c673SSatish Balay 634c1ff481SSatish Balay stash->bs = bs; 649417f4adSLois Curfman McInnes stash->nmax = 0; 65434d7ff9SSatish Balay stash->oldnmax = 0; 669417f4adSLois Curfman McInnes stash->n = 0; 674c1ff481SSatish Balay stash->reallocs = -1; 6875cae7c1SHong Zhang stash->space_head = 0; 6975cae7c1SHong Zhang stash->space = 0; 709417f4adSLois Curfman McInnes 71bc5ccf88SSatish Balay stash->send_waits = 0; 72bc5ccf88SSatish Balay stash->recv_waits = 0; 73a2d1c673SSatish Balay stash->send_status = 0; 74bc5ccf88SSatish Balay stash->nsends = 0; 75bc5ccf88SSatish Balay stash->nrecvs = 0; 76bc5ccf88SSatish Balay stash->svalues = 0; 77bc5ccf88SSatish Balay stash->rvalues = 0; 78563fb871SSatish Balay stash->rindices = 0; 79a2d1c673SSatish Balay stash->nprocessed = 0; 8067318a8aSJed Brown stash->reproduce = PETSC_FALSE; 81d7d60843SJed Brown stash->blocktype = MPI_DATATYPE_NULL; 828865f1eaSKarl Rupp 830298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 84ac2b2aa0SJed Brown ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr); 85ac2b2aa0SJed Brown if (flg) { 86d7d60843SJed Brown stash->ScatterBegin = MatStashScatterBegin_BTS; 87d7d60843SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 88d7d60843SJed Brown stash->ScatterEnd = MatStashScatterEnd_BTS; 89d7d60843SJed Brown stash->ScatterDestroy = MatStashScatterDestroy_BTS; 90ac2b2aa0SJed Brown } else { 91ac2b2aa0SJed Brown stash->ScatterBegin = MatStashScatterBegin_Ref; 92ac2b2aa0SJed Brown stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 93ac2b2aa0SJed Brown stash->ScatterEnd = MatStashScatterEnd_Ref; 94ac2b2aa0SJed Brown stash->ScatterDestroy = NULL; 95ac2b2aa0SJed Brown } 963a40ed3dSBarry Smith PetscFunctionReturn(0); 979417f4adSLois Curfman McInnes } 989417f4adSLois Curfman McInnes 994c1ff481SSatish Balay /* 1008798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 1014c1ff481SSatish Balay */ 1024a2ae208SSatish Balay #undef __FUNCT__ 1034a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 1059417f4adSLois Curfman McInnes { 106dfbe8321SBarry Smith PetscErrorCode ierr; 107a2d1c673SSatish Balay 108bc5ccf88SSatish Balay PetscFunctionBegin; 1096bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 110ac2b2aa0SJed Brown if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 1118865f1eaSKarl Rupp 11282740460SHong Zhang stash->space = 0; 1138865f1eaSKarl Rupp 114533163c2SBarry Smith ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 115bc5ccf88SSatish Balay PetscFunctionReturn(0); 116bc5ccf88SSatish Balay } 117bc5ccf88SSatish Balay 1184c1ff481SSatish Balay /* 11967318a8aSJed Brown MatStashScatterEnd_Private - This is called as the final stage of 1204c1ff481SSatish Balay scatter. The final stages of message passing is done here, and 12167318a8aSJed Brown all the memory used for message passing is cleaned up. This 1224c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1234c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1244c1ff481SSatish Balay so that the same value can be used the next time through. 1254c1ff481SSatish Balay */ 1264a2ae208SSatish Balay #undef __FUNCT__ 1274a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 128dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 129bc5ccf88SSatish Balay { 1306849ba73SBarry Smith PetscErrorCode ierr; 131ac2b2aa0SJed Brown 132ac2b2aa0SJed Brown PetscFunctionBegin; 133ac2b2aa0SJed Brown ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 134ac2b2aa0SJed Brown PetscFunctionReturn(0); 135ac2b2aa0SJed Brown } 136ac2b2aa0SJed Brown 137ac2b2aa0SJed Brown #undef __FUNCT__ 138ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterEnd_Ref" 139ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 140ac2b2aa0SJed Brown { 141ac2b2aa0SJed Brown PetscErrorCode ierr; 142533163c2SBarry Smith PetscInt nsends=stash->nsends,bs2,oldnmax,i; 143a2d1c673SSatish Balay MPI_Status *send_status; 144a2d1c673SSatish Balay 1453a40ed3dSBarry Smith PetscFunctionBegin; 146533163c2SBarry Smith for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 147a2d1c673SSatish Balay /* wait on sends */ 148a2d1c673SSatish Balay if (nsends) { 149785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 150a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 151606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 152a2d1c673SSatish Balay } 153a2d1c673SSatish Balay 154c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 155434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 156434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 157b9b97703SBarry Smith if (stash->n) { 15894b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1598a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 160434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 161b9b97703SBarry Smith } 162434d7ff9SSatish Balay 163d07ff455SSatish Balay stash->nmax = 0; 164d07ff455SSatish Balay stash->n = 0; 1654c1ff481SSatish Balay stash->reallocs = -1; 166a2d1c673SSatish Balay stash->nprocessed = 0; 1678865f1eaSKarl Rupp 1686bf464f9SBarry Smith ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1698865f1eaSKarl Rupp 17082740460SHong Zhang stash->space = 0; 1718865f1eaSKarl Rupp 172606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 173606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 174c05d87d6SBarry Smith ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 175c05d87d6SBarry Smith ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 176606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 177c05d87d6SBarry Smith ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 178563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 1793a40ed3dSBarry Smith PetscFunctionReturn(0); 1809417f4adSLois Curfman McInnes } 1819417f4adSLois Curfman McInnes 1824c1ff481SSatish Balay /* 1838798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1844c1ff481SSatish Balay 1854c1ff481SSatish Balay Input Parameters: 1864c1ff481SSatish Balay stash - the stash 18794b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1884c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1894c1ff481SSatish Balay 1904c1ff481SSatish Balay */ 1914a2ae208SSatish Balay #undef __FUNCT__ 1924a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 193c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 19497530c3fSBarry Smith { 195c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 19694b769a5SSatish Balay 1973a40ed3dSBarry Smith PetscFunctionBegin; 1981ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1991ecfd215SBarry Smith if (reallocs) { 200434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 201434d7ff9SSatish Balay else *reallocs = stash->reallocs; 2021ecfd215SBarry Smith } 203bc5ccf88SSatish Balay PetscFunctionReturn(0); 204bc5ccf88SSatish Balay } 2054c1ff481SSatish Balay 2064c1ff481SSatish Balay /* 2078798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 2084c1ff481SSatish Balay 2094c1ff481SSatish Balay Input Parameters: 2104c1ff481SSatish Balay stash - the stash 2114c1ff481SSatish Balay max - the value that is used as the max size of the stash. 2124c1ff481SSatish Balay this value is used while allocating memory. 2134c1ff481SSatish Balay */ 2144a2ae208SSatish Balay #undef __FUNCT__ 2154a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 216c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 217bc5ccf88SSatish Balay { 218bc5ccf88SSatish Balay PetscFunctionBegin; 219434d7ff9SSatish Balay stash->umax = max; 2203a40ed3dSBarry Smith PetscFunctionReturn(0); 22197530c3fSBarry Smith } 22297530c3fSBarry Smith 2238798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2244c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2254c1ff481SSatish Balay being inserted into the stash. 2264c1ff481SSatish Balay 2274c1ff481SSatish Balay Input Parameters: 2284c1ff481SSatish Balay stash - the stash 2294c1ff481SSatish Balay incr - the minimum increase requested 2304c1ff481SSatish Balay 2314c1ff481SSatish Balay Notes: 2324c1ff481SSatish Balay This routine doubles the currently used memory. 2334c1ff481SSatish Balay */ 2344a2ae208SSatish Balay #undef __FUNCT__ 2354a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 236c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2379417f4adSLois Curfman McInnes { 2386849ba73SBarry Smith PetscErrorCode ierr; 2395bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2409417f4adSLois Curfman McInnes 2413a40ed3dSBarry Smith PetscFunctionBegin; 2429417f4adSLois Curfman McInnes /* allocate a larger stash */ 243c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 244434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 245434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 246c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 247434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 248434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 249434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2504c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 251d07ff455SSatish Balay 25275cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 25375cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 254b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 255b087b6d6SSatish Balay stash->space_head = stash->space; 25675cae7c1SHong Zhang } 257b087b6d6SSatish Balay 258bc5ccf88SSatish Balay stash->reallocs++; 25975cae7c1SHong Zhang stash->nmax = newnmax; 260bc5ccf88SSatish Balay PetscFunctionReturn(0); 261bc5ccf88SSatish Balay } 262bc5ccf88SSatish Balay /* 2638798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2644c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2654c1ff481SSatish Balay can be inserted with a single call to this function. 2664c1ff481SSatish Balay 2674c1ff481SSatish Balay Input Parameters: 2684c1ff481SSatish Balay stash - the stash 2694c1ff481SSatish Balay row - the global row correspoiding to the values 2704c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2714c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2724c1ff481SSatish Balay values - the values inserted 273bc5ccf88SSatish Balay */ 2744a2ae208SSatish Balay #undef __FUNCT__ 2754a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 276ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 277bc5ccf88SSatish Balay { 278dfbe8321SBarry Smith PetscErrorCode ierr; 279b400d20cSBarry Smith PetscInt i,k,cnt = 0; 28075cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 281bc5ccf88SSatish Balay 282bc5ccf88SSatish Balay PetscFunctionBegin; 2834c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 28475cae7c1SHong Zhang if (!space || space->local_remaining < n) { 2858798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2869417f4adSLois Curfman McInnes } 28775cae7c1SHong Zhang space = stash->space; 28875cae7c1SHong Zhang k = space->local_used; 2894c1ff481SSatish Balay for (i=0; i<n; i++) { 29088c3974fSBarry Smith if (ignorezeroentries && (values[i] == 0.0)) continue; 29175cae7c1SHong Zhang space->idx[k] = row; 29275cae7c1SHong Zhang space->idy[k] = idxn[i]; 29375cae7c1SHong Zhang space->val[k] = values[i]; 29475cae7c1SHong Zhang k++; 295b400d20cSBarry Smith cnt++; 2969417f4adSLois Curfman McInnes } 297b400d20cSBarry Smith stash->n += cnt; 298b400d20cSBarry Smith space->local_used += cnt; 299b400d20cSBarry Smith space->local_remaining -= cnt; 300a2d1c673SSatish Balay PetscFunctionReturn(0); 301a2d1c673SSatish Balay } 30275cae7c1SHong Zhang 3034c1ff481SSatish Balay /* 3048798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 3054c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 3064c1ff481SSatish Balay can be inserted with a single call to this function. 307a2d1c673SSatish Balay 3084c1ff481SSatish Balay Input Parameters: 3094c1ff481SSatish Balay stash - the stash 3104c1ff481SSatish Balay row - the global row correspoiding to the values 3114c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3124c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 3134c1ff481SSatish Balay values - the values inserted 3144c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 3154c1ff481SSatish Balay this happens because the input is columnoriented. 3164c1ff481SSatish Balay */ 3174a2ae208SSatish Balay #undef __FUNCT__ 3184a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 319ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 320a2d1c673SSatish Balay { 321dfbe8321SBarry Smith PetscErrorCode ierr; 32250e9ab7cSBarry Smith PetscInt i,k,cnt = 0; 32375cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 324a2d1c673SSatish Balay 3254c1ff481SSatish Balay PetscFunctionBegin; 3264c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 32775cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3288798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3294c1ff481SSatish Balay } 33075cae7c1SHong Zhang space = stash->space; 33175cae7c1SHong Zhang k = space->local_used; 3324c1ff481SSatish Balay for (i=0; i<n; i++) { 33388c3974fSBarry Smith if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 33475cae7c1SHong Zhang space->idx[k] = row; 33575cae7c1SHong Zhang space->idy[k] = idxn[i]; 33675cae7c1SHong Zhang space->val[k] = values[i*stepval]; 33775cae7c1SHong Zhang k++; 338b400d20cSBarry Smith cnt++; 3394c1ff481SSatish Balay } 340b400d20cSBarry Smith stash->n += cnt; 341b400d20cSBarry Smith space->local_used += cnt; 342b400d20cSBarry Smith space->local_remaining -= cnt; 3434c1ff481SSatish Balay PetscFunctionReturn(0); 3444c1ff481SSatish Balay } 3454c1ff481SSatish Balay 3464c1ff481SSatish Balay /* 3478798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3484c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3494c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3504c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3514c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3524c1ff481SSatish Balay 3534c1ff481SSatish Balay Input Parameters: 3544c1ff481SSatish Balay stash - the stash 3554c1ff481SSatish Balay row - the global block-row correspoiding to the values 3564c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3574c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3584c1ff481SSatish Balay values. Each block is of size bs*bs. 3594c1ff481SSatish Balay values - the values inserted 3604c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3614c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3624c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3634c1ff481SSatish Balay */ 3644a2ae208SSatish Balay #undef __FUNCT__ 3654a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 36654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3674c1ff481SSatish Balay { 368dfbe8321SBarry Smith PetscErrorCode ierr; 36975cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 37054f21887SBarry Smith const PetscScalar *vals; 37154f21887SBarry Smith PetscScalar *array; 37275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 373a2d1c673SSatish Balay 374a2d1c673SSatish Balay PetscFunctionBegin; 37575cae7c1SHong Zhang if (!space || space->local_remaining < n) { 3768798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 377a2d1c673SSatish Balay } 37875cae7c1SHong Zhang space = stash->space; 37975cae7c1SHong Zhang l = space->local_used; 38075cae7c1SHong Zhang bs2 = bs*bs; 3814c1ff481SSatish Balay for (i=0; i<n; i++) { 38275cae7c1SHong Zhang space->idx[l] = row; 38375cae7c1SHong Zhang space->idy[l] = idxn[i]; 38475cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 38575cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 38675cae7c1SHong Zhang funtion call */ 38775cae7c1SHong Zhang array = space->val + bs2*l; 38875cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 38975cae7c1SHong Zhang for (j=0; j<bs; j++) { 39075cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 39175cae7c1SHong Zhang array++; 39275cae7c1SHong Zhang vals += cmax*bs; 39375cae7c1SHong Zhang } 39475cae7c1SHong Zhang l++; 395a2d1c673SSatish Balay } 3965bd3b8fbSHong Zhang stash->n += n; 39775cae7c1SHong Zhang space->local_used += n; 39875cae7c1SHong Zhang space->local_remaining -= n; 3994c1ff481SSatish Balay PetscFunctionReturn(0); 4004c1ff481SSatish Balay } 4014c1ff481SSatish Balay 4024c1ff481SSatish Balay /* 4038798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 4044c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 4054c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 4064c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 4074c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 4084c1ff481SSatish Balay 4094c1ff481SSatish Balay Input Parameters: 4104c1ff481SSatish Balay stash - the stash 4114c1ff481SSatish Balay row - the global block-row correspoiding to the values 4124c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 4134c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 4144c1ff481SSatish Balay values. Each block is of size bs*bs. 4154c1ff481SSatish Balay values - the values inserted 4164c1ff481SSatish Balay rmax - the number of block-rows in the original block. 4174c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 4184c1ff481SSatish Balay idx - the index of the current block-row in the original block. 4194c1ff481SSatish Balay */ 4204a2ae208SSatish Balay #undef __FUNCT__ 4214a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 42254f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 4234c1ff481SSatish Balay { 424dfbe8321SBarry Smith PetscErrorCode ierr; 42575cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 42654f21887SBarry Smith const PetscScalar *vals; 42754f21887SBarry Smith PetscScalar *array; 42875cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 4294c1ff481SSatish Balay 4304c1ff481SSatish Balay PetscFunctionBegin; 43175cae7c1SHong Zhang if (!space || space->local_remaining < n) { 4328798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4334c1ff481SSatish Balay } 43475cae7c1SHong Zhang space = stash->space; 43575cae7c1SHong Zhang l = space->local_used; 43675cae7c1SHong Zhang bs2 = bs*bs; 4374c1ff481SSatish Balay for (i=0; i<n; i++) { 43875cae7c1SHong Zhang space->idx[l] = row; 43975cae7c1SHong Zhang space->idy[l] = idxn[i]; 44075cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 44175cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 44275cae7c1SHong Zhang funtion call */ 44375cae7c1SHong Zhang array = space->val + bs2*l; 44475cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 44575cae7c1SHong Zhang for (j=0; j<bs; j++) { 4468865f1eaSKarl Rupp for (k=0; k<bs; k++) array[k] = vals[k]; 44775cae7c1SHong Zhang array += bs; 44875cae7c1SHong Zhang vals += rmax*bs; 44975cae7c1SHong Zhang } 4505bd3b8fbSHong Zhang l++; 451a2d1c673SSatish Balay } 4525bd3b8fbSHong Zhang stash->n += n; 45375cae7c1SHong Zhang space->local_used += n; 45475cae7c1SHong Zhang space->local_remaining -= n; 4553a40ed3dSBarry Smith PetscFunctionReturn(0); 4569417f4adSLois Curfman McInnes } 4574c1ff481SSatish Balay /* 4588798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4594c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4604c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4614c1ff481SSatish Balay processors. 462bc5ccf88SSatish Balay 4634c1ff481SSatish Balay Input Parameters: 4644c1ff481SSatish Balay stash - the stash 4654c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4664c1ff481SSatish Balay for each node. 4674c1ff481SSatish Balay 4684c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4694c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4704c1ff481SSatish Balay the proper global indices. 4714c1ff481SSatish Balay */ 4724a2ae208SSatish Balay #undef __FUNCT__ 4734a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 4741e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 475bc5ccf88SSatish Balay { 476ac2b2aa0SJed Brown PetscErrorCode ierr; 477ac2b2aa0SJed Brown 478ac2b2aa0SJed Brown PetscFunctionBegin; 479ac2b2aa0SJed Brown ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 480ac2b2aa0SJed Brown PetscFunctionReturn(0); 481ac2b2aa0SJed Brown } 482ac2b2aa0SJed Brown 483ac2b2aa0SJed Brown #undef __FUNCT__ 484ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterBegin_Ref" 485ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 486ac2b2aa0SJed Brown { 487c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 488fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4896849ba73SBarry Smith PetscErrorCode ierr; 49075cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 49154f21887SBarry Smith PetscScalar **rvalues,*svalues; 492bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 493563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 49476ec1555SBarry Smith PetscMPIInt *sizes,*nlengths,nreceives; 4955bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 49654f21887SBarry Smith PetscScalar *sp_val; 4975bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 498bc5ccf88SSatish Balay 499bc5ccf88SSatish Balay PetscFunctionBegin; 500*4b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 501*4b4eb8d3SJed Brown InsertMode addv; 502*4b4eb8d3SJed Brown ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 503*4b4eb8d3SJed Brown if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 504*4b4eb8d3SJed Brown mat->insertmode = addv; /* in case this processor had no cache */ 505*4b4eb8d3SJed Brown } 506*4b4eb8d3SJed Brown 5074c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 50875cae7c1SHong Zhang 509bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 510037dbc42SBarry Smith ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 5111795a4d1SJed Brown ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 512037dbc42SBarry Smith ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 513a2d1c673SSatish Balay 51475cae7c1SHong Zhang i = j = 0; 5157357eb19SBarry Smith lastidx = -1; 5165bd3b8fbSHong Zhang space = stash->space_head; 5170298fd71SBarry Smith while (space != NULL) { 51875cae7c1SHong Zhang space_next = space->next; 5195bd3b8fbSHong Zhang sp_idx = space->idx; 52075cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 5217357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 5225bd3b8fbSHong Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 5237357eb19SBarry Smith lastidx = idx; 5247357eb19SBarry Smith for (; j<size; j++) { 5254c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 526563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 527bc5ccf88SSatish Balay } 528bc5ccf88SSatish Balay } 52975cae7c1SHong Zhang i++; 53075cae7c1SHong Zhang } 53175cae7c1SHong Zhang space = space_next; 532bc5ccf88SSatish Balay } 533563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 534563fb871SSatish Balay for (i=0, nsends=0; i<size; i++) { 5358865f1eaSKarl Rupp if (nlengths[i]) { 53676ec1555SBarry Smith sizes[i] = 1; nsends++; 5378865f1eaSKarl Rupp } 538563fb871SSatish Balay } 539bc5ccf88SSatish Balay 54054f21887SBarry Smith {PetscMPIInt *onodes,*olengths; 541563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 54276ec1555SBarry Smith ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 543563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 544563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 545563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 546563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 547563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 548563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 549563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 550563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 5518865f1eaSKarl Rupp ierr = PetscFree(olengths);CHKERRQ(ierr);} 552bc5ccf88SSatish Balay 553bc5ccf88SSatish Balay /* do sends: 554bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 555bc5ccf88SSatish Balay the ith processor 556bc5ccf88SSatish Balay */ 557dcca6d9dSJed Brown ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 558785e854fSJed Brown ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 559dcca6d9dSJed Brown ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 560a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 561bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 562bc5ccf88SSatish Balay for (i=1; i<size; i++) { 563563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 564533163c2SBarry Smith starti[i] = starti[i-1] + 2*nlengths[i-1]; 565bc5ccf88SSatish Balay } 56675cae7c1SHong Zhang 56775cae7c1SHong Zhang i = 0; 5685bd3b8fbSHong Zhang space = stash->space_head; 5690298fd71SBarry Smith while (space != NULL) { 57075cae7c1SHong Zhang space_next = space->next; 5715bd3b8fbSHong Zhang sp_idx = space->idx; 5725bd3b8fbSHong Zhang sp_idy = space->idy; 5735bd3b8fbSHong Zhang sp_val = space->val; 57475cae7c1SHong Zhang for (l=0; l<space->local_used; l++) { 575bc5ccf88SSatish Balay j = owner[i]; 576a2d1c673SSatish Balay if (bs2 == 1) { 5775bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 578a2d1c673SSatish Balay } else { 579c1ac3661SBarry Smith PetscInt k; 58054f21887SBarry Smith PetscScalar *buf1,*buf2; 5814c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 582b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5838865f1eaSKarl Rupp for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 584a2d1c673SSatish Balay } 5855bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5865bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 587bc5ccf88SSatish Balay startv[j]++; 588bc5ccf88SSatish Balay starti[j]++; 58975cae7c1SHong Zhang i++; 59075cae7c1SHong Zhang } 59175cae7c1SHong Zhang space = space_next; 592bc5ccf88SSatish Balay } 593bc5ccf88SSatish Balay startv[0] = 0; 5948865f1eaSKarl Rupp for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 595e5d0e772SSatish Balay 596bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 59776ec1555SBarry Smith if (sizes[i]) { 598563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 599a77337e4SBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 600bc5ccf88SSatish Balay } 601b85c94c3SSatish Balay } 6026cf91177SBarry Smith #if defined(PETSC_USE_INFO) 60393157e10SBarry Smith ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 604e5d0e772SSatish Balay for (i=0; i<size; i++) { 60576ec1555SBarry Smith if (sizes[i]) { 60630c47e72SSatish Balay ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 607e5d0e772SSatish Balay } 608e5d0e772SSatish Balay } 609e5d0e772SSatish Balay #endif 610c05d87d6SBarry Smith ierr = PetscFree(nlengths);CHKERRQ(ierr); 611606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 612c05d87d6SBarry Smith ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 61376ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 614a2d1c673SSatish Balay 615563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 616785e854fSJed Brown ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 617563fb871SSatish Balay 618563fb871SSatish Balay for (i=0; i<nreceives; i++) { 619563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 620563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 621563fb871SSatish Balay } 622563fb871SSatish Balay stash->recv_waits = recv_waits; 6238865f1eaSKarl Rupp 624563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 625563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 626563fb871SSatish Balay 627c05d87d6SBarry Smith stash->svalues = svalues; 628c05d87d6SBarry Smith stash->sindices = sindices; 629c05d87d6SBarry Smith stash->rvalues = rvalues; 630c05d87d6SBarry Smith stash->rindices = rindices; 631c05d87d6SBarry Smith stash->send_waits = send_waits; 632c05d87d6SBarry Smith stash->nsends = nsends; 633c05d87d6SBarry Smith stash->nrecvs = nreceives; 63467318a8aSJed Brown stash->reproduce_count = 0; 635bc5ccf88SSatish Balay PetscFunctionReturn(0); 636bc5ccf88SSatish Balay } 637bc5ccf88SSatish Balay 638a2d1c673SSatish Balay /* 6398798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 6408798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 6414c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 6424c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 6434c1ff481SSatish Balay 6444c1ff481SSatish Balay Input Parameters: 6454c1ff481SSatish Balay stash - the stash 6464c1ff481SSatish Balay 6474c1ff481SSatish Balay Output Parameters: 6484c1ff481SSatish Balay nvals - the number of entries in the current message. 6494c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 6504c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 6514c1ff481SSatish Balay vals - the values 6524c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 6534c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6544c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 655a2d1c673SSatish Balay */ 6564a2ae208SSatish Balay #undef __FUNCT__ 6574a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 65854f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 659bc5ccf88SSatish Balay { 6606849ba73SBarry Smith PetscErrorCode ierr; 661ac2b2aa0SJed Brown 662ac2b2aa0SJed Brown PetscFunctionBegin; 663ac2b2aa0SJed Brown ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 664ac2b2aa0SJed Brown PetscFunctionReturn(0); 665ac2b2aa0SJed Brown } 666ac2b2aa0SJed Brown 667ac2b2aa0SJed Brown #undef __FUNCT__ 668ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_Ref" 669ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 670ac2b2aa0SJed Brown { 671ac2b2aa0SJed Brown PetscErrorCode ierr; 672533163c2SBarry Smith PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 673fe09c992SBarry Smith PetscInt bs2; 674a2d1c673SSatish Balay MPI_Status recv_status; 675ace3abfcSBarry Smith PetscBool match_found = PETSC_FALSE; 676bc5ccf88SSatish Balay 677bc5ccf88SSatish Balay PetscFunctionBegin; 678a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 679a2d1c673SSatish Balay /* Return if no more messages to process */ 6808865f1eaSKarl Rupp if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 681a2d1c673SSatish Balay 6824c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 68367318a8aSJed Brown /* If a matching pair of receives are found, process them, and return the data to 684a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 685a2d1c673SSatish Balay while (!match_found) { 68667318a8aSJed Brown if (stash->reproduce) { 68767318a8aSJed Brown i = stash->reproduce_count++; 68867318a8aSJed Brown ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 68967318a8aSJed Brown } else { 690a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 69167318a8aSJed Brown } 692e32f2f54SBarry Smith if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 693533163c2SBarry Smith 69467318a8aSJed Brown /* Now pack the received message into a structure which is usable by others */ 695a2d1c673SSatish Balay if (i % 2) { 696a77337e4SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 6978865f1eaSKarl Rupp 698c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 6998865f1eaSKarl Rupp 700a2d1c673SSatish Balay *nvals = *nvals/bs2; 701563fb871SSatish Balay } else { 702563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 7038865f1eaSKarl Rupp 704563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 7058865f1eaSKarl Rupp 706563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 707bc5ccf88SSatish Balay } 708a2d1c673SSatish Balay 709cb2b73ccSBarry Smith /* Check if we have both messages from this proc */ 710c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 711c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 712a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 713563fb871SSatish Balay *rows = stash->rindices[i2]; 714a2d1c673SSatish Balay *cols = *rows + *nvals; 715563fb871SSatish Balay *vals = stash->rvalues[i1]; 716a2d1c673SSatish Balay *flg = 1; 717a2d1c673SSatish Balay stash->nprocessed++; 71835d8aa7fSBarry Smith match_found = PETSC_TRUE; 719bc5ccf88SSatish Balay } 720bc5ccf88SSatish Balay } 721bc5ccf88SSatish Balay PetscFunctionReturn(0); 722bc5ccf88SSatish Balay } 723d7d60843SJed Brown 724d7d60843SJed Brown typedef struct { 725d7d60843SJed Brown PetscInt row; 726d7d60843SJed Brown PetscInt col; 727d7d60843SJed Brown PetscScalar vals[1]; /* Actually an array of length bs2 */ 728d7d60843SJed Brown } MatStashBlock; 729d7d60843SJed Brown 730d7d60843SJed Brown #undef __FUNCT__ 731d7d60843SJed Brown #define __FUNCT__ "MatStashSortCompress_Private" 732d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 733d7d60843SJed Brown { 734d7d60843SJed Brown PetscErrorCode ierr; 735d7d60843SJed Brown PetscMatStashSpace space; 736d7d60843SJed Brown PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 737d7d60843SJed Brown PetscScalar **valptr; 738d7d60843SJed Brown 739d7d60843SJed Brown PetscFunctionBegin; 740d7d60843SJed Brown ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 741d7d60843SJed Brown for (space=stash->space_head,cnt=0; space; space=space->next) { 742d7d60843SJed Brown for (i=0; i<space->local_used; i++) { 743d7d60843SJed Brown row[cnt] = space->idx[i]; 744d7d60843SJed Brown col[cnt] = space->idy[i]; 745d7d60843SJed Brown valptr[cnt] = &space->val[i*bs2]; 746d7d60843SJed Brown perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 747d7d60843SJed Brown cnt++; 748d7d60843SJed Brown } 749d7d60843SJed Brown } 750d7d60843SJed Brown if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); 751d7d60843SJed Brown ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 752d7d60843SJed Brown /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 753d7d60843SJed Brown for (rowstart=0,cnt=0,i=1; i<=n; i++) { 754d7d60843SJed Brown if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 755d7d60843SJed Brown PetscInt colstart; 756d7d60843SJed Brown ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 757d7d60843SJed Brown for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */ 758d7d60843SJed Brown PetscInt j,l; 759d7d60843SJed Brown MatStashBlock *block; 760d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 761d7d60843SJed Brown block->row = row[rowstart]; 762d7d60843SJed Brown block->col = col[colstart]; 763d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 764d7d60843SJed Brown for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 765d7d60843SJed Brown if (insertmode == ADD_VALUES) { 766d7d60843SJed Brown for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 767d7d60843SJed Brown } else { 768d7d60843SJed Brown ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 769d7d60843SJed Brown } 770d7d60843SJed Brown } 771d7d60843SJed Brown colstart = j; 772d7d60843SJed Brown } 773d7d60843SJed Brown rowstart = i; 774d7d60843SJed Brown } 775d7d60843SJed Brown } 776d7d60843SJed Brown ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 777d7d60843SJed Brown PetscFunctionReturn(0); 778d7d60843SJed Brown } 779d7d60843SJed Brown 780d7d60843SJed Brown #undef __FUNCT__ 781d7d60843SJed Brown #define __FUNCT__ "MatStashBlockTypeSetUp" 782d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 783d7d60843SJed Brown { 784d7d60843SJed Brown PetscErrorCode ierr; 785d7d60843SJed Brown 786d7d60843SJed Brown PetscFunctionBegin; 787d7d60843SJed Brown if (stash->blocktype == MPI_DATATYPE_NULL) { 788d7d60843SJed Brown PetscInt bs2 = PetscSqr(stash->bs); 789d7d60843SJed Brown PetscMPIInt blocklens[2]; 790d7d60843SJed Brown MPI_Aint displs[2]; 791d7d60843SJed Brown MPI_Datatype types[2],stype; 792d7d60843SJed Brown 793d7d60843SJed Brown stash->blocktype_size = offsetof(MatStashBlock,vals) + bs2*sizeof(PetscScalar); 794d7d60843SJed Brown if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 795d7d60843SJed Brown stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 796d7d60843SJed Brown } 797d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 798d7d60843SJed Brown ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 799d7d60843SJed Brown ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 800d7d60843SJed Brown blocklens[0] = 2; 801d7d60843SJed Brown blocklens[1] = bs2; 802d7d60843SJed Brown displs[0] = offsetof(MatStashBlock,row); 803d7d60843SJed Brown displs[1] = offsetof(MatStashBlock,vals); 804d7d60843SJed Brown types[0] = MPIU_INT; 805d7d60843SJed Brown types[1] = MPIU_SCALAR; 806d7d60843SJed Brown ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr); 807d7d60843SJed Brown ierr = MPI_Type_commit(&stype);CHKERRQ(ierr); 808d7d60843SJed Brown ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */ 809d7d60843SJed Brown ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr); 810d7d60843SJed Brown ierr = MPI_Type_free(&stype);CHKERRQ(ierr); 811d7d60843SJed Brown } 812d7d60843SJed Brown PetscFunctionReturn(0); 813d7d60843SJed Brown } 814d7d60843SJed Brown 815d7d60843SJed Brown #undef __FUNCT__ 816d7d60843SJed Brown #define __FUNCT__ "MatStashBTSSend_Private" 817d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message. 818d7d60843SJed Brown * Here we post the main sends. 819d7d60843SJed Brown */ 820d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 821d7d60843SJed Brown { 822d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 823d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)sdata; 824d7d60843SJed Brown PetscErrorCode ierr; 825d7d60843SJed Brown 826d7d60843SJed Brown PetscFunctionBegin; 827d7d60843SJed Brown if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 828d7d60843SJed Brown ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 829d7d60843SJed Brown stash->sendframes[rankid].count = hdr->count; 830d7d60843SJed Brown stash->sendframes[rankid].pending = 1; 831d7d60843SJed Brown PetscFunctionReturn(0); 832d7d60843SJed Brown } 833d7d60843SJed Brown 834d7d60843SJed Brown #undef __FUNCT__ 835d7d60843SJed Brown #define __FUNCT__ "MatStashBTSRecv_Private" 836d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message. 837d7d60843SJed Brown * Here we post the main recvs. 838d7d60843SJed Brown */ 839d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 840d7d60843SJed Brown { 841d7d60843SJed Brown MatStash *stash = (MatStash*)ctx; 842d7d60843SJed Brown MatStashHeader *hdr = (MatStashHeader*)rdata; 843d7d60843SJed Brown MatStashFrame *frame; 844d7d60843SJed Brown PetscErrorCode ierr; 845d7d60843SJed Brown 846d7d60843SJed Brown PetscFunctionBegin; 847d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 848d7d60843SJed Brown ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 849d7d60843SJed Brown ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 850d7d60843SJed Brown frame->count = hdr->count; 851d7d60843SJed Brown frame->pending = 1; 852d7d60843SJed Brown PetscFunctionReturn(0); 853d7d60843SJed Brown } 854d7d60843SJed Brown 855d7d60843SJed Brown #undef __FUNCT__ 856d7d60843SJed Brown #define __FUNCT__ "MatStashScatterBegin_BTS" 857d7d60843SJed Brown /* 858d7d60843SJed Brown * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 859d7d60843SJed Brown */ 860d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 861d7d60843SJed Brown { 862d7d60843SJed Brown PetscErrorCode ierr; 863d7d60843SJed Brown size_t nblocks; 864d7d60843SJed Brown char *sendblocks; 865d7d60843SJed Brown 866d7d60843SJed Brown PetscFunctionBegin; 867*4b4eb8d3SJed Brown #if defined(PETSC_USE_DEBUG) 868*4b4eb8d3SJed Brown { /* make sure all processors are either in INSERTMODE or ADDMODE */ 869*4b4eb8d3SJed Brown InsertMode addv; 870*4b4eb8d3SJed Brown ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 871*4b4eb8d3SJed Brown if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 872*4b4eb8d3SJed Brown } 873*4b4eb8d3SJed Brown #endif 874*4b4eb8d3SJed Brown 87597da8949SJed Brown if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */ 87697da8949SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 87797da8949SJed Brown } 87897da8949SJed Brown 879d7d60843SJed Brown ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 880d7d60843SJed Brown ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 881d7d60843SJed Brown ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 882d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 88397da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 88497da8949SJed Brown PetscInt i,b; 88597da8949SJed Brown for (i=0,b=0; i<stash->nsendranks; i++) { 88697da8949SJed Brown stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 88797da8949SJed Brown /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 88897da8949SJed Brown stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 88997da8949SJed Brown for ( ; b<nblocks; b++) { 89097da8949SJed Brown MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 89197da8949SJed Brown if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 89297da8949SJed Brown if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 89397da8949SJed Brown stash->sendhdr[i].count++; 89497da8949SJed Brown } 89597da8949SJed Brown } 89697da8949SJed Brown } else { /* Dynamically count and pack (first time) */ 897d7d60843SJed Brown PetscInt i,rowstart,sendno; 898d7d60843SJed Brown 899d7d60843SJed Brown /* Count number of send ranks and allocate for sends */ 900d7d60843SJed Brown stash->nsendranks = 0; 901d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 9027e2ea869SJed Brown PetscInt owner; 903d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 904d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 905d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 906d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 907d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 9087e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 909d7d60843SJed Brown } 910d7d60843SJed Brown stash->nsendranks++; 911d7d60843SJed Brown rowstart = i; 912d7d60843SJed Brown } 913d7d60843SJed Brown ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 914d7d60843SJed Brown 915d7d60843SJed Brown /* Set up sendhdrs and sendframes */ 916d7d60843SJed Brown sendno = 0; 917d7d60843SJed Brown for (rowstart=0; rowstart<nblocks; ) { 918d7d60843SJed Brown PetscInt owner; 919d7d60843SJed Brown MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 920d7d60843SJed Brown ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 921d7d60843SJed Brown if (owner < 0) owner = -(owner+2); 922d7d60843SJed Brown stash->sendranks[sendno] = owner; 923d7d60843SJed Brown for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 924d7d60843SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 9257e2ea869SJed Brown if (sendblock_i->row >= owners[owner+1]) break; 926d7d60843SJed Brown } 927d7d60843SJed Brown stash->sendframes[sendno].buffer = sendblock_rowstart; 928d7d60843SJed Brown stash->sendframes[sendno].pending = 0; 929d7d60843SJed Brown stash->sendhdr[sendno].count = i - rowstart; 930d7d60843SJed Brown sendno++; 931d7d60843SJed Brown rowstart = i; 932d7d60843SJed Brown } 933d7d60843SJed Brown if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 934d7d60843SJed Brown } 935d7d60843SJed Brown 936*4b4eb8d3SJed Brown /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 937*4b4eb8d3SJed Brown * message or a dummy entry of some sort. */ 938*4b4eb8d3SJed Brown if (mat->insertmode == INSERT_VALUES) { 939*4b4eb8d3SJed Brown PetscInt i; 940*4b4eb8d3SJed Brown for (i=0; i<nblocks; i++) { 941*4b4eb8d3SJed Brown MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 942*4b4eb8d3SJed Brown sendblock_i->row = -(sendblock_i->row+1); 943*4b4eb8d3SJed Brown } 944*4b4eb8d3SJed Brown } 945*4b4eb8d3SJed Brown 94697da8949SJed Brown if (stash->subset_off_proc && mat->subsetoffprocentries) { 94797da8949SJed Brown PetscMPIInt i,tag; 94897da8949SJed Brown ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 94997da8949SJed Brown for (i=0; i<stash->nrecvranks; i++) { 95097da8949SJed Brown ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 95197da8949SJed Brown } 95297da8949SJed Brown for (i=0; i<stash->nsendranks; i++) { 95397da8949SJed Brown ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 95497da8949SJed Brown } 95597da8949SJed Brown stash->use_status = PETSC_TRUE; /* Use count from message status. */ 95697da8949SJed Brown } else { 957*4b4eb8d3SJed Brown ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,stash->sendhdr, 958d7d60843SJed Brown &stash->nrecvranks,&stash->recvranks,&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 959d7d60843SJed Brown MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 96097da8949SJed Brown stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 96197da8949SJed Brown } 962d7d60843SJed Brown 963d7d60843SJed Brown ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 964d7d60843SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 965d7d60843SJed Brown stash->recvframe_active = NULL; 966d7d60843SJed Brown stash->recvframe_i = 0; 967d7d60843SJed Brown stash->some_i = 0; 968d7d60843SJed Brown stash->some_count = 0; 969d7d60843SJed Brown stash->recvcount = 0; 97097da8949SJed Brown stash->subset_off_proc = mat->subsetoffprocentries; 971*4b4eb8d3SJed Brown stash->insertmode = &mat->insertmode; 972d7d60843SJed Brown PetscFunctionReturn(0); 973d7d60843SJed Brown } 974d7d60843SJed Brown 975d7d60843SJed Brown #undef __FUNCT__ 976d7d60843SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_BTS" 977d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 978d7d60843SJed Brown { 979d7d60843SJed Brown PetscErrorCode ierr; 980d7d60843SJed Brown MatStashBlock *block; 981d7d60843SJed Brown 982d7d60843SJed Brown PetscFunctionBegin; 983d7d60843SJed Brown *flg = 0; 984d7d60843SJed Brown while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 985d7d60843SJed Brown if (stash->some_i == stash->some_count) { 986d7d60843SJed Brown if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 987d7d60843SJed Brown ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 988d7d60843SJed Brown stash->some_i = 0; 989d7d60843SJed Brown } 990d7d60843SJed Brown stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 991d7d60843SJed Brown stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 992d7d60843SJed Brown if (stash->use_status) { /* Count what was actually sent */ 993d7d60843SJed Brown ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 994d7d60843SJed Brown } 995*4b4eb8d3SJed Brown if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 996*4b4eb8d3SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 997*4b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 998*4b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 999*4b4eb8d3SJed Brown if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 1000*4b4eb8d3SJed Brown } 1001d7d60843SJed Brown stash->some_i++; 1002d7d60843SJed Brown stash->recvcount++; 1003d7d60843SJed Brown stash->recvframe_i = 0; 1004d7d60843SJed Brown } 1005d7d60843SJed Brown *n = 1; 1006d7d60843SJed Brown block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 1007*4b4eb8d3SJed Brown if (block->row < 0) block->row = -(block->row + 1); 1008d7d60843SJed Brown *row = &block->row; 1009d7d60843SJed Brown *col = &block->col; 1010d7d60843SJed Brown *val = block->vals; 1011d7d60843SJed Brown stash->recvframe_i++; 1012d7d60843SJed Brown *flg = 1; 1013d7d60843SJed Brown PetscFunctionReturn(0); 1014d7d60843SJed Brown } 1015d7d60843SJed Brown 1016d7d60843SJed Brown #undef __FUNCT__ 1017d7d60843SJed Brown #define __FUNCT__ "MatStashScatterEnd_BTS" 1018d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 1019d7d60843SJed Brown { 1020d7d60843SJed Brown PetscErrorCode ierr; 1021d7d60843SJed Brown 1022d7d60843SJed Brown PetscFunctionBegin; 1023d7d60843SJed Brown ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 10243575f486SJed Brown if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 10253575f486SJed Brown void *dummy; 10263575f486SJed Brown ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 10273575f486SJed Brown } else { /* No reuse, so collect everything. */ 1028d7d60843SJed Brown ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 102997da8949SJed Brown } 1030d7d60843SJed Brown 1031d7d60843SJed Brown /* Now update nmaxold to be app 10% more than max n used, this way the 1032d7d60843SJed Brown wastage of space is reduced the next time this stash is used. 1033d7d60843SJed Brown Also update the oldmax, only if it increases */ 1034d7d60843SJed Brown if (stash->n) { 1035d7d60843SJed Brown PetscInt bs2 = stash->bs*stash->bs; 1036d7d60843SJed Brown PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1037d7d60843SJed Brown if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1038d7d60843SJed Brown } 1039d7d60843SJed Brown 1040d7d60843SJed Brown stash->nmax = 0; 1041d7d60843SJed Brown stash->n = 0; 1042d7d60843SJed Brown stash->reallocs = -1; 1043d7d60843SJed Brown stash->nprocessed = 0; 1044d7d60843SJed Brown 1045d7d60843SJed Brown ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1046d7d60843SJed Brown 1047d7d60843SJed Brown stash->space = 0; 1048d7d60843SJed Brown 1049d7d60843SJed Brown PetscFunctionReturn(0); 1050d7d60843SJed Brown } 1051d7d60843SJed Brown 1052d7d60843SJed Brown #undef __FUNCT__ 1053d7d60843SJed Brown #define __FUNCT__ "MatStashScatterDestroy_BTS" 1054d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1055d7d60843SJed Brown { 1056d7d60843SJed Brown PetscErrorCode ierr; 1057d7d60843SJed Brown 1058d7d60843SJed Brown PetscFunctionBegin; 1059d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1060d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1061d7d60843SJed Brown stash->recvframes = NULL; 1062d7d60843SJed Brown ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1063d7d60843SJed Brown if (stash->blocktype != MPI_DATATYPE_NULL) { 1064d7d60843SJed Brown ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1065d7d60843SJed Brown } 1066d7d60843SJed Brown stash->nsendranks = 0; 1067d7d60843SJed Brown stash->nrecvranks = 0; 1068d7d60843SJed Brown ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1069d7d60843SJed Brown ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1070d7d60843SJed Brown ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1071d7d60843SJed Brown ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1072d7d60843SJed Brown ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1073d7d60843SJed Brown ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1074d7d60843SJed Brown PetscFunctionReturn(0); 1075d7d60843SJed Brown } 1076