xref: /petsc/src/mat/utils/matstash.c (revision 23b7d1baef04d53868c2a66b4a50a76e84c309e7)
12d5177cdSBarry Smith 
2b45d2f2cSJed Brown #include <petsc-private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13d7d60843SJed Brown 
149417f4adSLois Curfman McInnes /*
158798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
164c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
174c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
189417f4adSLois Curfman McInnes 
194c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
204c1ff481SSatish Balay 
214c1ff481SSatish Balay   Input Parameters:
224c1ff481SSatish Balay   comm - communicator, required for scatters.
234c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
244c1ff481SSatish Balay 
254c1ff481SSatish Balay   Output Parameters:
264c1ff481SSatish Balay   stash    - the newly created stash
279417f4adSLois Curfman McInnes */
284a2ae208SSatish Balay #undef __FUNCT__
294a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
30c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
319417f4adSLois Curfman McInnes {
32dfbe8321SBarry Smith   PetscErrorCode ierr;
33533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
34ace3abfcSBarry Smith   PetscBool      flg;
35bc5ccf88SSatish Balay 
363a40ed3dSBarry Smith   PetscFunctionBegin;
37bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
38752ec6e0SSatish Balay   stash->comm = comm;
398865f1eaSKarl Rupp 
40752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
41a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
42a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
43a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
44785e854fSJed Brown   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
45533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
46533163c2SBarry Smith 
47bc5ccf88SSatish Balay 
48434d7ff9SSatish Balay   nopt = stash->size;
49785e854fSJed Brown   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
500298fd71SBarry Smith   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
51434d7ff9SSatish Balay   if (flg) {
52434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
53434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
54434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
55f4ab19daSSatish Balay     else                          max = 0; /* Use default */
56434d7ff9SSatish Balay     stash->umax = max;
57434d7ff9SSatish Balay   } else {
58434d7ff9SSatish Balay     stash->umax = 0;
59434d7ff9SSatish Balay   }
60606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
614c1ff481SSatish Balay   if (bs <= 0) bs = 1;
62a2d1c673SSatish Balay 
634c1ff481SSatish Balay   stash->bs         = bs;
649417f4adSLois Curfman McInnes   stash->nmax       = 0;
65434d7ff9SSatish Balay   stash->oldnmax    = 0;
669417f4adSLois Curfman McInnes   stash->n          = 0;
674c1ff481SSatish Balay   stash->reallocs   = -1;
6875cae7c1SHong Zhang   stash->space_head = 0;
6975cae7c1SHong Zhang   stash->space      = 0;
709417f4adSLois Curfman McInnes 
71bc5ccf88SSatish Balay   stash->send_waits  = 0;
72bc5ccf88SSatish Balay   stash->recv_waits  = 0;
73a2d1c673SSatish Balay   stash->send_status = 0;
74bc5ccf88SSatish Balay   stash->nsends      = 0;
75bc5ccf88SSatish Balay   stash->nrecvs      = 0;
76bc5ccf88SSatish Balay   stash->svalues     = 0;
77bc5ccf88SSatish Balay   stash->rvalues     = 0;
78563fb871SSatish Balay   stash->rindices    = 0;
79a2d1c673SSatish Balay   stash->nprocessed  = 0;
8067318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
81d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
828865f1eaSKarl Rupp 
830298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
84ac2b2aa0SJed Brown   ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
85ac2b2aa0SJed Brown   if (flg) {
86d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
87d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
89d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90ac2b2aa0SJed Brown   } else {
91ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
95ac2b2aa0SJed Brown   }
963a40ed3dSBarry Smith   PetscFunctionReturn(0);
979417f4adSLois Curfman McInnes }
989417f4adSLois Curfman McInnes 
994c1ff481SSatish Balay /*
1008798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1014c1ff481SSatish Balay */
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1059417f4adSLois Curfman McInnes {
106dfbe8321SBarry Smith   PetscErrorCode ierr;
107a2d1c673SSatish Balay 
108bc5ccf88SSatish Balay   PetscFunctionBegin;
1096bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
110ac2b2aa0SJed Brown   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
1118865f1eaSKarl Rupp 
11282740460SHong Zhang   stash->space = 0;
1138865f1eaSKarl Rupp 
114533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
115bc5ccf88SSatish Balay   PetscFunctionReturn(0);
116bc5ccf88SSatish Balay }
117bc5ccf88SSatish Balay 
1184c1ff481SSatish Balay /*
11967318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1204c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
12167318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1224c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1234c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1244c1ff481SSatish Balay    so that the same value can be used the next time through.
1254c1ff481SSatish Balay */
1264a2ae208SSatish Balay #undef __FUNCT__
1274a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
128dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129bc5ccf88SSatish Balay {
1306849ba73SBarry Smith   PetscErrorCode ierr;
131ac2b2aa0SJed Brown 
132ac2b2aa0SJed Brown   PetscFunctionBegin;
133ac2b2aa0SJed Brown   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134ac2b2aa0SJed Brown   PetscFunctionReturn(0);
135ac2b2aa0SJed Brown }
136ac2b2aa0SJed Brown 
137ac2b2aa0SJed Brown #undef __FUNCT__
138ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterEnd_Ref"
139ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
140ac2b2aa0SJed Brown {
141ac2b2aa0SJed Brown   PetscErrorCode ierr;
142533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
143a2d1c673SSatish Balay   MPI_Status     *send_status;
144a2d1c673SSatish Balay 
1453a40ed3dSBarry Smith   PetscFunctionBegin;
146533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
147a2d1c673SSatish Balay   /* wait on sends */
148a2d1c673SSatish Balay   if (nsends) {
149785e854fSJed Brown     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
150a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
151606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
152a2d1c673SSatish Balay   }
153a2d1c673SSatish Balay 
154c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
155434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
156434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
157b9b97703SBarry Smith   if (stash->n) {
15894b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1598a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
160434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
161b9b97703SBarry Smith   }
162434d7ff9SSatish Balay 
163d07ff455SSatish Balay   stash->nmax       = 0;
164d07ff455SSatish Balay   stash->n          = 0;
1654c1ff481SSatish Balay   stash->reallocs   = -1;
166a2d1c673SSatish Balay   stash->nprocessed = 0;
1678865f1eaSKarl Rupp 
1686bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1698865f1eaSKarl Rupp 
17082740460SHong Zhang   stash->space = 0;
1718865f1eaSKarl Rupp 
172606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
173606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
174c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
175c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
176606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
177c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
178563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1793a40ed3dSBarry Smith   PetscFunctionReturn(0);
1809417f4adSLois Curfman McInnes }
1819417f4adSLois Curfman McInnes 
1824c1ff481SSatish Balay /*
1838798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1844c1ff481SSatish Balay 
1854c1ff481SSatish Balay    Input Parameters:
1864c1ff481SSatish Balay    stash    - the stash
18794b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1884c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1894c1ff481SSatish Balay 
1904c1ff481SSatish Balay */
1914a2ae208SSatish Balay #undef __FUNCT__
1924a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
193c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
19497530c3fSBarry Smith {
195c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
19694b769a5SSatish Balay 
1973a40ed3dSBarry Smith   PetscFunctionBegin;
1981ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1991ecfd215SBarry Smith   if (reallocs) {
200434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
201434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
2021ecfd215SBarry Smith   }
203bc5ccf88SSatish Balay   PetscFunctionReturn(0);
204bc5ccf88SSatish Balay }
2054c1ff481SSatish Balay 
2064c1ff481SSatish Balay /*
2078798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
2084c1ff481SSatish Balay 
2094c1ff481SSatish Balay    Input Parameters:
2104c1ff481SSatish Balay    stash  - the stash
2114c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2124c1ff481SSatish Balay             this value is used while allocating memory.
2134c1ff481SSatish Balay */
2144a2ae208SSatish Balay #undef __FUNCT__
2154a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
216c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
217bc5ccf88SSatish Balay {
218bc5ccf88SSatish Balay   PetscFunctionBegin;
219434d7ff9SSatish Balay   stash->umax = max;
2203a40ed3dSBarry Smith   PetscFunctionReturn(0);
22197530c3fSBarry Smith }
22297530c3fSBarry Smith 
2238798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2244c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2254c1ff481SSatish Balay    being inserted into the stash.
2264c1ff481SSatish Balay 
2274c1ff481SSatish Balay    Input Parameters:
2284c1ff481SSatish Balay    stash - the stash
2294c1ff481SSatish Balay    incr  - the minimum increase requested
2304c1ff481SSatish Balay 
2314c1ff481SSatish Balay    Notes:
2324c1ff481SSatish Balay    This routine doubles the currently used memory.
2334c1ff481SSatish Balay  */
2344a2ae208SSatish Balay #undef __FUNCT__
2354a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
236c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2379417f4adSLois Curfman McInnes {
2386849ba73SBarry Smith   PetscErrorCode ierr;
2395bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2409417f4adSLois Curfman McInnes 
2413a40ed3dSBarry Smith   PetscFunctionBegin;
2429417f4adSLois Curfman McInnes   /* allocate a larger stash */
243c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
244434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
245434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
246c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
247434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
248434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
249434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2504c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
251d07ff455SSatish Balay 
25275cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
25375cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
254b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
255b087b6d6SSatish Balay     stash->space_head = stash->space;
25675cae7c1SHong Zhang   }
257b087b6d6SSatish Balay 
258bc5ccf88SSatish Balay   stash->reallocs++;
25975cae7c1SHong Zhang   stash->nmax = newnmax;
260bc5ccf88SSatish Balay   PetscFunctionReturn(0);
261bc5ccf88SSatish Balay }
262bc5ccf88SSatish Balay /*
2638798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2644c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2654c1ff481SSatish Balay   can be inserted with a single call to this function.
2664c1ff481SSatish Balay 
2674c1ff481SSatish Balay   Input Parameters:
2684c1ff481SSatish Balay   stash  - the stash
2694c1ff481SSatish Balay   row    - the global row correspoiding to the values
2704c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2714c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2724c1ff481SSatish Balay   values - the values inserted
273bc5ccf88SSatish Balay */
2744a2ae208SSatish Balay #undef __FUNCT__
2754a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
276ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
277bc5ccf88SSatish Balay {
278dfbe8321SBarry Smith   PetscErrorCode     ierr;
279b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
28075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
281bc5ccf88SSatish Balay 
282bc5ccf88SSatish Balay   PetscFunctionBegin;
2834c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
28475cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2858798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2869417f4adSLois Curfman McInnes   }
28775cae7c1SHong Zhang   space = stash->space;
28875cae7c1SHong Zhang   k     = space->local_used;
2894c1ff481SSatish Balay   for (i=0; i<n; i++) {
29088c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
29175cae7c1SHong Zhang     space->idx[k] = row;
29275cae7c1SHong Zhang     space->idy[k] = idxn[i];
29375cae7c1SHong Zhang     space->val[k] = values[i];
29475cae7c1SHong Zhang     k++;
295b400d20cSBarry Smith     cnt++;
2969417f4adSLois Curfman McInnes   }
297b400d20cSBarry Smith   stash->n               += cnt;
298b400d20cSBarry Smith   space->local_used      += cnt;
299b400d20cSBarry Smith   space->local_remaining -= cnt;
300a2d1c673SSatish Balay   PetscFunctionReturn(0);
301a2d1c673SSatish Balay }
30275cae7c1SHong Zhang 
3034c1ff481SSatish Balay /*
3048798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
3054c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
3064c1ff481SSatish Balay   can be inserted with a single call to this function.
307a2d1c673SSatish Balay 
3084c1ff481SSatish Balay   Input Parameters:
3094c1ff481SSatish Balay   stash   - the stash
3104c1ff481SSatish Balay   row     - the global row correspoiding to the values
3114c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
3124c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
3134c1ff481SSatish Balay   values  - the values inserted
3144c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3154c1ff481SSatish Balay             this happens because the input is columnoriented.
3164c1ff481SSatish Balay */
3174a2ae208SSatish Balay #undef __FUNCT__
3184a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
319ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
320a2d1c673SSatish Balay {
321dfbe8321SBarry Smith   PetscErrorCode     ierr;
32250e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
32375cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
324a2d1c673SSatish Balay 
3254c1ff481SSatish Balay   PetscFunctionBegin;
3264c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
32775cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3288798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3294c1ff481SSatish Balay   }
33075cae7c1SHong Zhang   space = stash->space;
33175cae7c1SHong Zhang   k     = space->local_used;
3324c1ff481SSatish Balay   for (i=0; i<n; i++) {
33388c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
33475cae7c1SHong Zhang     space->idx[k] = row;
33575cae7c1SHong Zhang     space->idy[k] = idxn[i];
33675cae7c1SHong Zhang     space->val[k] = values[i*stepval];
33775cae7c1SHong Zhang     k++;
338b400d20cSBarry Smith     cnt++;
3394c1ff481SSatish Balay   }
340b400d20cSBarry Smith   stash->n               += cnt;
341b400d20cSBarry Smith   space->local_used      += cnt;
342b400d20cSBarry Smith   space->local_remaining -= cnt;
3434c1ff481SSatish Balay   PetscFunctionReturn(0);
3444c1ff481SSatish Balay }
3454c1ff481SSatish Balay 
3464c1ff481SSatish Balay /*
3478798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3484c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3494c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3504c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3514c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3524c1ff481SSatish Balay 
3534c1ff481SSatish Balay   Input Parameters:
3544c1ff481SSatish Balay   stash  - the stash
3554c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3564c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3574c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3584c1ff481SSatish Balay            values. Each block is of size bs*bs.
3594c1ff481SSatish Balay   values - the values inserted
3604c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3614c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3624c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3634c1ff481SSatish Balay */
3644a2ae208SSatish Balay #undef __FUNCT__
3654a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
36654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3674c1ff481SSatish Balay {
368dfbe8321SBarry Smith   PetscErrorCode     ierr;
36975cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
37054f21887SBarry Smith   const PetscScalar  *vals;
37154f21887SBarry Smith   PetscScalar        *array;
37275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
373a2d1c673SSatish Balay 
374a2d1c673SSatish Balay   PetscFunctionBegin;
37575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3768798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
377a2d1c673SSatish Balay   }
37875cae7c1SHong Zhang   space = stash->space;
37975cae7c1SHong Zhang   l     = space->local_used;
38075cae7c1SHong Zhang   bs2   = bs*bs;
3814c1ff481SSatish Balay   for (i=0; i<n; i++) {
38275cae7c1SHong Zhang     space->idx[l] = row;
38375cae7c1SHong Zhang     space->idy[l] = idxn[i];
38475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
38575cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
38675cae7c1SHong Zhang        funtion call */
38775cae7c1SHong Zhang     array = space->val + bs2*l;
38875cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
38975cae7c1SHong Zhang     for (j=0; j<bs; j++) {
39075cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
39175cae7c1SHong Zhang       array++;
39275cae7c1SHong Zhang       vals += cmax*bs;
39375cae7c1SHong Zhang     }
39475cae7c1SHong Zhang     l++;
395a2d1c673SSatish Balay   }
3965bd3b8fbSHong Zhang   stash->n               += n;
39775cae7c1SHong Zhang   space->local_used      += n;
39875cae7c1SHong Zhang   space->local_remaining -= n;
3994c1ff481SSatish Balay   PetscFunctionReturn(0);
4004c1ff481SSatish Balay }
4014c1ff481SSatish Balay 
4024c1ff481SSatish Balay /*
4038798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
4044c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
4054c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
4064c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
4074c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
4084c1ff481SSatish Balay 
4094c1ff481SSatish Balay   Input Parameters:
4104c1ff481SSatish Balay   stash  - the stash
4114c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
4124c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
4134c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
4144c1ff481SSatish Balay            values. Each block is of size bs*bs.
4154c1ff481SSatish Balay   values - the values inserted
4164c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4174c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4184c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4194c1ff481SSatish Balay */
4204a2ae208SSatish Balay #undef __FUNCT__
4214a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
42254f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4234c1ff481SSatish Balay {
424dfbe8321SBarry Smith   PetscErrorCode     ierr;
42575cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
42654f21887SBarry Smith   const PetscScalar  *vals;
42754f21887SBarry Smith   PetscScalar        *array;
42875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4294c1ff481SSatish Balay 
4304c1ff481SSatish Balay   PetscFunctionBegin;
43175cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4328798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4334c1ff481SSatish Balay   }
43475cae7c1SHong Zhang   space = stash->space;
43575cae7c1SHong Zhang   l     = space->local_used;
43675cae7c1SHong Zhang   bs2   = bs*bs;
4374c1ff481SSatish Balay   for (i=0; i<n; i++) {
43875cae7c1SHong Zhang     space->idx[l] = row;
43975cae7c1SHong Zhang     space->idy[l] = idxn[i];
44075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
44175cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
44275cae7c1SHong Zhang      funtion call */
44375cae7c1SHong Zhang     array = space->val + bs2*l;
44475cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
44575cae7c1SHong Zhang     for (j=0; j<bs; j++) {
4468865f1eaSKarl Rupp       for (k=0; k<bs; k++) array[k] = vals[k];
44775cae7c1SHong Zhang       array += bs;
44875cae7c1SHong Zhang       vals  += rmax*bs;
44975cae7c1SHong Zhang     }
4505bd3b8fbSHong Zhang     l++;
451a2d1c673SSatish Balay   }
4525bd3b8fbSHong Zhang   stash->n               += n;
45375cae7c1SHong Zhang   space->local_used      += n;
45475cae7c1SHong Zhang   space->local_remaining -= n;
4553a40ed3dSBarry Smith   PetscFunctionReturn(0);
4569417f4adSLois Curfman McInnes }
4574c1ff481SSatish Balay /*
4588798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4594c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4604c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4614c1ff481SSatish Balay   processors.
462bc5ccf88SSatish Balay 
4634c1ff481SSatish Balay   Input Parameters:
4644c1ff481SSatish Balay   stash  - the stash
4654c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4664c1ff481SSatish Balay            for each node.
4674c1ff481SSatish Balay 
4684c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4694c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4704c1ff481SSatish Balay   the proper global indices.
4714c1ff481SSatish Balay */
4724a2ae208SSatish Balay #undef __FUNCT__
4734a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4741e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
475bc5ccf88SSatish Balay {
476ac2b2aa0SJed Brown   PetscErrorCode ierr;
477ac2b2aa0SJed Brown 
478ac2b2aa0SJed Brown   PetscFunctionBegin;
479ac2b2aa0SJed Brown   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
480ac2b2aa0SJed Brown   PetscFunctionReturn(0);
481ac2b2aa0SJed Brown }
482ac2b2aa0SJed Brown 
483ac2b2aa0SJed Brown #undef __FUNCT__
484ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterBegin_Ref"
485ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
486ac2b2aa0SJed Brown {
487c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
488fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
4896849ba73SBarry Smith   PetscErrorCode     ierr;
49075cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
49154f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
492bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
493563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
49476ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4955bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
49654f21887SBarry Smith   PetscScalar        *sp_val;
4975bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
498bc5ccf88SSatish Balay 
499bc5ccf88SSatish Balay   PetscFunctionBegin;
5004b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
5014b4eb8d3SJed Brown     InsertMode addv;
5024b4eb8d3SJed Brown     ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
5034b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
5044b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
5054b4eb8d3SJed Brown   }
5064b4eb8d3SJed Brown 
5074c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
50875cae7c1SHong Zhang 
509bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
510037dbc42SBarry Smith   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
5111795a4d1SJed Brown   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
512037dbc42SBarry Smith   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
513a2d1c673SSatish Balay 
51475cae7c1SHong Zhang   i       = j    = 0;
5157357eb19SBarry Smith   lastidx = -1;
5165bd3b8fbSHong Zhang   space   = stash->space_head;
5170298fd71SBarry Smith   while (space != NULL) {
51875cae7c1SHong Zhang     space_next = space->next;
5195bd3b8fbSHong Zhang     sp_idx     = space->idx;
52075cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
5217357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
5225bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
5237357eb19SBarry Smith       lastidx = idx;
5247357eb19SBarry Smith       for (; j<size; j++) {
5254c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
526563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
527bc5ccf88SSatish Balay         }
528bc5ccf88SSatish Balay       }
52975cae7c1SHong Zhang       i++;
53075cae7c1SHong Zhang     }
53175cae7c1SHong Zhang     space = space_next;
532bc5ccf88SSatish Balay   }
533563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
534563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5358865f1eaSKarl Rupp     if (nlengths[i]) {
53676ec1555SBarry Smith       sizes[i] = 1; nsends++;
5378865f1eaSKarl Rupp     }
538563fb871SSatish Balay   }
539bc5ccf88SSatish Balay 
54054f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
541563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
54276ec1555SBarry Smith    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
543563fb871SSatish Balay    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
544563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
545563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
546563fb871SSatish Balay    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
547563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
548563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
549563fb871SSatish Balay    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
550563fb871SSatish Balay    ierr = PetscFree(onodes);CHKERRQ(ierr);
5518865f1eaSKarl Rupp    ierr = PetscFree(olengths);CHKERRQ(ierr);}
552bc5ccf88SSatish Balay 
553bc5ccf88SSatish Balay   /* do sends:
554bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
555bc5ccf88SSatish Balay          the ith processor
556bc5ccf88SSatish Balay   */
557dcca6d9dSJed Brown   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
558785e854fSJed Brown   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
559dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
560a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
561bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
562bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
563563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
564533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
565bc5ccf88SSatish Balay   }
56675cae7c1SHong Zhang 
56775cae7c1SHong Zhang   i     = 0;
5685bd3b8fbSHong Zhang   space = stash->space_head;
5690298fd71SBarry Smith   while (space != NULL) {
57075cae7c1SHong Zhang     space_next = space->next;
5715bd3b8fbSHong Zhang     sp_idx     = space->idx;
5725bd3b8fbSHong Zhang     sp_idy     = space->idy;
5735bd3b8fbSHong Zhang     sp_val     = space->val;
57475cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
575bc5ccf88SSatish Balay       j = owner[i];
576a2d1c673SSatish Balay       if (bs2 == 1) {
5775bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
578a2d1c673SSatish Balay       } else {
579c1ac3661SBarry Smith         PetscInt    k;
58054f21887SBarry Smith         PetscScalar *buf1,*buf2;
5814c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
582b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5838865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
584a2d1c673SSatish Balay       }
5855bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5865bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
587bc5ccf88SSatish Balay       startv[j]++;
588bc5ccf88SSatish Balay       starti[j]++;
58975cae7c1SHong Zhang       i++;
59075cae7c1SHong Zhang     }
59175cae7c1SHong Zhang     space = space_next;
592bc5ccf88SSatish Balay   }
593bc5ccf88SSatish Balay   startv[0] = 0;
5948865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
595e5d0e772SSatish Balay 
596bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
59776ec1555SBarry Smith     if (sizes[i]) {
598563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
599a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
600bc5ccf88SSatish Balay     }
601b85c94c3SSatish Balay   }
6026cf91177SBarry Smith #if defined(PETSC_USE_INFO)
60393157e10SBarry Smith   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
604e5d0e772SSatish Balay   for (i=0; i<size; i++) {
60576ec1555SBarry Smith     if (sizes[i]) {
60630c47e72SSatish Balay       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
607e5d0e772SSatish Balay     }
608e5d0e772SSatish Balay   }
609e5d0e772SSatish Balay #endif
610c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
611606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
612c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
61376ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
614a2d1c673SSatish Balay 
615563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
616785e854fSJed Brown   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
617563fb871SSatish Balay 
618563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
619563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
620563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
621563fb871SSatish Balay   }
622563fb871SSatish Balay   stash->recv_waits = recv_waits;
6238865f1eaSKarl Rupp 
624563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
625563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
626563fb871SSatish Balay 
627c05d87d6SBarry Smith   stash->svalues         = svalues;
628c05d87d6SBarry Smith   stash->sindices        = sindices;
629c05d87d6SBarry Smith   stash->rvalues         = rvalues;
630c05d87d6SBarry Smith   stash->rindices        = rindices;
631c05d87d6SBarry Smith   stash->send_waits      = send_waits;
632c05d87d6SBarry Smith   stash->nsends          = nsends;
633c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
63467318a8aSJed Brown   stash->reproduce_count = 0;
635bc5ccf88SSatish Balay   PetscFunctionReturn(0);
636bc5ccf88SSatish Balay }
637bc5ccf88SSatish Balay 
638a2d1c673SSatish Balay /*
6398798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6408798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6414c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6424c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6434c1ff481SSatish Balay 
6444c1ff481SSatish Balay    Input Parameters:
6454c1ff481SSatish Balay    stash - the stash
6464c1ff481SSatish Balay 
6474c1ff481SSatish Balay    Output Parameters:
6484c1ff481SSatish Balay    nvals - the number of entries in the current message.
6494c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6504c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6514c1ff481SSatish Balay    vals  - the values
6524c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6534c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6544c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
655a2d1c673SSatish Balay */
6564a2ae208SSatish Balay #undef __FUNCT__
6574a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
65854f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
659bc5ccf88SSatish Balay {
6606849ba73SBarry Smith   PetscErrorCode ierr;
661ac2b2aa0SJed Brown 
662ac2b2aa0SJed Brown   PetscFunctionBegin;
663ac2b2aa0SJed Brown   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
664ac2b2aa0SJed Brown   PetscFunctionReturn(0);
665ac2b2aa0SJed Brown }
666ac2b2aa0SJed Brown 
667ac2b2aa0SJed Brown #undef __FUNCT__
668ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_Ref"
669ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
670ac2b2aa0SJed Brown {
671ac2b2aa0SJed Brown   PetscErrorCode ierr;
672533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
673fe09c992SBarry Smith   PetscInt       bs2;
674a2d1c673SSatish Balay   MPI_Status     recv_status;
675ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
676bc5ccf88SSatish Balay 
677bc5ccf88SSatish Balay   PetscFunctionBegin;
678a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
679a2d1c673SSatish Balay   /* Return if no more messages to process */
6808865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
681a2d1c673SSatish Balay 
6824c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
68367318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
684a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
685a2d1c673SSatish Balay   while (!match_found) {
68667318a8aSJed Brown     if (stash->reproduce) {
68767318a8aSJed Brown       i    = stash->reproduce_count++;
68867318a8aSJed Brown       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
68967318a8aSJed Brown     } else {
690a2d1c673SSatish Balay       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
69167318a8aSJed Brown     }
692e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
693533163c2SBarry Smith 
69467318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
695a2d1c673SSatish Balay     if (i % 2) {
696a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
6978865f1eaSKarl Rupp 
698c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6998865f1eaSKarl Rupp 
700a2d1c673SSatish Balay       *nvals = *nvals/bs2;
701563fb871SSatish Balay     } else {
702563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
7038865f1eaSKarl Rupp 
704563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
7058865f1eaSKarl Rupp 
706563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
707bc5ccf88SSatish Balay     }
708a2d1c673SSatish Balay 
709cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
710c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
711c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
712a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
713563fb871SSatish Balay       *rows = stash->rindices[i2];
714a2d1c673SSatish Balay       *cols = *rows + *nvals;
715563fb871SSatish Balay       *vals = stash->rvalues[i1];
716a2d1c673SSatish Balay       *flg  = 1;
717a2d1c673SSatish Balay       stash->nprocessed++;
71835d8aa7fSBarry Smith       match_found = PETSC_TRUE;
719bc5ccf88SSatish Balay     }
720bc5ccf88SSatish Balay   }
721bc5ccf88SSatish Balay   PetscFunctionReturn(0);
722bc5ccf88SSatish Balay }
723d7d60843SJed Brown 
724d7d60843SJed Brown typedef struct {
725d7d60843SJed Brown   PetscInt row;
726d7d60843SJed Brown   PetscInt col;
727d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
728d7d60843SJed Brown } MatStashBlock;
729d7d60843SJed Brown 
730d7d60843SJed Brown #undef __FUNCT__
731d7d60843SJed Brown #define __FUNCT__ "MatStashSortCompress_Private"
732d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
733d7d60843SJed Brown {
734d7d60843SJed Brown   PetscErrorCode ierr;
735d7d60843SJed Brown   PetscMatStashSpace space;
736d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
737d7d60843SJed Brown   PetscScalar **valptr;
738d7d60843SJed Brown 
739d7d60843SJed Brown   PetscFunctionBegin;
740d7d60843SJed Brown   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
741d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
742d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
743d7d60843SJed Brown       row[cnt] = space->idx[i];
744d7d60843SJed Brown       col[cnt] = space->idy[i];
745d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
746d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
747d7d60843SJed Brown       cnt++;
748d7d60843SJed Brown     }
749d7d60843SJed Brown   }
750d7d60843SJed Brown   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
751d7d60843SJed Brown   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
752d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
753d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
754d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
755d7d60843SJed Brown       PetscInt colstart;
756d7d60843SJed Brown       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
757d7d60843SJed Brown       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
758d7d60843SJed Brown         PetscInt j,l;
759d7d60843SJed Brown         MatStashBlock *block;
760d7d60843SJed Brown         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
761d7d60843SJed Brown         block->row = row[rowstart];
762d7d60843SJed Brown         block->col = col[colstart];
763d7d60843SJed Brown         ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
764d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
765d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
766d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
767d7d60843SJed Brown           } else {
768d7d60843SJed Brown             ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
769d7d60843SJed Brown           }
770d7d60843SJed Brown         }
771d7d60843SJed Brown         colstart = j;
772d7d60843SJed Brown       }
773d7d60843SJed Brown       rowstart = i;
774d7d60843SJed Brown     }
775d7d60843SJed Brown   }
776d7d60843SJed Brown   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
777d7d60843SJed Brown   PetscFunctionReturn(0);
778d7d60843SJed Brown }
779d7d60843SJed Brown 
780d7d60843SJed Brown #undef __FUNCT__
781d7d60843SJed Brown #define __FUNCT__ "MatStashBlockTypeSetUp"
782d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
783d7d60843SJed Brown {
784d7d60843SJed Brown   PetscErrorCode ierr;
785d7d60843SJed Brown 
786d7d60843SJed Brown   PetscFunctionBegin;
787d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
788d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
789d7d60843SJed Brown     PetscMPIInt  blocklens[2];
790d7d60843SJed Brown     MPI_Aint     displs[2];
791d7d60843SJed Brown     MPI_Datatype types[2],stype;
7929503c6c6SJed Brown     /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
7939503c6c6SJed Brown      * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
7949503c6c6SJed Brown      * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
7959503c6c6SJed Brown      */
7969503c6c6SJed Brown     struct DummyBlock {PetscInt row,col; PetscReal vals;};
797d7d60843SJed Brown 
7989503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
799d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
800d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
801d7d60843SJed Brown     }
802d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
803d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
804d7d60843SJed Brown     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
805d7d60843SJed Brown     blocklens[0] = 2;
806d7d60843SJed Brown     blocklens[1] = bs2;
8079503c6c6SJed Brown     displs[0] = offsetof(struct DummyBlock,row);
8089503c6c6SJed Brown     displs[1] = offsetof(struct DummyBlock,vals);
809d7d60843SJed Brown     types[0] = MPIU_INT;
810d7d60843SJed Brown     types[1] = MPIU_SCALAR;
811d7d60843SJed Brown     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
812d7d60843SJed Brown     ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
813d7d60843SJed Brown     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
814d7d60843SJed Brown     ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
815d7d60843SJed Brown     ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
816d7d60843SJed Brown   }
817d7d60843SJed Brown   PetscFunctionReturn(0);
818d7d60843SJed Brown }
819d7d60843SJed Brown 
820d7d60843SJed Brown #undef __FUNCT__
821d7d60843SJed Brown #define __FUNCT__ "MatStashBTSSend_Private"
822d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
823d7d60843SJed Brown  * Here we post the main sends.
824d7d60843SJed Brown  */
825d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
826d7d60843SJed Brown {
827d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
828d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
829d7d60843SJed Brown   PetscErrorCode ierr;
830d7d60843SJed Brown 
831d7d60843SJed Brown   PetscFunctionBegin;
832d7d60843SJed Brown   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
833d7d60843SJed Brown   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
834d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
835d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
836d7d60843SJed Brown   PetscFunctionReturn(0);
837d7d60843SJed Brown }
838d7d60843SJed Brown 
839d7d60843SJed Brown #undef __FUNCT__
840d7d60843SJed Brown #define __FUNCT__ "MatStashBTSRecv_Private"
841d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
842d7d60843SJed Brown  * Here we post the main recvs.
843d7d60843SJed Brown  */
844d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
845d7d60843SJed Brown {
846d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
847d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
848d7d60843SJed Brown   MatStashFrame *frame;
849d7d60843SJed Brown   PetscErrorCode ierr;
850d7d60843SJed Brown 
851d7d60843SJed Brown   PetscFunctionBegin;
852d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
853d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
854d7d60843SJed Brown   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
855d7d60843SJed Brown   frame->count = hdr->count;
856d7d60843SJed Brown   frame->pending = 1;
857d7d60843SJed Brown   PetscFunctionReturn(0);
858d7d60843SJed Brown }
859d7d60843SJed Brown 
860d7d60843SJed Brown #undef __FUNCT__
861d7d60843SJed Brown #define __FUNCT__ "MatStashScatterBegin_BTS"
862d7d60843SJed Brown /*
863d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
864d7d60843SJed Brown  */
865d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
866d7d60843SJed Brown {
867d7d60843SJed Brown   PetscErrorCode ierr;
868d7d60843SJed Brown   size_t nblocks;
869d7d60843SJed Brown   char *sendblocks;
870d7d60843SJed Brown 
871d7d60843SJed Brown   PetscFunctionBegin;
8724b4eb8d3SJed Brown #if defined(PETSC_USE_DEBUG)
8734b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
8744b4eb8d3SJed Brown     InsertMode addv;
8754b4eb8d3SJed Brown     ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
8764b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
8774b4eb8d3SJed Brown   }
8784b4eb8d3SJed Brown #endif
8794b4eb8d3SJed Brown 
88097da8949SJed Brown   if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
88197da8949SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
88297da8949SJed Brown   }
88397da8949SJed Brown 
884d7d60843SJed Brown   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
885d7d60843SJed Brown   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
886d7d60843SJed Brown   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
887d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
88897da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
889*23b7d1baSJed Brown     PetscInt i;
890*23b7d1baSJed Brown     size_t b;
89197da8949SJed Brown     for (i=0,b=0; i<stash->nsendranks; i++) {
89297da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
89397da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
89497da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
89597da8949SJed Brown       for ( ; b<nblocks; b++) {
89697da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
89797da8949SJed Brown         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
89897da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
89997da8949SJed Brown         stash->sendhdr[i].count++;
90097da8949SJed Brown       }
90197da8949SJed Brown     }
90297da8949SJed Brown   } else {                      /* Dynamically count and pack (first time) */
903*23b7d1baSJed Brown     PetscInt sendno;
904*23b7d1baSJed Brown     size_t i,rowstart;
905d7d60843SJed Brown 
906d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
907d7d60843SJed Brown     stash->nsendranks = 0;
908d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
9097e2ea869SJed Brown       PetscInt owner;
910d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
911d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
912d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
913d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
914d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9157e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
916d7d60843SJed Brown       }
917d7d60843SJed Brown       stash->nsendranks++;
918d7d60843SJed Brown       rowstart = i;
919d7d60843SJed Brown     }
920d7d60843SJed Brown     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
921d7d60843SJed Brown 
922d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
923d7d60843SJed Brown     sendno = 0;
924d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
925d7d60843SJed Brown       PetscInt owner;
926d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
927d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
928d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
929d7d60843SJed Brown       stash->sendranks[sendno] = owner;
930d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
931d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9327e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
933d7d60843SJed Brown       }
934d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
935d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
936d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
937d7d60843SJed Brown       sendno++;
938d7d60843SJed Brown       rowstart = i;
939d7d60843SJed Brown     }
940d7d60843SJed Brown     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
941d7d60843SJed Brown   }
942d7d60843SJed Brown 
9434b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
9444b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
9454b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
946*23b7d1baSJed Brown     size_t i;
9474b4eb8d3SJed Brown     for (i=0; i<nblocks; i++) {
9484b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9494b4eb8d3SJed Brown       sendblock_i->row = -(sendblock_i->row+1);
9504b4eb8d3SJed Brown     }
9514b4eb8d3SJed Brown   }
9524b4eb8d3SJed Brown 
95397da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) {
95497da8949SJed Brown     PetscMPIInt i,tag;
95597da8949SJed Brown     ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
95697da8949SJed Brown     for (i=0; i<stash->nrecvranks; i++) {
95797da8949SJed Brown       ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
95897da8949SJed Brown     }
95997da8949SJed Brown     for (i=0; i<stash->nsendranks; i++) {
96097da8949SJed Brown       ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
96197da8949SJed Brown     }
96297da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
96397da8949SJed Brown   } else {
964e0ddb6e8SJed Brown     ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
965e0ddb6e8SJed Brown                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
966d7d60843SJed Brown                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
967b5ddc6f1SJed Brown     ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
96897da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
96997da8949SJed Brown   }
970d7d60843SJed Brown 
971d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
972d7d60843SJed Brown   stash->recvframe_active = NULL;
973d7d60843SJed Brown   stash->recvframe_i      = 0;
974d7d60843SJed Brown   stash->some_i           = 0;
975d7d60843SJed Brown   stash->some_count       = 0;
976d7d60843SJed Brown   stash->recvcount        = 0;
97797da8949SJed Brown   stash->subset_off_proc  = mat->subsetoffprocentries;
9784b4eb8d3SJed Brown   stash->insertmode       = &mat->insertmode;
979d7d60843SJed Brown   PetscFunctionReturn(0);
980d7d60843SJed Brown }
981d7d60843SJed Brown 
982d7d60843SJed Brown #undef __FUNCT__
983d7d60843SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_BTS"
984d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
985d7d60843SJed Brown {
986d7d60843SJed Brown   PetscErrorCode ierr;
987d7d60843SJed Brown   MatStashBlock *block;
988d7d60843SJed Brown 
989d7d60843SJed Brown   PetscFunctionBegin;
990d7d60843SJed Brown   *flg = 0;
991d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
992d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
993d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
994d7d60843SJed Brown       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
995d7d60843SJed Brown       stash->some_i = 0;
996d7d60843SJed Brown     }
997d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
998d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
999d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
1000d7d60843SJed Brown       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
1001d7d60843SJed Brown     }
10024b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
10034b4eb8d3SJed Brown       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
10044b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
10054b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
10064b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
10074b4eb8d3SJed Brown     }
1008d7d60843SJed Brown     stash->some_i++;
1009d7d60843SJed Brown     stash->recvcount++;
1010d7d60843SJed Brown     stash->recvframe_i = 0;
1011d7d60843SJed Brown   }
1012d7d60843SJed Brown   *n = 1;
1013d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
10144b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
1015d7d60843SJed Brown   *row = &block->row;
1016d7d60843SJed Brown   *col = &block->col;
1017d7d60843SJed Brown   *val = block->vals;
1018d7d60843SJed Brown   stash->recvframe_i++;
1019d7d60843SJed Brown   *flg = 1;
1020d7d60843SJed Brown   PetscFunctionReturn(0);
1021d7d60843SJed Brown }
1022d7d60843SJed Brown 
1023d7d60843SJed Brown #undef __FUNCT__
1024d7d60843SJed Brown #define __FUNCT__ "MatStashScatterEnd_BTS"
1025d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
1026d7d60843SJed Brown {
1027d7d60843SJed Brown   PetscErrorCode ierr;
1028d7d60843SJed Brown 
1029d7d60843SJed Brown   PetscFunctionBegin;
1030d7d60843SJed Brown   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
10313575f486SJed Brown   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
10323575f486SJed Brown     void *dummy;
10333575f486SJed Brown     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
10343575f486SJed Brown   } else {                      /* No reuse, so collect everything. */
1035d7d60843SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
103697da8949SJed Brown   }
1037d7d60843SJed Brown 
1038d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
1039d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
1040d7d60843SJed Brown      Also update the oldmax, only if it increases */
1041d7d60843SJed Brown   if (stash->n) {
1042d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
1043d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1044d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1045d7d60843SJed Brown   }
1046d7d60843SJed Brown 
1047d7d60843SJed Brown   stash->nmax       = 0;
1048d7d60843SJed Brown   stash->n          = 0;
1049d7d60843SJed Brown   stash->reallocs   = -1;
1050d7d60843SJed Brown   stash->nprocessed = 0;
1051d7d60843SJed Brown 
1052d7d60843SJed Brown   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1053d7d60843SJed Brown 
1054d7d60843SJed Brown   stash->space = 0;
1055d7d60843SJed Brown 
1056d7d60843SJed Brown   PetscFunctionReturn(0);
1057d7d60843SJed Brown }
1058d7d60843SJed Brown 
1059d7d60843SJed Brown #undef __FUNCT__
1060d7d60843SJed Brown #define __FUNCT__ "MatStashScatterDestroy_BTS"
1061d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1062d7d60843SJed Brown {
1063d7d60843SJed Brown   PetscErrorCode ierr;
1064d7d60843SJed Brown 
1065d7d60843SJed Brown   PetscFunctionBegin;
1066d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1067d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1068d7d60843SJed Brown   stash->recvframes = NULL;
1069d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1070d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
1071d7d60843SJed Brown     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1072d7d60843SJed Brown   }
1073d7d60843SJed Brown   stash->nsendranks = 0;
1074d7d60843SJed Brown   stash->nrecvranks = 0;
1075d7d60843SJed Brown   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1076d7d60843SJed Brown   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1077d7d60843SJed Brown   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1078d7d60843SJed Brown   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1079d7d60843SJed Brown   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1080d7d60843SJed Brown   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1081d7d60843SJed Brown   PetscFunctionReturn(0);
1082d7d60843SJed Brown }
1083