xref: /petsc/src/mat/utils/matstash.c (revision 1667be421921b5a18c088baf27915dd8c3ba83b4)
12d5177cdSBarry Smith 
2af0996ceSBarry Smith #include <petsc/private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13d7d60843SJed Brown 
149417f4adSLois Curfman McInnes /*
158798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
164c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
174c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
189417f4adSLois Curfman McInnes 
194c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
204c1ff481SSatish Balay 
214c1ff481SSatish Balay   Input Parameters:
224c1ff481SSatish Balay   comm - communicator, required for scatters.
234c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
244c1ff481SSatish Balay 
254c1ff481SSatish Balay   Output Parameters:
264c1ff481SSatish Balay   stash    - the newly created stash
279417f4adSLois Curfman McInnes */
28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
299417f4adSLois Curfman McInnes {
30dfbe8321SBarry Smith   PetscErrorCode ierr;
31533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
32ace3abfcSBarry Smith   PetscBool      flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
378865f1eaSKarl Rupp 
38752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
41a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
42785e854fSJed Brown   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
43533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
44533163c2SBarry Smith 
45bc5ccf88SSatish Balay 
46434d7ff9SSatish Balay   nopt = stash->size;
47785e854fSJed Brown   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
48c5929fdfSBarry Smith   ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
49434d7ff9SSatish Balay   if (flg) {
50434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
51434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
52434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
53f4ab19daSSatish Balay     else                          max = 0; /* Use default */
54434d7ff9SSatish Balay     stash->umax = max;
55434d7ff9SSatish Balay   } else {
56434d7ff9SSatish Balay     stash->umax = 0;
57434d7ff9SSatish Balay   }
58606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
594c1ff481SSatish Balay   if (bs <= 0) bs = 1;
60a2d1c673SSatish Balay 
614c1ff481SSatish Balay   stash->bs         = bs;
629417f4adSLois Curfman McInnes   stash->nmax       = 0;
63434d7ff9SSatish Balay   stash->oldnmax    = 0;
649417f4adSLois Curfman McInnes   stash->n          = 0;
654c1ff481SSatish Balay   stash->reallocs   = -1;
6675cae7c1SHong Zhang   stash->space_head = 0;
6775cae7c1SHong Zhang   stash->space      = 0;
689417f4adSLois Curfman McInnes 
69bc5ccf88SSatish Balay   stash->send_waits  = 0;
70bc5ccf88SSatish Balay   stash->recv_waits  = 0;
71a2d1c673SSatish Balay   stash->send_status = 0;
72bc5ccf88SSatish Balay   stash->nsends      = 0;
73bc5ccf88SSatish Balay   stash->nrecvs      = 0;
74bc5ccf88SSatish Balay   stash->svalues     = 0;
75bc5ccf88SSatish Balay   stash->rvalues     = 0;
76563fb871SSatish Balay   stash->rindices    = 0;
77a2d1c673SSatish Balay   stash->nprocessed  = 0;
7867318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
79d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
808865f1eaSKarl Rupp 
81c5929fdfSBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
82*1667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
83b30fb036SBarry Smith   ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
84b30fb036SBarry Smith   if (!flg) {
85d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
86d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
87d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
88d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
89ac2b2aa0SJed Brown   } else {
90*1667be42SBarry Smith #endif
91ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
95*1667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
96ac2b2aa0SJed Brown   }
97*1667be42SBarry Smith #endif
983a40ed3dSBarry Smith   PetscFunctionReturn(0);
999417f4adSLois Curfman McInnes }
1009417f4adSLois Curfman McInnes 
1014c1ff481SSatish Balay /*
1028798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1034c1ff481SSatish Balay */
104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1059417f4adSLois Curfman McInnes {
106dfbe8321SBarry Smith   PetscErrorCode ierr;
107a2d1c673SSatish Balay 
108bc5ccf88SSatish Balay   PetscFunctionBegin;
1096bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
110ac2b2aa0SJed Brown   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
1118865f1eaSKarl Rupp 
11282740460SHong Zhang   stash->space = 0;
1138865f1eaSKarl Rupp 
114533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
115bc5ccf88SSatish Balay   PetscFunctionReturn(0);
116bc5ccf88SSatish Balay }
117bc5ccf88SSatish Balay 
1184c1ff481SSatish Balay /*
11967318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1204c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
12167318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1224c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1234c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1244c1ff481SSatish Balay    so that the same value can be used the next time through.
1254c1ff481SSatish Balay */
126dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
127bc5ccf88SSatish Balay {
1286849ba73SBarry Smith   PetscErrorCode ierr;
129ac2b2aa0SJed Brown 
130ac2b2aa0SJed Brown   PetscFunctionBegin;
131ac2b2aa0SJed Brown   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
132ac2b2aa0SJed Brown   PetscFunctionReturn(0);
133ac2b2aa0SJed Brown }
134ac2b2aa0SJed Brown 
135ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
136ac2b2aa0SJed Brown {
137ac2b2aa0SJed Brown   PetscErrorCode ierr;
138533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
139a2d1c673SSatish Balay   MPI_Status     *send_status;
140a2d1c673SSatish Balay 
1413a40ed3dSBarry Smith   PetscFunctionBegin;
142533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
143a2d1c673SSatish Balay   /* wait on sends */
144a2d1c673SSatish Balay   if (nsends) {
145785e854fSJed Brown     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
146a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
147606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
148a2d1c673SSatish Balay   }
149a2d1c673SSatish Balay 
150c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
151434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
152434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
153b9b97703SBarry Smith   if (stash->n) {
15494b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1558a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
156434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
157b9b97703SBarry Smith   }
158434d7ff9SSatish Balay 
159d07ff455SSatish Balay   stash->nmax       = 0;
160d07ff455SSatish Balay   stash->n          = 0;
1614c1ff481SSatish Balay   stash->reallocs   = -1;
162a2d1c673SSatish Balay   stash->nprocessed = 0;
1638865f1eaSKarl Rupp 
1646bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1658865f1eaSKarl Rupp 
16682740460SHong Zhang   stash->space = 0;
1678865f1eaSKarl Rupp 
168606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
169606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
170c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
171c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
172606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
173c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
174563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1753a40ed3dSBarry Smith   PetscFunctionReturn(0);
1769417f4adSLois Curfman McInnes }
1779417f4adSLois Curfman McInnes 
1784c1ff481SSatish Balay /*
1798798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1804c1ff481SSatish Balay 
1814c1ff481SSatish Balay    Input Parameters:
1824c1ff481SSatish Balay    stash    - the stash
18394b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1844c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1854c1ff481SSatish Balay 
1864c1ff481SSatish Balay */
187c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
18897530c3fSBarry Smith {
189c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
19094b769a5SSatish Balay 
1913a40ed3dSBarry Smith   PetscFunctionBegin;
1921ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1931ecfd215SBarry Smith   if (reallocs) {
194434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
195434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1961ecfd215SBarry Smith   }
197bc5ccf88SSatish Balay   PetscFunctionReturn(0);
198bc5ccf88SSatish Balay }
1994c1ff481SSatish Balay 
2004c1ff481SSatish Balay /*
2018798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
2024c1ff481SSatish Balay 
2034c1ff481SSatish Balay    Input Parameters:
2044c1ff481SSatish Balay    stash  - the stash
2054c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2064c1ff481SSatish Balay             this value is used while allocating memory.
2074c1ff481SSatish Balay */
208c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
209bc5ccf88SSatish Balay {
210bc5ccf88SSatish Balay   PetscFunctionBegin;
211434d7ff9SSatish Balay   stash->umax = max;
2123a40ed3dSBarry Smith   PetscFunctionReturn(0);
21397530c3fSBarry Smith }
21497530c3fSBarry Smith 
2158798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2164c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2174c1ff481SSatish Balay    being inserted into the stash.
2184c1ff481SSatish Balay 
2194c1ff481SSatish Balay    Input Parameters:
2204c1ff481SSatish Balay    stash - the stash
2214c1ff481SSatish Balay    incr  - the minimum increase requested
2224c1ff481SSatish Balay 
2234c1ff481SSatish Balay    Notes:
2244c1ff481SSatish Balay    This routine doubles the currently used memory.
2254c1ff481SSatish Balay  */
226c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2279417f4adSLois Curfman McInnes {
2286849ba73SBarry Smith   PetscErrorCode ierr;
2295bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2309417f4adSLois Curfman McInnes 
2313a40ed3dSBarry Smith   PetscFunctionBegin;
2329417f4adSLois Curfman McInnes   /* allocate a larger stash */
233c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
234434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
235434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
236c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
237434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
238434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
239434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2404c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
241d07ff455SSatish Balay 
24275cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
24375cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
244b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
245b087b6d6SSatish Balay     stash->space_head = stash->space;
24675cae7c1SHong Zhang   }
247b087b6d6SSatish Balay 
248bc5ccf88SSatish Balay   stash->reallocs++;
24975cae7c1SHong Zhang   stash->nmax = newnmax;
250bc5ccf88SSatish Balay   PetscFunctionReturn(0);
251bc5ccf88SSatish Balay }
252bc5ccf88SSatish Balay /*
2538798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2544c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2554c1ff481SSatish Balay   can be inserted with a single call to this function.
2564c1ff481SSatish Balay 
2574c1ff481SSatish Balay   Input Parameters:
2584c1ff481SSatish Balay   stash  - the stash
2594c1ff481SSatish Balay   row    - the global row correspoiding to the values
2604c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2614c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2624c1ff481SSatish Balay   values - the values inserted
263bc5ccf88SSatish Balay */
264ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
265bc5ccf88SSatish Balay {
266dfbe8321SBarry Smith   PetscErrorCode     ierr;
267b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
26875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
269bc5ccf88SSatish Balay 
270bc5ccf88SSatish Balay   PetscFunctionBegin;
2714c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
27275cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2738798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2749417f4adSLois Curfman McInnes   }
27575cae7c1SHong Zhang   space = stash->space;
27675cae7c1SHong Zhang   k     = space->local_used;
2774c1ff481SSatish Balay   for (i=0; i<n; i++) {
27888c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
27975cae7c1SHong Zhang     space->idx[k] = row;
28075cae7c1SHong Zhang     space->idy[k] = idxn[i];
28175cae7c1SHong Zhang     space->val[k] = values[i];
28275cae7c1SHong Zhang     k++;
283b400d20cSBarry Smith     cnt++;
2849417f4adSLois Curfman McInnes   }
285b400d20cSBarry Smith   stash->n               += cnt;
286b400d20cSBarry Smith   space->local_used      += cnt;
287b400d20cSBarry Smith   space->local_remaining -= cnt;
288a2d1c673SSatish Balay   PetscFunctionReturn(0);
289a2d1c673SSatish Balay }
29075cae7c1SHong Zhang 
2914c1ff481SSatish Balay /*
2928798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2934c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2944c1ff481SSatish Balay   can be inserted with a single call to this function.
295a2d1c673SSatish Balay 
2964c1ff481SSatish Balay   Input Parameters:
2974c1ff481SSatish Balay   stash   - the stash
2984c1ff481SSatish Balay   row     - the global row correspoiding to the values
2994c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
3004c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
3014c1ff481SSatish Balay   values  - the values inserted
3024c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3034c1ff481SSatish Balay             this happens because the input is columnoriented.
3044c1ff481SSatish Balay */
305ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
306a2d1c673SSatish Balay {
307dfbe8321SBarry Smith   PetscErrorCode     ierr;
30850e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
30975cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
310a2d1c673SSatish Balay 
3114c1ff481SSatish Balay   PetscFunctionBegin;
3124c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
31375cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3148798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3154c1ff481SSatish Balay   }
31675cae7c1SHong Zhang   space = stash->space;
31775cae7c1SHong Zhang   k     = space->local_used;
3184c1ff481SSatish Balay   for (i=0; i<n; i++) {
31988c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
32075cae7c1SHong Zhang     space->idx[k] = row;
32175cae7c1SHong Zhang     space->idy[k] = idxn[i];
32275cae7c1SHong Zhang     space->val[k] = values[i*stepval];
32375cae7c1SHong Zhang     k++;
324b400d20cSBarry Smith     cnt++;
3254c1ff481SSatish Balay   }
326b400d20cSBarry Smith   stash->n               += cnt;
327b400d20cSBarry Smith   space->local_used      += cnt;
328b400d20cSBarry Smith   space->local_remaining -= cnt;
3294c1ff481SSatish Balay   PetscFunctionReturn(0);
3304c1ff481SSatish Balay }
3314c1ff481SSatish Balay 
3324c1ff481SSatish Balay /*
3338798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3344c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3354c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3364c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3374c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3384c1ff481SSatish Balay 
3394c1ff481SSatish Balay   Input Parameters:
3404c1ff481SSatish Balay   stash  - the stash
3414c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3424c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3434c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3444c1ff481SSatish Balay            values. Each block is of size bs*bs.
3454c1ff481SSatish Balay   values - the values inserted
3464c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3474c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3484c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3494c1ff481SSatish Balay */
35054f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3514c1ff481SSatish Balay {
352dfbe8321SBarry Smith   PetscErrorCode     ierr;
35375cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
35454f21887SBarry Smith   const PetscScalar  *vals;
35554f21887SBarry Smith   PetscScalar        *array;
35675cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
357a2d1c673SSatish Balay 
358a2d1c673SSatish Balay   PetscFunctionBegin;
35975cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3608798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
361a2d1c673SSatish Balay   }
36275cae7c1SHong Zhang   space = stash->space;
36375cae7c1SHong Zhang   l     = space->local_used;
36475cae7c1SHong Zhang   bs2   = bs*bs;
3654c1ff481SSatish Balay   for (i=0; i<n; i++) {
36675cae7c1SHong Zhang     space->idx[l] = row;
36775cae7c1SHong Zhang     space->idy[l] = idxn[i];
36875cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
36975cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
37075cae7c1SHong Zhang        funtion call */
37175cae7c1SHong Zhang     array = space->val + bs2*l;
37275cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
37375cae7c1SHong Zhang     for (j=0; j<bs; j++) {
37475cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
37575cae7c1SHong Zhang       array++;
37675cae7c1SHong Zhang       vals += cmax*bs;
37775cae7c1SHong Zhang     }
37875cae7c1SHong Zhang     l++;
379a2d1c673SSatish Balay   }
3805bd3b8fbSHong Zhang   stash->n               += n;
38175cae7c1SHong Zhang   space->local_used      += n;
38275cae7c1SHong Zhang   space->local_remaining -= n;
3834c1ff481SSatish Balay   PetscFunctionReturn(0);
3844c1ff481SSatish Balay }
3854c1ff481SSatish Balay 
3864c1ff481SSatish Balay /*
3878798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3884c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3894c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3904c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3914c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3924c1ff481SSatish Balay 
3934c1ff481SSatish Balay   Input Parameters:
3944c1ff481SSatish Balay   stash  - the stash
3954c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3964c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3974c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3984c1ff481SSatish Balay            values. Each block is of size bs*bs.
3994c1ff481SSatish Balay   values - the values inserted
4004c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4014c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4024c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4034c1ff481SSatish Balay */
40454f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4054c1ff481SSatish Balay {
406dfbe8321SBarry Smith   PetscErrorCode     ierr;
40775cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
40854f21887SBarry Smith   const PetscScalar  *vals;
40954f21887SBarry Smith   PetscScalar        *array;
41075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4114c1ff481SSatish Balay 
4124c1ff481SSatish Balay   PetscFunctionBegin;
41375cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4148798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4154c1ff481SSatish Balay   }
41675cae7c1SHong Zhang   space = stash->space;
41775cae7c1SHong Zhang   l     = space->local_used;
41875cae7c1SHong Zhang   bs2   = bs*bs;
4194c1ff481SSatish Balay   for (i=0; i<n; i++) {
42075cae7c1SHong Zhang     space->idx[l] = row;
42175cae7c1SHong Zhang     space->idy[l] = idxn[i];
42275cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
42375cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
42475cae7c1SHong Zhang      funtion call */
42575cae7c1SHong Zhang     array = space->val + bs2*l;
42675cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
42775cae7c1SHong Zhang     for (j=0; j<bs; j++) {
4288865f1eaSKarl Rupp       for (k=0; k<bs; k++) array[k] = vals[k];
42975cae7c1SHong Zhang       array += bs;
43075cae7c1SHong Zhang       vals  += rmax*bs;
43175cae7c1SHong Zhang     }
4325bd3b8fbSHong Zhang     l++;
433a2d1c673SSatish Balay   }
4345bd3b8fbSHong Zhang   stash->n               += n;
43575cae7c1SHong Zhang   space->local_used      += n;
43675cae7c1SHong Zhang   space->local_remaining -= n;
4373a40ed3dSBarry Smith   PetscFunctionReturn(0);
4389417f4adSLois Curfman McInnes }
4394c1ff481SSatish Balay /*
4408798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4414c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4424c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4434c1ff481SSatish Balay   processors.
444bc5ccf88SSatish Balay 
4454c1ff481SSatish Balay   Input Parameters:
4464c1ff481SSatish Balay   stash  - the stash
4474c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4484c1ff481SSatish Balay            for each node.
4494c1ff481SSatish Balay 
4504c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4514c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4524c1ff481SSatish Balay   the proper global indices.
4534c1ff481SSatish Balay */
4541e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
455bc5ccf88SSatish Balay {
456ac2b2aa0SJed Brown   PetscErrorCode ierr;
457ac2b2aa0SJed Brown 
458ac2b2aa0SJed Brown   PetscFunctionBegin;
459ac2b2aa0SJed Brown   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
460ac2b2aa0SJed Brown   PetscFunctionReturn(0);
461ac2b2aa0SJed Brown }
462ac2b2aa0SJed Brown 
463ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
464ac2b2aa0SJed Brown {
465c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
466fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
4676849ba73SBarry Smith   PetscErrorCode     ierr;
46875cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
46954f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
470bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
471563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
47276ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4735bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
47454f21887SBarry Smith   PetscScalar        *sp_val;
4755bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
476bc5ccf88SSatish Balay 
477bc5ccf88SSatish Balay   PetscFunctionBegin;
4784b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
4794b4eb8d3SJed Brown     InsertMode addv;
480b2566f29SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
4814b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
4824b4eb8d3SJed Brown     mat->insertmode = addv; /* in case this processor had no cache */
4834b4eb8d3SJed Brown   }
4844b4eb8d3SJed Brown 
4854c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
48675cae7c1SHong Zhang 
487bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
488037dbc42SBarry Smith   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
4891795a4d1SJed Brown   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
490037dbc42SBarry Smith   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
491a2d1c673SSatish Balay 
49275cae7c1SHong Zhang   i       = j    = 0;
4937357eb19SBarry Smith   lastidx = -1;
4945bd3b8fbSHong Zhang   space   = stash->space_head;
4956c4ed002SBarry Smith   while (space) {
49675cae7c1SHong Zhang     space_next = space->next;
4975bd3b8fbSHong Zhang     sp_idx     = space->idx;
49875cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
4997357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
5005bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
5017357eb19SBarry Smith       lastidx = idx;
5027357eb19SBarry Smith       for (; j<size; j++) {
5034c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
504563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
505bc5ccf88SSatish Balay         }
506bc5ccf88SSatish Balay       }
50775cae7c1SHong Zhang       i++;
50875cae7c1SHong Zhang     }
50975cae7c1SHong Zhang     space = space_next;
510bc5ccf88SSatish Balay   }
511563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
512563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5138865f1eaSKarl Rupp     if (nlengths[i]) {
51476ec1555SBarry Smith       sizes[i] = 1; nsends++;
5158865f1eaSKarl Rupp     }
516563fb871SSatish Balay   }
517bc5ccf88SSatish Balay 
51854f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
519563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
52076ec1555SBarry Smith    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
521563fb871SSatish Balay    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
522563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
523563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
524563fb871SSatish Balay    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
525563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
526563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
527563fb871SSatish Balay    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
528563fb871SSatish Balay    ierr = PetscFree(onodes);CHKERRQ(ierr);
5298865f1eaSKarl Rupp    ierr = PetscFree(olengths);CHKERRQ(ierr);}
530bc5ccf88SSatish Balay 
531bc5ccf88SSatish Balay   /* do sends:
532bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
533bc5ccf88SSatish Balay          the ith processor
534bc5ccf88SSatish Balay   */
535dcca6d9dSJed Brown   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
536785e854fSJed Brown   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
537dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
538a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
539bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
540bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
541563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
542533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
543bc5ccf88SSatish Balay   }
54475cae7c1SHong Zhang 
54575cae7c1SHong Zhang   i     = 0;
5465bd3b8fbSHong Zhang   space = stash->space_head;
5476c4ed002SBarry Smith   while (space) {
54875cae7c1SHong Zhang     space_next = space->next;
5495bd3b8fbSHong Zhang     sp_idx     = space->idx;
5505bd3b8fbSHong Zhang     sp_idy     = space->idy;
5515bd3b8fbSHong Zhang     sp_val     = space->val;
55275cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
553bc5ccf88SSatish Balay       j = owner[i];
554a2d1c673SSatish Balay       if (bs2 == 1) {
5555bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
556a2d1c673SSatish Balay       } else {
557c1ac3661SBarry Smith         PetscInt    k;
55854f21887SBarry Smith         PetscScalar *buf1,*buf2;
5594c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
560b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5618865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
562a2d1c673SSatish Balay       }
5635bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5645bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
565bc5ccf88SSatish Balay       startv[j]++;
566bc5ccf88SSatish Balay       starti[j]++;
56775cae7c1SHong Zhang       i++;
56875cae7c1SHong Zhang     }
56975cae7c1SHong Zhang     space = space_next;
570bc5ccf88SSatish Balay   }
571bc5ccf88SSatish Balay   startv[0] = 0;
5728865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
573e5d0e772SSatish Balay 
574bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
57576ec1555SBarry Smith     if (sizes[i]) {
576563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
577a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
578bc5ccf88SSatish Balay     }
579b85c94c3SSatish Balay   }
5806cf91177SBarry Smith #if defined(PETSC_USE_INFO)
58193157e10SBarry Smith   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
582e5d0e772SSatish Balay   for (i=0; i<size; i++) {
58376ec1555SBarry Smith     if (sizes[i]) {
58430c47e72SSatish Balay       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
585e5d0e772SSatish Balay     }
586e5d0e772SSatish Balay   }
587e5d0e772SSatish Balay #endif
588c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
589606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
590c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
59176ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
592a2d1c673SSatish Balay 
593563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
594785e854fSJed Brown   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
595563fb871SSatish Balay 
596563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
597563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
598563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
599563fb871SSatish Balay   }
600563fb871SSatish Balay   stash->recv_waits = recv_waits;
6018865f1eaSKarl Rupp 
602563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
603563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
604563fb871SSatish Balay 
605c05d87d6SBarry Smith   stash->svalues         = svalues;
606c05d87d6SBarry Smith   stash->sindices        = sindices;
607c05d87d6SBarry Smith   stash->rvalues         = rvalues;
608c05d87d6SBarry Smith   stash->rindices        = rindices;
609c05d87d6SBarry Smith   stash->send_waits      = send_waits;
610c05d87d6SBarry Smith   stash->nsends          = nsends;
611c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
61267318a8aSJed Brown   stash->reproduce_count = 0;
613bc5ccf88SSatish Balay   PetscFunctionReturn(0);
614bc5ccf88SSatish Balay }
615bc5ccf88SSatish Balay 
616a2d1c673SSatish Balay /*
6178798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6188798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6194c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6204c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6214c1ff481SSatish Balay 
6224c1ff481SSatish Balay    Input Parameters:
6234c1ff481SSatish Balay    stash - the stash
6244c1ff481SSatish Balay 
6254c1ff481SSatish Balay    Output Parameters:
6264c1ff481SSatish Balay    nvals - the number of entries in the current message.
6274c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6284c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6294c1ff481SSatish Balay    vals  - the values
6304c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6314c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6324c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
633a2d1c673SSatish Balay */
63454f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
635bc5ccf88SSatish Balay {
6366849ba73SBarry Smith   PetscErrorCode ierr;
637ac2b2aa0SJed Brown 
638ac2b2aa0SJed Brown   PetscFunctionBegin;
639ac2b2aa0SJed Brown   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
640ac2b2aa0SJed Brown   PetscFunctionReturn(0);
641ac2b2aa0SJed Brown }
642ac2b2aa0SJed Brown 
643ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
644ac2b2aa0SJed Brown {
645ac2b2aa0SJed Brown   PetscErrorCode ierr;
646533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
647fe09c992SBarry Smith   PetscInt       bs2;
648a2d1c673SSatish Balay   MPI_Status     recv_status;
649ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
650bc5ccf88SSatish Balay 
651bc5ccf88SSatish Balay   PetscFunctionBegin;
652a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
653a2d1c673SSatish Balay   /* Return if no more messages to process */
6548865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
655a2d1c673SSatish Balay 
6564c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
65767318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
658a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
659a2d1c673SSatish Balay   while (!match_found) {
66067318a8aSJed Brown     if (stash->reproduce) {
66167318a8aSJed Brown       i    = stash->reproduce_count++;
66267318a8aSJed Brown       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
66367318a8aSJed Brown     } else {
664a2d1c673SSatish Balay       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
66567318a8aSJed Brown     }
666e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
667533163c2SBarry Smith 
66867318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
669a2d1c673SSatish Balay     if (i % 2) {
670a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
6718865f1eaSKarl Rupp 
672c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6738865f1eaSKarl Rupp 
674a2d1c673SSatish Balay       *nvals = *nvals/bs2;
675563fb871SSatish Balay     } else {
676563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
6778865f1eaSKarl Rupp 
678563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
6798865f1eaSKarl Rupp 
680563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
681bc5ccf88SSatish Balay     }
682a2d1c673SSatish Balay 
683cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
684c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
685c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
686a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
687563fb871SSatish Balay       *rows = stash->rindices[i2];
688a2d1c673SSatish Balay       *cols = *rows + *nvals;
689563fb871SSatish Balay       *vals = stash->rvalues[i1];
690a2d1c673SSatish Balay       *flg  = 1;
691a2d1c673SSatish Balay       stash->nprocessed++;
69235d8aa7fSBarry Smith       match_found = PETSC_TRUE;
693bc5ccf88SSatish Balay     }
694bc5ccf88SSatish Balay   }
695bc5ccf88SSatish Balay   PetscFunctionReturn(0);
696bc5ccf88SSatish Balay }
697d7d60843SJed Brown 
698d7d60843SJed Brown typedef struct {
699d7d60843SJed Brown   PetscInt row;
700d7d60843SJed Brown   PetscInt col;
701d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
702d7d60843SJed Brown } MatStashBlock;
703d7d60843SJed Brown 
704d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
705d7d60843SJed Brown {
706d7d60843SJed Brown   PetscErrorCode ierr;
707d7d60843SJed Brown   PetscMatStashSpace space;
708d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
709d7d60843SJed Brown   PetscScalar **valptr;
710d7d60843SJed Brown 
711d7d60843SJed Brown   PetscFunctionBegin;
712d7d60843SJed Brown   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
713d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
714d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
715d7d60843SJed Brown       row[cnt] = space->idx[i];
716d7d60843SJed Brown       col[cnt] = space->idy[i];
717d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
718d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
719d7d60843SJed Brown       cnt++;
720d7d60843SJed Brown     }
721d7d60843SJed Brown   }
722d7d60843SJed Brown   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
723d7d60843SJed Brown   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
724d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
725d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
726d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
727d7d60843SJed Brown       PetscInt colstart;
728d7d60843SJed Brown       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
729d7d60843SJed Brown       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
730d7d60843SJed Brown         PetscInt j,l;
731d7d60843SJed Brown         MatStashBlock *block;
732d7d60843SJed Brown         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
733d7d60843SJed Brown         block->row = row[rowstart];
734d7d60843SJed Brown         block->col = col[colstart];
735d7d60843SJed Brown         ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
736d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
737d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
738d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
739d7d60843SJed Brown           } else {
740d7d60843SJed Brown             ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
741d7d60843SJed Brown           }
742d7d60843SJed Brown         }
743d7d60843SJed Brown         colstart = j;
744d7d60843SJed Brown       }
745d7d60843SJed Brown       rowstart = i;
746d7d60843SJed Brown     }
747d7d60843SJed Brown   }
748d7d60843SJed Brown   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
749d7d60843SJed Brown   PetscFunctionReturn(0);
750d7d60843SJed Brown }
751d7d60843SJed Brown 
752d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
753d7d60843SJed Brown {
754d7d60843SJed Brown   PetscErrorCode ierr;
755d7d60843SJed Brown 
756d7d60843SJed Brown   PetscFunctionBegin;
757d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
758d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
759d7d60843SJed Brown     PetscMPIInt  blocklens[2];
760d7d60843SJed Brown     MPI_Aint     displs[2];
761d7d60843SJed Brown     MPI_Datatype types[2],stype;
7629503c6c6SJed Brown     /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
7639503c6c6SJed Brown      * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
7649503c6c6SJed Brown      * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
7659503c6c6SJed Brown      */
7669503c6c6SJed Brown     struct DummyBlock {PetscInt row,col; PetscReal vals;};
767d7d60843SJed Brown 
7689503c6c6SJed Brown     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
769d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
770d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
771d7d60843SJed Brown     }
772d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
773d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
774d7d60843SJed Brown     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
775d7d60843SJed Brown     blocklens[0] = 2;
776d7d60843SJed Brown     blocklens[1] = bs2;
7779503c6c6SJed Brown     displs[0] = offsetof(struct DummyBlock,row);
7789503c6c6SJed Brown     displs[1] = offsetof(struct DummyBlock,vals);
779d7d60843SJed Brown     types[0] = MPIU_INT;
780d7d60843SJed Brown     types[1] = MPIU_SCALAR;
781d7d60843SJed Brown     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
782d7d60843SJed Brown     ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
783d7d60843SJed Brown     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
784d7d60843SJed Brown     ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
785d7d60843SJed Brown     ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
786d7d60843SJed Brown   }
787d7d60843SJed Brown   PetscFunctionReturn(0);
788d7d60843SJed Brown }
789d7d60843SJed Brown 
790d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
791d7d60843SJed Brown  * Here we post the main sends.
792d7d60843SJed Brown  */
793d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
794d7d60843SJed Brown {
795d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
796d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
797d7d60843SJed Brown   PetscErrorCode ierr;
798d7d60843SJed Brown 
799d7d60843SJed Brown   PetscFunctionBegin;
800d7d60843SJed Brown   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
801d7d60843SJed Brown   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
802d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
803d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
804d7d60843SJed Brown   PetscFunctionReturn(0);
805d7d60843SJed Brown }
806d7d60843SJed Brown 
807d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
808d7d60843SJed Brown  * Here we post the main recvs.
809d7d60843SJed Brown  */
810d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
811d7d60843SJed Brown {
812d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
813d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
814d7d60843SJed Brown   MatStashFrame *frame;
815d7d60843SJed Brown   PetscErrorCode ierr;
816d7d60843SJed Brown 
817d7d60843SJed Brown   PetscFunctionBegin;
818d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
819d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
820d7d60843SJed Brown   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
821d7d60843SJed Brown   frame->count = hdr->count;
822d7d60843SJed Brown   frame->pending = 1;
823d7d60843SJed Brown   PetscFunctionReturn(0);
824d7d60843SJed Brown }
825d7d60843SJed Brown 
826*1667be42SBarry Smith #if !defined(PETSC_HAVE_MPIUNI)
827d7d60843SJed Brown /*
828d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
829d7d60843SJed Brown  */
830d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
831d7d60843SJed Brown {
832d7d60843SJed Brown   PetscErrorCode ierr;
833d7d60843SJed Brown   size_t nblocks;
834d7d60843SJed Brown   char *sendblocks;
835d7d60843SJed Brown 
836d7d60843SJed Brown   PetscFunctionBegin;
8374b4eb8d3SJed Brown #if defined(PETSC_USE_DEBUG)
8384b4eb8d3SJed Brown   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
8394b4eb8d3SJed Brown     InsertMode addv;
840b2566f29SBarry Smith     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
8414b4eb8d3SJed Brown     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
8424b4eb8d3SJed Brown   }
8434b4eb8d3SJed Brown #endif
8444b4eb8d3SJed Brown 
84597da8949SJed Brown   if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
84697da8949SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
84797da8949SJed Brown   }
84897da8949SJed Brown 
849d7d60843SJed Brown   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
850d7d60843SJed Brown   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
851d7d60843SJed Brown   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
852d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
85397da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
85423b7d1baSJed Brown     PetscInt i;
85523b7d1baSJed Brown     size_t b;
85697da8949SJed Brown     for (i=0,b=0; i<stash->nsendranks; i++) {
85797da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
85897da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
85997da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
86097da8949SJed Brown       for ( ; b<nblocks; b++) {
86197da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
86297da8949SJed Brown         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
86397da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
86497da8949SJed Brown         stash->sendhdr[i].count++;
86597da8949SJed Brown       }
86697da8949SJed Brown     }
86797da8949SJed Brown   } else {                      /* Dynamically count and pack (first time) */
86823b7d1baSJed Brown     PetscInt sendno;
86923b7d1baSJed Brown     size_t i,rowstart;
870d7d60843SJed Brown 
871d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
872d7d60843SJed Brown     stash->nsendranks = 0;
873d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
8747e2ea869SJed Brown       PetscInt owner;
875d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
876d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
877d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
878d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
879d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8807e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
881d7d60843SJed Brown       }
882d7d60843SJed Brown       stash->nsendranks++;
883d7d60843SJed Brown       rowstart = i;
884d7d60843SJed Brown     }
885d7d60843SJed Brown     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
886d7d60843SJed Brown 
887d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
888d7d60843SJed Brown     sendno = 0;
889d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
890d7d60843SJed Brown       PetscInt owner;
891d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
892d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
893d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
894d7d60843SJed Brown       stash->sendranks[sendno] = owner;
895d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
896d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
8977e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
898d7d60843SJed Brown       }
899d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
900d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
901d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
902d7d60843SJed Brown       sendno++;
903d7d60843SJed Brown       rowstart = i;
904d7d60843SJed Brown     }
905d7d60843SJed Brown     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
906d7d60843SJed Brown   }
907d7d60843SJed Brown 
9084b4eb8d3SJed Brown   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
9094b4eb8d3SJed Brown    * message or a dummy entry of some sort. */
9104b4eb8d3SJed Brown   if (mat->insertmode == INSERT_VALUES) {
91123b7d1baSJed Brown     size_t i;
9124b4eb8d3SJed Brown     for (i=0; i<nblocks; i++) {
9134b4eb8d3SJed Brown       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
9144b4eb8d3SJed Brown       sendblock_i->row = -(sendblock_i->row+1);
9154b4eb8d3SJed Brown     }
9164b4eb8d3SJed Brown   }
9174b4eb8d3SJed Brown 
91897da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) {
91997da8949SJed Brown     PetscMPIInt i,tag;
92097da8949SJed Brown     ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
92197da8949SJed Brown     for (i=0; i<stash->nrecvranks; i++) {
92297da8949SJed Brown       ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
92397da8949SJed Brown     }
92497da8949SJed Brown     for (i=0; i<stash->nsendranks; i++) {
92597da8949SJed Brown       ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
92697da8949SJed Brown     }
92797da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
92897da8949SJed Brown   } else {
929e0ddb6e8SJed Brown     ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
930e0ddb6e8SJed Brown                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
931d7d60843SJed Brown                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
932b5ddc6f1SJed Brown     ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
93397da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
93497da8949SJed Brown   }
935d7d60843SJed Brown 
936d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
937d7d60843SJed Brown   stash->recvframe_active = NULL;
938d7d60843SJed Brown   stash->recvframe_i      = 0;
939d7d60843SJed Brown   stash->some_i           = 0;
940d7d60843SJed Brown   stash->some_count       = 0;
941d7d60843SJed Brown   stash->recvcount        = 0;
94297da8949SJed Brown   stash->subset_off_proc  = mat->subsetoffprocentries;
9434b4eb8d3SJed Brown   stash->insertmode       = &mat->insertmode;
944d7d60843SJed Brown   PetscFunctionReturn(0);
945d7d60843SJed Brown }
946d7d60843SJed Brown 
947d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
948d7d60843SJed Brown {
949d7d60843SJed Brown   PetscErrorCode ierr;
950d7d60843SJed Brown   MatStashBlock *block;
951d7d60843SJed Brown 
952d7d60843SJed Brown   PetscFunctionBegin;
953d7d60843SJed Brown   *flg = 0;
954d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
955d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
956d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
957d7d60843SJed Brown       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
958d7d60843SJed Brown       stash->some_i = 0;
959d7d60843SJed Brown     }
960d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
961d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
962d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
963d7d60843SJed Brown       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
964d7d60843SJed Brown     }
9654b4eb8d3SJed Brown     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
9664b4eb8d3SJed Brown       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
9674b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
9684b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9694b4eb8d3SJed Brown       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
9704b4eb8d3SJed Brown     }
971d7d60843SJed Brown     stash->some_i++;
972d7d60843SJed Brown     stash->recvcount++;
973d7d60843SJed Brown     stash->recvframe_i = 0;
974d7d60843SJed Brown   }
975d7d60843SJed Brown   *n = 1;
976d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
9774b4eb8d3SJed Brown   if (block->row < 0) block->row = -(block->row + 1);
978d7d60843SJed Brown   *row = &block->row;
979d7d60843SJed Brown   *col = &block->col;
980d7d60843SJed Brown   *val = block->vals;
981d7d60843SJed Brown   stash->recvframe_i++;
982d7d60843SJed Brown   *flg = 1;
983d7d60843SJed Brown   PetscFunctionReturn(0);
984d7d60843SJed Brown }
985d7d60843SJed Brown 
986d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
987d7d60843SJed Brown {
988d7d60843SJed Brown   PetscErrorCode ierr;
989d7d60843SJed Brown 
990d7d60843SJed Brown   PetscFunctionBegin;
991d7d60843SJed Brown   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
9923575f486SJed Brown   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
9933575f486SJed Brown     void *dummy;
9943575f486SJed Brown     ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
9953575f486SJed Brown   } else {                      /* No reuse, so collect everything. */
996d7d60843SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
99797da8949SJed Brown   }
998d7d60843SJed Brown 
999d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
1000d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
1001d7d60843SJed Brown      Also update the oldmax, only if it increases */
1002d7d60843SJed Brown   if (stash->n) {
1003d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
1004d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1005d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1006d7d60843SJed Brown   }
1007d7d60843SJed Brown 
1008d7d60843SJed Brown   stash->nmax       = 0;
1009d7d60843SJed Brown   stash->n          = 0;
1010d7d60843SJed Brown   stash->reallocs   = -1;
1011d7d60843SJed Brown   stash->nprocessed = 0;
1012d7d60843SJed Brown 
1013d7d60843SJed Brown   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1014d7d60843SJed Brown 
1015d7d60843SJed Brown   stash->space = 0;
1016d7d60843SJed Brown 
1017d7d60843SJed Brown   PetscFunctionReturn(0);
1018d7d60843SJed Brown }
1019d7d60843SJed Brown 
1020d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1021d7d60843SJed Brown {
1022d7d60843SJed Brown   PetscErrorCode ierr;
1023d7d60843SJed Brown 
1024d7d60843SJed Brown   PetscFunctionBegin;
1025d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1026d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1027d7d60843SJed Brown   stash->recvframes = NULL;
1028d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1029d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
1030d7d60843SJed Brown     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1031d7d60843SJed Brown   }
1032d7d60843SJed Brown   stash->nsendranks = 0;
1033d7d60843SJed Brown   stash->nrecvranks = 0;
1034d7d60843SJed Brown   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1035d7d60843SJed Brown   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1036d7d60843SJed Brown   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1037d7d60843SJed Brown   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1038d7d60843SJed Brown   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1039d7d60843SJed Brown   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1040d7d60843SJed Brown   PetscFunctionReturn(0);
1041d7d60843SJed Brown }
1042*1667be42SBarry Smith #endif
1043