xref: /petsc/src/mat/utils/matstash.c (revision 7e2ea8690d8207a2d9eb3401d667867cc78ebce5)
12d5177cdSBarry Smith 
2b45d2f2cSJed Brown #include <petsc-private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13d7d60843SJed Brown 
149417f4adSLois Curfman McInnes /*
158798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
164c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
174c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
189417f4adSLois Curfman McInnes 
194c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
204c1ff481SSatish Balay 
214c1ff481SSatish Balay   Input Parameters:
224c1ff481SSatish Balay   comm - communicator, required for scatters.
234c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
244c1ff481SSatish Balay 
254c1ff481SSatish Balay   Output Parameters:
264c1ff481SSatish Balay   stash    - the newly created stash
279417f4adSLois Curfman McInnes */
284a2ae208SSatish Balay #undef __FUNCT__
294a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
30c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
319417f4adSLois Curfman McInnes {
32dfbe8321SBarry Smith   PetscErrorCode ierr;
33533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
34ace3abfcSBarry Smith   PetscBool      flg;
35bc5ccf88SSatish Balay 
363a40ed3dSBarry Smith   PetscFunctionBegin;
37bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
38752ec6e0SSatish Balay   stash->comm = comm;
398865f1eaSKarl Rupp 
40752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
41a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
42a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
43a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
44785e854fSJed Brown   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
45533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
46533163c2SBarry Smith 
47bc5ccf88SSatish Balay 
48434d7ff9SSatish Balay   nopt = stash->size;
49785e854fSJed Brown   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
500298fd71SBarry Smith   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
51434d7ff9SSatish Balay   if (flg) {
52434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
53434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
54434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
55f4ab19daSSatish Balay     else                          max = 0; /* Use default */
56434d7ff9SSatish Balay     stash->umax = max;
57434d7ff9SSatish Balay   } else {
58434d7ff9SSatish Balay     stash->umax = 0;
59434d7ff9SSatish Balay   }
60606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
614c1ff481SSatish Balay   if (bs <= 0) bs = 1;
62a2d1c673SSatish Balay 
634c1ff481SSatish Balay   stash->bs         = bs;
649417f4adSLois Curfman McInnes   stash->nmax       = 0;
65434d7ff9SSatish Balay   stash->oldnmax    = 0;
669417f4adSLois Curfman McInnes   stash->n          = 0;
674c1ff481SSatish Balay   stash->reallocs   = -1;
6875cae7c1SHong Zhang   stash->space_head = 0;
6975cae7c1SHong Zhang   stash->space      = 0;
709417f4adSLois Curfman McInnes 
71bc5ccf88SSatish Balay   stash->send_waits  = 0;
72bc5ccf88SSatish Balay   stash->recv_waits  = 0;
73a2d1c673SSatish Balay   stash->send_status = 0;
74bc5ccf88SSatish Balay   stash->nsends      = 0;
75bc5ccf88SSatish Balay   stash->nrecvs      = 0;
76bc5ccf88SSatish Balay   stash->svalues     = 0;
77bc5ccf88SSatish Balay   stash->rvalues     = 0;
78563fb871SSatish Balay   stash->rindices    = 0;
79a2d1c673SSatish Balay   stash->nprocessed  = 0;
8067318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
81d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
828865f1eaSKarl Rupp 
830298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
84ac2b2aa0SJed Brown   ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
85ac2b2aa0SJed Brown   if (flg) {
86d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
87d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
89d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90ac2b2aa0SJed Brown   } else {
91ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
95ac2b2aa0SJed Brown   }
963a40ed3dSBarry Smith   PetscFunctionReturn(0);
979417f4adSLois Curfman McInnes }
989417f4adSLois Curfman McInnes 
994c1ff481SSatish Balay /*
1008798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1014c1ff481SSatish Balay */
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1059417f4adSLois Curfman McInnes {
106dfbe8321SBarry Smith   PetscErrorCode ierr;
107a2d1c673SSatish Balay 
108bc5ccf88SSatish Balay   PetscFunctionBegin;
1096bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
110ac2b2aa0SJed Brown   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
1118865f1eaSKarl Rupp 
11282740460SHong Zhang   stash->space = 0;
1138865f1eaSKarl Rupp 
114533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
115bc5ccf88SSatish Balay   PetscFunctionReturn(0);
116bc5ccf88SSatish Balay }
117bc5ccf88SSatish Balay 
1184c1ff481SSatish Balay /*
11967318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1204c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
12167318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1224c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1234c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1244c1ff481SSatish Balay    so that the same value can be used the next time through.
1254c1ff481SSatish Balay */
1264a2ae208SSatish Balay #undef __FUNCT__
1274a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
128dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129bc5ccf88SSatish Balay {
1306849ba73SBarry Smith   PetscErrorCode ierr;
131ac2b2aa0SJed Brown 
132ac2b2aa0SJed Brown   PetscFunctionBegin;
133ac2b2aa0SJed Brown   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134ac2b2aa0SJed Brown   PetscFunctionReturn(0);
135ac2b2aa0SJed Brown }
136ac2b2aa0SJed Brown 
137ac2b2aa0SJed Brown #undef __FUNCT__
138ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterEnd_Ref"
139ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
140ac2b2aa0SJed Brown {
141ac2b2aa0SJed Brown   PetscErrorCode ierr;
142533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
143a2d1c673SSatish Balay   MPI_Status     *send_status;
144a2d1c673SSatish Balay 
1453a40ed3dSBarry Smith   PetscFunctionBegin;
146533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
147a2d1c673SSatish Balay   /* wait on sends */
148a2d1c673SSatish Balay   if (nsends) {
149785e854fSJed Brown     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
150a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
151606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
152a2d1c673SSatish Balay   }
153a2d1c673SSatish Balay 
154c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
155434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
156434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
157b9b97703SBarry Smith   if (stash->n) {
15894b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1598a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
160434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
161b9b97703SBarry Smith   }
162434d7ff9SSatish Balay 
163d07ff455SSatish Balay   stash->nmax       = 0;
164d07ff455SSatish Balay   stash->n          = 0;
1654c1ff481SSatish Balay   stash->reallocs   = -1;
166a2d1c673SSatish Balay   stash->nprocessed = 0;
1678865f1eaSKarl Rupp 
1686bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1698865f1eaSKarl Rupp 
17082740460SHong Zhang   stash->space = 0;
1718865f1eaSKarl Rupp 
172606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
173606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
174c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
175c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
176606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
177c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
178563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1793a40ed3dSBarry Smith   PetscFunctionReturn(0);
1809417f4adSLois Curfman McInnes }
1819417f4adSLois Curfman McInnes 
1824c1ff481SSatish Balay /*
1838798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1844c1ff481SSatish Balay 
1854c1ff481SSatish Balay    Input Parameters:
1864c1ff481SSatish Balay    stash    - the stash
18794b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1884c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1894c1ff481SSatish Balay 
1904c1ff481SSatish Balay */
1914a2ae208SSatish Balay #undef __FUNCT__
1924a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
193c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
19497530c3fSBarry Smith {
195c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
19694b769a5SSatish Balay 
1973a40ed3dSBarry Smith   PetscFunctionBegin;
1981ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1991ecfd215SBarry Smith   if (reallocs) {
200434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
201434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
2021ecfd215SBarry Smith   }
203bc5ccf88SSatish Balay   PetscFunctionReturn(0);
204bc5ccf88SSatish Balay }
2054c1ff481SSatish Balay 
2064c1ff481SSatish Balay /*
2078798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
2084c1ff481SSatish Balay 
2094c1ff481SSatish Balay    Input Parameters:
2104c1ff481SSatish Balay    stash  - the stash
2114c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2124c1ff481SSatish Balay             this value is used while allocating memory.
2134c1ff481SSatish Balay */
2144a2ae208SSatish Balay #undef __FUNCT__
2154a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
216c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
217bc5ccf88SSatish Balay {
218bc5ccf88SSatish Balay   PetscFunctionBegin;
219434d7ff9SSatish Balay   stash->umax = max;
2203a40ed3dSBarry Smith   PetscFunctionReturn(0);
22197530c3fSBarry Smith }
22297530c3fSBarry Smith 
2238798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2244c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2254c1ff481SSatish Balay    being inserted into the stash.
2264c1ff481SSatish Balay 
2274c1ff481SSatish Balay    Input Parameters:
2284c1ff481SSatish Balay    stash - the stash
2294c1ff481SSatish Balay    incr  - the minimum increase requested
2304c1ff481SSatish Balay 
2314c1ff481SSatish Balay    Notes:
2324c1ff481SSatish Balay    This routine doubles the currently used memory.
2334c1ff481SSatish Balay  */
2344a2ae208SSatish Balay #undef __FUNCT__
2354a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
236c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2379417f4adSLois Curfman McInnes {
2386849ba73SBarry Smith   PetscErrorCode ierr;
2395bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2409417f4adSLois Curfman McInnes 
2413a40ed3dSBarry Smith   PetscFunctionBegin;
2429417f4adSLois Curfman McInnes   /* allocate a larger stash */
243c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
244434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
245434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
246c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
247434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
248434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
249434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2504c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
251d07ff455SSatish Balay 
25275cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
25375cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
254b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
255b087b6d6SSatish Balay     stash->space_head = stash->space;
25675cae7c1SHong Zhang   }
257b087b6d6SSatish Balay 
258bc5ccf88SSatish Balay   stash->reallocs++;
25975cae7c1SHong Zhang   stash->nmax = newnmax;
260bc5ccf88SSatish Balay   PetscFunctionReturn(0);
261bc5ccf88SSatish Balay }
262bc5ccf88SSatish Balay /*
2638798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2644c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2654c1ff481SSatish Balay   can be inserted with a single call to this function.
2664c1ff481SSatish Balay 
2674c1ff481SSatish Balay   Input Parameters:
2684c1ff481SSatish Balay   stash  - the stash
2694c1ff481SSatish Balay   row    - the global row correspoiding to the values
2704c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2714c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2724c1ff481SSatish Balay   values - the values inserted
273bc5ccf88SSatish Balay */
2744a2ae208SSatish Balay #undef __FUNCT__
2754a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
276ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
277bc5ccf88SSatish Balay {
278dfbe8321SBarry Smith   PetscErrorCode     ierr;
279b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
28075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
281bc5ccf88SSatish Balay 
282bc5ccf88SSatish Balay   PetscFunctionBegin;
2834c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
28475cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2858798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2869417f4adSLois Curfman McInnes   }
28775cae7c1SHong Zhang   space = stash->space;
28875cae7c1SHong Zhang   k     = space->local_used;
2894c1ff481SSatish Balay   for (i=0; i<n; i++) {
29088c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
29175cae7c1SHong Zhang     space->idx[k] = row;
29275cae7c1SHong Zhang     space->idy[k] = idxn[i];
29375cae7c1SHong Zhang     space->val[k] = values[i];
29475cae7c1SHong Zhang     k++;
295b400d20cSBarry Smith     cnt++;
2969417f4adSLois Curfman McInnes   }
297b400d20cSBarry Smith   stash->n               += cnt;
298b400d20cSBarry Smith   space->local_used      += cnt;
299b400d20cSBarry Smith   space->local_remaining -= cnt;
300a2d1c673SSatish Balay   PetscFunctionReturn(0);
301a2d1c673SSatish Balay }
30275cae7c1SHong Zhang 
3034c1ff481SSatish Balay /*
3048798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
3054c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
3064c1ff481SSatish Balay   can be inserted with a single call to this function.
307a2d1c673SSatish Balay 
3084c1ff481SSatish Balay   Input Parameters:
3094c1ff481SSatish Balay   stash   - the stash
3104c1ff481SSatish Balay   row     - the global row correspoiding to the values
3114c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
3124c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
3134c1ff481SSatish Balay   values  - the values inserted
3144c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3154c1ff481SSatish Balay             this happens because the input is columnoriented.
3164c1ff481SSatish Balay */
3174a2ae208SSatish Balay #undef __FUNCT__
3184a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
319ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
320a2d1c673SSatish Balay {
321dfbe8321SBarry Smith   PetscErrorCode     ierr;
32250e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
32375cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
324a2d1c673SSatish Balay 
3254c1ff481SSatish Balay   PetscFunctionBegin;
3264c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
32775cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3288798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3294c1ff481SSatish Balay   }
33075cae7c1SHong Zhang   space = stash->space;
33175cae7c1SHong Zhang   k     = space->local_used;
3324c1ff481SSatish Balay   for (i=0; i<n; i++) {
33388c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
33475cae7c1SHong Zhang     space->idx[k] = row;
33575cae7c1SHong Zhang     space->idy[k] = idxn[i];
33675cae7c1SHong Zhang     space->val[k] = values[i*stepval];
33775cae7c1SHong Zhang     k++;
338b400d20cSBarry Smith     cnt++;
3394c1ff481SSatish Balay   }
340b400d20cSBarry Smith   stash->n               += cnt;
341b400d20cSBarry Smith   space->local_used      += cnt;
342b400d20cSBarry Smith   space->local_remaining -= cnt;
3434c1ff481SSatish Balay   PetscFunctionReturn(0);
3444c1ff481SSatish Balay }
3454c1ff481SSatish Balay 
3464c1ff481SSatish Balay /*
3478798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3484c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3494c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3504c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3514c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3524c1ff481SSatish Balay 
3534c1ff481SSatish Balay   Input Parameters:
3544c1ff481SSatish Balay   stash  - the stash
3554c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3564c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3574c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3584c1ff481SSatish Balay            values. Each block is of size bs*bs.
3594c1ff481SSatish Balay   values - the values inserted
3604c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3614c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3624c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3634c1ff481SSatish Balay */
3644a2ae208SSatish Balay #undef __FUNCT__
3654a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
36654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3674c1ff481SSatish Balay {
368dfbe8321SBarry Smith   PetscErrorCode     ierr;
36975cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
37054f21887SBarry Smith   const PetscScalar  *vals;
37154f21887SBarry Smith   PetscScalar        *array;
37275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
373a2d1c673SSatish Balay 
374a2d1c673SSatish Balay   PetscFunctionBegin;
37575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3768798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
377a2d1c673SSatish Balay   }
37875cae7c1SHong Zhang   space = stash->space;
37975cae7c1SHong Zhang   l     = space->local_used;
38075cae7c1SHong Zhang   bs2   = bs*bs;
3814c1ff481SSatish Balay   for (i=0; i<n; i++) {
38275cae7c1SHong Zhang     space->idx[l] = row;
38375cae7c1SHong Zhang     space->idy[l] = idxn[i];
38475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
38575cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
38675cae7c1SHong Zhang        funtion call */
38775cae7c1SHong Zhang     array = space->val + bs2*l;
38875cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
38975cae7c1SHong Zhang     for (j=0; j<bs; j++) {
39075cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
39175cae7c1SHong Zhang       array++;
39275cae7c1SHong Zhang       vals += cmax*bs;
39375cae7c1SHong Zhang     }
39475cae7c1SHong Zhang     l++;
395a2d1c673SSatish Balay   }
3965bd3b8fbSHong Zhang   stash->n               += n;
39775cae7c1SHong Zhang   space->local_used      += n;
39875cae7c1SHong Zhang   space->local_remaining -= n;
3994c1ff481SSatish Balay   PetscFunctionReturn(0);
4004c1ff481SSatish Balay }
4014c1ff481SSatish Balay 
4024c1ff481SSatish Balay /*
4038798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
4044c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
4054c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
4064c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
4074c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
4084c1ff481SSatish Balay 
4094c1ff481SSatish Balay   Input Parameters:
4104c1ff481SSatish Balay   stash  - the stash
4114c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
4124c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
4134c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
4144c1ff481SSatish Balay            values. Each block is of size bs*bs.
4154c1ff481SSatish Balay   values - the values inserted
4164c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4174c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4184c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4194c1ff481SSatish Balay */
4204a2ae208SSatish Balay #undef __FUNCT__
4214a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
42254f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4234c1ff481SSatish Balay {
424dfbe8321SBarry Smith   PetscErrorCode     ierr;
42575cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
42654f21887SBarry Smith   const PetscScalar  *vals;
42754f21887SBarry Smith   PetscScalar        *array;
42875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4294c1ff481SSatish Balay 
4304c1ff481SSatish Balay   PetscFunctionBegin;
43175cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4328798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4334c1ff481SSatish Balay   }
43475cae7c1SHong Zhang   space = stash->space;
43575cae7c1SHong Zhang   l     = space->local_used;
43675cae7c1SHong Zhang   bs2   = bs*bs;
4374c1ff481SSatish Balay   for (i=0; i<n; i++) {
43875cae7c1SHong Zhang     space->idx[l] = row;
43975cae7c1SHong Zhang     space->idy[l] = idxn[i];
44075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
44175cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
44275cae7c1SHong Zhang      funtion call */
44375cae7c1SHong Zhang     array = space->val + bs2*l;
44475cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
44575cae7c1SHong Zhang     for (j=0; j<bs; j++) {
4468865f1eaSKarl Rupp       for (k=0; k<bs; k++) array[k] = vals[k];
44775cae7c1SHong Zhang       array += bs;
44875cae7c1SHong Zhang       vals  += rmax*bs;
44975cae7c1SHong Zhang     }
4505bd3b8fbSHong Zhang     l++;
451a2d1c673SSatish Balay   }
4525bd3b8fbSHong Zhang   stash->n               += n;
45375cae7c1SHong Zhang   space->local_used      += n;
45475cae7c1SHong Zhang   space->local_remaining -= n;
4553a40ed3dSBarry Smith   PetscFunctionReturn(0);
4569417f4adSLois Curfman McInnes }
4574c1ff481SSatish Balay /*
4588798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4594c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4604c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4614c1ff481SSatish Balay   processors.
462bc5ccf88SSatish Balay 
4634c1ff481SSatish Balay   Input Parameters:
4644c1ff481SSatish Balay   stash  - the stash
4654c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4664c1ff481SSatish Balay            for each node.
4674c1ff481SSatish Balay 
4684c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4694c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4704c1ff481SSatish Balay   the proper global indices.
4714c1ff481SSatish Balay */
4724a2ae208SSatish Balay #undef __FUNCT__
4734a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4741e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
475bc5ccf88SSatish Balay {
476ac2b2aa0SJed Brown   PetscErrorCode ierr;
477ac2b2aa0SJed Brown 
478ac2b2aa0SJed Brown   PetscFunctionBegin;
479ac2b2aa0SJed Brown   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
480ac2b2aa0SJed Brown   PetscFunctionReturn(0);
481ac2b2aa0SJed Brown }
482ac2b2aa0SJed Brown 
483ac2b2aa0SJed Brown #undef __FUNCT__
484ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterBegin_Ref"
485ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
486ac2b2aa0SJed Brown {
487c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
488fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
4896849ba73SBarry Smith   PetscErrorCode     ierr;
49075cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
49154f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
492bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
493563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
49476ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4955bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
49654f21887SBarry Smith   PetscScalar        *sp_val;
4975bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
498bc5ccf88SSatish Balay 
499bc5ccf88SSatish Balay   PetscFunctionBegin;
5004c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
50175cae7c1SHong Zhang 
502bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
503037dbc42SBarry Smith   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
5041795a4d1SJed Brown   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
505037dbc42SBarry Smith   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
506a2d1c673SSatish Balay 
50775cae7c1SHong Zhang   i       = j    = 0;
5087357eb19SBarry Smith   lastidx = -1;
5095bd3b8fbSHong Zhang   space   = stash->space_head;
5100298fd71SBarry Smith   while (space != NULL) {
51175cae7c1SHong Zhang     space_next = space->next;
5125bd3b8fbSHong Zhang     sp_idx     = space->idx;
51375cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
5147357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
5155bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
5167357eb19SBarry Smith       lastidx = idx;
5177357eb19SBarry Smith       for (; j<size; j++) {
5184c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
519563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
520bc5ccf88SSatish Balay         }
521bc5ccf88SSatish Balay       }
52275cae7c1SHong Zhang       i++;
52375cae7c1SHong Zhang     }
52475cae7c1SHong Zhang     space = space_next;
525bc5ccf88SSatish Balay   }
526563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
527563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5288865f1eaSKarl Rupp     if (nlengths[i]) {
52976ec1555SBarry Smith       sizes[i] = 1; nsends++;
5308865f1eaSKarl Rupp     }
531563fb871SSatish Balay   }
532bc5ccf88SSatish Balay 
53354f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
534563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
53576ec1555SBarry Smith    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
536563fb871SSatish Balay    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
537563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
538563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
539563fb871SSatish Balay    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
540563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
541563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
542563fb871SSatish Balay    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
543563fb871SSatish Balay    ierr = PetscFree(onodes);CHKERRQ(ierr);
5448865f1eaSKarl Rupp    ierr = PetscFree(olengths);CHKERRQ(ierr);}
545bc5ccf88SSatish Balay 
546bc5ccf88SSatish Balay   /* do sends:
547bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
548bc5ccf88SSatish Balay          the ith processor
549bc5ccf88SSatish Balay   */
550dcca6d9dSJed Brown   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
551785e854fSJed Brown   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
552dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
553a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
554bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
555bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
556563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
557533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
558bc5ccf88SSatish Balay   }
55975cae7c1SHong Zhang 
56075cae7c1SHong Zhang   i     = 0;
5615bd3b8fbSHong Zhang   space = stash->space_head;
5620298fd71SBarry Smith   while (space != NULL) {
56375cae7c1SHong Zhang     space_next = space->next;
5645bd3b8fbSHong Zhang     sp_idx     = space->idx;
5655bd3b8fbSHong Zhang     sp_idy     = space->idy;
5665bd3b8fbSHong Zhang     sp_val     = space->val;
56775cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
568bc5ccf88SSatish Balay       j = owner[i];
569a2d1c673SSatish Balay       if (bs2 == 1) {
5705bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
571a2d1c673SSatish Balay       } else {
572c1ac3661SBarry Smith         PetscInt    k;
57354f21887SBarry Smith         PetscScalar *buf1,*buf2;
5744c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
575b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5768865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
577a2d1c673SSatish Balay       }
5785bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5795bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
580bc5ccf88SSatish Balay       startv[j]++;
581bc5ccf88SSatish Balay       starti[j]++;
58275cae7c1SHong Zhang       i++;
58375cae7c1SHong Zhang     }
58475cae7c1SHong Zhang     space = space_next;
585bc5ccf88SSatish Balay   }
586bc5ccf88SSatish Balay   startv[0] = 0;
5878865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
588e5d0e772SSatish Balay 
589bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
59076ec1555SBarry Smith     if (sizes[i]) {
591563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
592a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
593bc5ccf88SSatish Balay     }
594b85c94c3SSatish Balay   }
5956cf91177SBarry Smith #if defined(PETSC_USE_INFO)
59693157e10SBarry Smith   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
597e5d0e772SSatish Balay   for (i=0; i<size; i++) {
59876ec1555SBarry Smith     if (sizes[i]) {
59930c47e72SSatish Balay       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
600e5d0e772SSatish Balay     }
601e5d0e772SSatish Balay   }
602e5d0e772SSatish Balay #endif
603c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
604606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
605c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
60676ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
607a2d1c673SSatish Balay 
608563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
609785e854fSJed Brown   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
610563fb871SSatish Balay 
611563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
612563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
613563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
614563fb871SSatish Balay   }
615563fb871SSatish Balay   stash->recv_waits = recv_waits;
6168865f1eaSKarl Rupp 
617563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
618563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
619563fb871SSatish Balay 
620c05d87d6SBarry Smith   stash->svalues         = svalues;
621c05d87d6SBarry Smith   stash->sindices        = sindices;
622c05d87d6SBarry Smith   stash->rvalues         = rvalues;
623c05d87d6SBarry Smith   stash->rindices        = rindices;
624c05d87d6SBarry Smith   stash->send_waits      = send_waits;
625c05d87d6SBarry Smith   stash->nsends          = nsends;
626c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
62767318a8aSJed Brown   stash->reproduce_count = 0;
628bc5ccf88SSatish Balay   PetscFunctionReturn(0);
629bc5ccf88SSatish Balay }
630bc5ccf88SSatish Balay 
631a2d1c673SSatish Balay /*
6328798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6338798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6344c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6354c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6364c1ff481SSatish Balay 
6374c1ff481SSatish Balay    Input Parameters:
6384c1ff481SSatish Balay    stash - the stash
6394c1ff481SSatish Balay 
6404c1ff481SSatish Balay    Output Parameters:
6414c1ff481SSatish Balay    nvals - the number of entries in the current message.
6424c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6434c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6444c1ff481SSatish Balay    vals  - the values
6454c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6464c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6474c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
648a2d1c673SSatish Balay */
6494a2ae208SSatish Balay #undef __FUNCT__
6504a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
65154f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
652bc5ccf88SSatish Balay {
6536849ba73SBarry Smith   PetscErrorCode ierr;
654ac2b2aa0SJed Brown 
655ac2b2aa0SJed Brown   PetscFunctionBegin;
656ac2b2aa0SJed Brown   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
657ac2b2aa0SJed Brown   PetscFunctionReturn(0);
658ac2b2aa0SJed Brown }
659ac2b2aa0SJed Brown 
660ac2b2aa0SJed Brown #undef __FUNCT__
661ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_Ref"
662ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
663ac2b2aa0SJed Brown {
664ac2b2aa0SJed Brown   PetscErrorCode ierr;
665533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
666fe09c992SBarry Smith   PetscInt       bs2;
667a2d1c673SSatish Balay   MPI_Status     recv_status;
668ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
669bc5ccf88SSatish Balay 
670bc5ccf88SSatish Balay   PetscFunctionBegin;
671a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
672a2d1c673SSatish Balay   /* Return if no more messages to process */
6738865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
674a2d1c673SSatish Balay 
6754c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
67667318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
677a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
678a2d1c673SSatish Balay   while (!match_found) {
67967318a8aSJed Brown     if (stash->reproduce) {
68067318a8aSJed Brown       i    = stash->reproduce_count++;
68167318a8aSJed Brown       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
68267318a8aSJed Brown     } else {
683a2d1c673SSatish Balay       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
68467318a8aSJed Brown     }
685e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
686533163c2SBarry Smith 
68767318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
688a2d1c673SSatish Balay     if (i % 2) {
689a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
6908865f1eaSKarl Rupp 
691c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6928865f1eaSKarl Rupp 
693a2d1c673SSatish Balay       *nvals = *nvals/bs2;
694563fb871SSatish Balay     } else {
695563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
6968865f1eaSKarl Rupp 
697563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
6988865f1eaSKarl Rupp 
699563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
700bc5ccf88SSatish Balay     }
701a2d1c673SSatish Balay 
702cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
703c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
704c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
705a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
706563fb871SSatish Balay       *rows = stash->rindices[i2];
707a2d1c673SSatish Balay       *cols = *rows + *nvals;
708563fb871SSatish Balay       *vals = stash->rvalues[i1];
709a2d1c673SSatish Balay       *flg  = 1;
710a2d1c673SSatish Balay       stash->nprocessed++;
71135d8aa7fSBarry Smith       match_found = PETSC_TRUE;
712bc5ccf88SSatish Balay     }
713bc5ccf88SSatish Balay   }
714bc5ccf88SSatish Balay   PetscFunctionReturn(0);
715bc5ccf88SSatish Balay }
716d7d60843SJed Brown 
717d7d60843SJed Brown typedef struct {
718d7d60843SJed Brown   PetscInt row;
719d7d60843SJed Brown   PetscInt col;
720d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
721d7d60843SJed Brown } MatStashBlock;
722d7d60843SJed Brown 
723d7d60843SJed Brown #undef __FUNCT__
724d7d60843SJed Brown #define __FUNCT__ "MatStashSortCompress_Private"
725d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
726d7d60843SJed Brown {
727d7d60843SJed Brown   PetscErrorCode ierr;
728d7d60843SJed Brown   PetscMatStashSpace space;
729d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
730d7d60843SJed Brown   PetscScalar **valptr;
731d7d60843SJed Brown 
732d7d60843SJed Brown   PetscFunctionBegin;
733d7d60843SJed Brown   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
734d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
735d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
736d7d60843SJed Brown       row[cnt] = space->idx[i];
737d7d60843SJed Brown       col[cnt] = space->idy[i];
738d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
739d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
740d7d60843SJed Brown       cnt++;
741d7d60843SJed Brown     }
742d7d60843SJed Brown   }
743d7d60843SJed Brown   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
744d7d60843SJed Brown   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
745d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
746d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
747d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
748d7d60843SJed Brown       PetscInt colstart;
749d7d60843SJed Brown       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
750d7d60843SJed Brown       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
751d7d60843SJed Brown         PetscInt j,l;
752d7d60843SJed Brown         MatStashBlock *block;
753d7d60843SJed Brown         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
754d7d60843SJed Brown         block->row = row[rowstart];
755d7d60843SJed Brown         block->col = col[colstart];
756d7d60843SJed Brown         ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
757d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
758d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
759d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
760d7d60843SJed Brown           } else {
761d7d60843SJed Brown             ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
762d7d60843SJed Brown           }
763d7d60843SJed Brown         }
764d7d60843SJed Brown         colstart = j;
765d7d60843SJed Brown       }
766d7d60843SJed Brown       rowstart = i;
767d7d60843SJed Brown     }
768d7d60843SJed Brown   }
769d7d60843SJed Brown   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
770d7d60843SJed Brown   PetscFunctionReturn(0);
771d7d60843SJed Brown }
772d7d60843SJed Brown 
773d7d60843SJed Brown #undef __FUNCT__
774d7d60843SJed Brown #define __FUNCT__ "MatStashBlockTypeSetUp"
775d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
776d7d60843SJed Brown {
777d7d60843SJed Brown   PetscErrorCode ierr;
778d7d60843SJed Brown 
779d7d60843SJed Brown   PetscFunctionBegin;
780d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
781d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
782d7d60843SJed Brown     PetscMPIInt  blocklens[2];
783d7d60843SJed Brown     MPI_Aint     displs[2];
784d7d60843SJed Brown     MPI_Datatype types[2],stype;
785d7d60843SJed Brown 
786d7d60843SJed Brown     stash->blocktype_size = offsetof(MatStashBlock,vals) + bs2*sizeof(PetscScalar);
787d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
788d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
789d7d60843SJed Brown     }
790d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
791d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
792d7d60843SJed Brown     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
793d7d60843SJed Brown     blocklens[0] = 2;
794d7d60843SJed Brown     blocklens[1] = bs2;
795d7d60843SJed Brown     displs[0] = offsetof(MatStashBlock,row);
796d7d60843SJed Brown     displs[1] = offsetof(MatStashBlock,vals);
797d7d60843SJed Brown     types[0] = MPIU_INT;
798d7d60843SJed Brown     types[1] = MPIU_SCALAR;
799d7d60843SJed Brown     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
800d7d60843SJed Brown     ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
801d7d60843SJed Brown     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
802d7d60843SJed Brown     ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
803d7d60843SJed Brown     ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
804d7d60843SJed Brown   }
805d7d60843SJed Brown   PetscFunctionReturn(0);
806d7d60843SJed Brown }
807d7d60843SJed Brown 
808d7d60843SJed Brown #undef __FUNCT__
809d7d60843SJed Brown #define __FUNCT__ "MatStashBTSSend_Private"
810d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
811d7d60843SJed Brown  * Here we post the main sends.
812d7d60843SJed Brown  */
813d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
814d7d60843SJed Brown {
815d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
816d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
817d7d60843SJed Brown   PetscErrorCode ierr;
818d7d60843SJed Brown 
819d7d60843SJed Brown   PetscFunctionBegin;
820d7d60843SJed Brown   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
821d7d60843SJed Brown   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
822d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
823d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
824d7d60843SJed Brown   PetscFunctionReturn(0);
825d7d60843SJed Brown }
826d7d60843SJed Brown 
827d7d60843SJed Brown #undef __FUNCT__
828d7d60843SJed Brown #define __FUNCT__ "MatStashBTSRecv_Private"
829d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
830d7d60843SJed Brown  * Here we post the main recvs.
831d7d60843SJed Brown  */
832d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
833d7d60843SJed Brown {
834d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
835d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
836d7d60843SJed Brown   MatStashFrame *frame;
837d7d60843SJed Brown   PetscErrorCode ierr;
838d7d60843SJed Brown 
839d7d60843SJed Brown   PetscFunctionBegin;
840d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
841d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
842d7d60843SJed Brown   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
843d7d60843SJed Brown   frame->count = hdr->count;
844d7d60843SJed Brown   frame->pending = 1;
845d7d60843SJed Brown   PetscFunctionReturn(0);
846d7d60843SJed Brown }
847d7d60843SJed Brown 
848d7d60843SJed Brown #undef __FUNCT__
849d7d60843SJed Brown #define __FUNCT__ "MatStashScatterBegin_BTS"
850d7d60843SJed Brown /*
851d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
852d7d60843SJed Brown  */
853d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
854d7d60843SJed Brown {
855d7d60843SJed Brown   PetscErrorCode ierr;
856d7d60843SJed Brown   size_t nblocks;
857d7d60843SJed Brown   char *sendblocks;
858d7d60843SJed Brown 
859d7d60843SJed Brown   PetscFunctionBegin;
86097da8949SJed Brown   if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
86197da8949SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
86297da8949SJed Brown   }
86397da8949SJed Brown 
864d7d60843SJed Brown   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
865d7d60843SJed Brown   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
866d7d60843SJed Brown   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
867d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
86897da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
86997da8949SJed Brown     PetscInt i,b;
87097da8949SJed Brown     for (i=0,b=0; i<stash->nsendranks; i++) {
87197da8949SJed Brown       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
87297da8949SJed Brown       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
87397da8949SJed Brown       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
87497da8949SJed Brown       for ( ; b<nblocks; b++) {
87597da8949SJed Brown         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
87697da8949SJed Brown         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
87797da8949SJed Brown         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
87897da8949SJed Brown         stash->sendhdr[i].count++;
87997da8949SJed Brown       }
88097da8949SJed Brown     }
88197da8949SJed Brown   } else {                      /* Dynamically count and pack (first time) */
882d7d60843SJed Brown     PetscInt i,rowstart,sendno;
883d7d60843SJed Brown 
884d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
885d7d60843SJed Brown     stash->nsendranks = 0;
886d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
887*7e2ea869SJed Brown       PetscInt owner;
888d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
889d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
890d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
891d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
892d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
893*7e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
894d7d60843SJed Brown       }
895d7d60843SJed Brown       stash->nsendranks++;
896d7d60843SJed Brown       rowstart = i;
897d7d60843SJed Brown     }
898d7d60843SJed Brown     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
899d7d60843SJed Brown 
900d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
901d7d60843SJed Brown     sendno = 0;
902d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
903d7d60843SJed Brown       PetscInt owner;
904d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
905d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
906d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
907d7d60843SJed Brown       stash->sendranks[sendno] = owner;
908d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
909d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
910*7e2ea869SJed Brown         if (sendblock_i->row >= owners[owner+1]) break;
911d7d60843SJed Brown       }
912d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
913d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
914d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
915d7d60843SJed Brown       stash->sendhdr[sendno].insertmode = mat->insertmode;
916d7d60843SJed Brown       sendno++;
917d7d60843SJed Brown       rowstart = i;
918d7d60843SJed Brown     }
919d7d60843SJed Brown     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
920d7d60843SJed Brown   }
921d7d60843SJed Brown 
92297da8949SJed Brown   if (stash->subset_off_proc && mat->subsetoffprocentries) {
92397da8949SJed Brown     PetscMPIInt i,tag;
92497da8949SJed Brown     ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
92597da8949SJed Brown     for (i=0; i<stash->nrecvranks; i++) {
92697da8949SJed Brown       ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
92797da8949SJed Brown     }
92897da8949SJed Brown     for (i=0; i<stash->nsendranks; i++) {
92997da8949SJed Brown       ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
93097da8949SJed Brown     }
93197da8949SJed Brown     stash->use_status = PETSC_TRUE; /* Use count from message status. */
93297da8949SJed Brown   } else {
933d7d60843SJed Brown     ierr = PetscCommBuildTwoSidedFReq(stash->comm,2,MPIU_INT,stash->nsendranks,stash->sendranks,stash->sendhdr,
934d7d60843SJed Brown                                       &stash->nrecvranks,&stash->recvranks,&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
935d7d60843SJed Brown                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
93697da8949SJed Brown     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
93797da8949SJed Brown   }
938d7d60843SJed Brown 
939d7d60843SJed Brown   ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
940d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
941d7d60843SJed Brown   stash->recvframe_active = NULL;
942d7d60843SJed Brown   stash->recvframe_i      = 0;
943d7d60843SJed Brown   stash->some_i           = 0;
944d7d60843SJed Brown   stash->some_count       = 0;
945d7d60843SJed Brown   stash->recvcount        = 0;
94697da8949SJed Brown   stash->subset_off_proc  = mat->subsetoffprocentries;
947d7d60843SJed Brown   PetscFunctionReturn(0);
948d7d60843SJed Brown }
949d7d60843SJed Brown 
950d7d60843SJed Brown #undef __FUNCT__
951d7d60843SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_BTS"
952d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
953d7d60843SJed Brown {
954d7d60843SJed Brown   PetscErrorCode ierr;
955d7d60843SJed Brown   MatStashBlock *block;
956d7d60843SJed Brown 
957d7d60843SJed Brown   PetscFunctionBegin;
958d7d60843SJed Brown   *flg = 0;
959d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
960d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
961d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
962d7d60843SJed Brown       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
963d7d60843SJed Brown       stash->some_i = 0;
964d7d60843SJed Brown     }
965d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
966d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
967d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
968d7d60843SJed Brown       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
969d7d60843SJed Brown     }
970d7d60843SJed Brown     stash->some_i++;
971d7d60843SJed Brown     stash->recvcount++;
972d7d60843SJed Brown     stash->recvframe_i = 0;
973d7d60843SJed Brown   }
974d7d60843SJed Brown   *n = 1;
975d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
976d7d60843SJed Brown   *row = &block->row;
977d7d60843SJed Brown   *col = &block->col;
978d7d60843SJed Brown   *val = block->vals;
979d7d60843SJed Brown   stash->recvframe_i++;
980d7d60843SJed Brown   *flg = 1;
981d7d60843SJed Brown   PetscFunctionReturn(0);
982d7d60843SJed Brown }
983d7d60843SJed Brown 
984d7d60843SJed Brown #undef __FUNCT__
985d7d60843SJed Brown #define __FUNCT__ "MatStashScatterEnd_BTS"
986d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
987d7d60843SJed Brown {
988d7d60843SJed Brown   PetscErrorCode ierr;
989d7d60843SJed Brown 
990d7d60843SJed Brown   PetscFunctionBegin;
991d7d60843SJed Brown   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
99297da8949SJed Brown   if (!stash->subset_off_proc) { /* Only collect the communication contexts if it won't be reused. */
993d7d60843SJed Brown     ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
99497da8949SJed Brown   }
995d7d60843SJed Brown 
996d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
997d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
998d7d60843SJed Brown      Also update the oldmax, only if it increases */
999d7d60843SJed Brown   if (stash->n) {
1000d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
1001d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1002d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1003d7d60843SJed Brown   }
1004d7d60843SJed Brown 
1005d7d60843SJed Brown   stash->nmax       = 0;
1006d7d60843SJed Brown   stash->n          = 0;
1007d7d60843SJed Brown   stash->reallocs   = -1;
1008d7d60843SJed Brown   stash->nprocessed = 0;
1009d7d60843SJed Brown 
1010d7d60843SJed Brown   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1011d7d60843SJed Brown 
1012d7d60843SJed Brown   stash->space = 0;
1013d7d60843SJed Brown 
1014d7d60843SJed Brown   PetscFunctionReturn(0);
1015d7d60843SJed Brown }
1016d7d60843SJed Brown 
1017d7d60843SJed Brown #undef __FUNCT__
1018d7d60843SJed Brown #define __FUNCT__ "MatStashScatterDestroy_BTS"
1019d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1020d7d60843SJed Brown {
1021d7d60843SJed Brown   PetscErrorCode ierr;
1022d7d60843SJed Brown 
1023d7d60843SJed Brown   PetscFunctionBegin;
1024d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1025d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1026d7d60843SJed Brown   stash->recvframes = NULL;
1027d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1028d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
1029d7d60843SJed Brown     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1030d7d60843SJed Brown   }
1031d7d60843SJed Brown   stash->nsendranks = 0;
1032d7d60843SJed Brown   stash->nrecvranks = 0;
1033d7d60843SJed Brown   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1034d7d60843SJed Brown   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1035d7d60843SJed Brown   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1036d7d60843SJed Brown   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1037d7d60843SJed Brown   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1038d7d60843SJed Brown   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1039d7d60843SJed Brown   PetscFunctionReturn(0);
1040d7d60843SJed Brown }
1041