xref: /petsc/src/mat/utils/matstash.c (revision d7d60843ef74bf0220ee2ef6d73674229d676f5f)
12d5177cdSBarry Smith 
2b45d2f2cSJed Brown #include <petsc-private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
/* Forward declarations of the two scatter implementations defined in this file
   (suffix _Ref and suffix _BTS). MatStashCreate_Private() stores one set of them
   in the stash's ScatterBegin/ScatterGetMesg/ScatterEnd/ScatterDestroy pointers. */
6ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9*d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
10*d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
11*d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
12*d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
13*d7d60843SJed Brown 
149417f4adSLois Curfman McInnes /*
158798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
164c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
174c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
189417f4adSLois Curfman McInnes 
194c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
204c1ff481SSatish Balay 
214c1ff481SSatish Balay   Input Parameters:
224c1ff481SSatish Balay   comm - communicator, required for scatters.
234c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
244c1ff481SSatish Balay 
254c1ff481SSatish Balay   Output Parameters:
264c1ff481SSatish Balay   stash    - the newly created stash
279417f4adSLois Curfman McInnes */
284a2ae208SSatish Balay #undef __FUNCT__
294a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
30c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
319417f4adSLois Curfman McInnes {
32dfbe8321SBarry Smith   PetscErrorCode ierr;
33533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
34ace3abfcSBarry Smith   PetscBool      flg;
35bc5ccf88SSatish Balay 
363a40ed3dSBarry Smith   PetscFunctionBegin;
37bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
38752ec6e0SSatish Balay   stash->comm = comm;
398865f1eaSKarl Rupp 
40752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
41a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
42a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
43a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
44785e854fSJed Brown   ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
45533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
46533163c2SBarry Smith 
47bc5ccf88SSatish Balay 
48434d7ff9SSatish Balay   nopt = stash->size;
49785e854fSJed Brown   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
500298fd71SBarry Smith   ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
51434d7ff9SSatish Balay   if (flg) {
52434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
53434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
54434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
55f4ab19daSSatish Balay     else                          max = 0; /* Use default */
56434d7ff9SSatish Balay     stash->umax = max;
57434d7ff9SSatish Balay   } else {
58434d7ff9SSatish Balay     stash->umax = 0;
59434d7ff9SSatish Balay   }
60606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
614c1ff481SSatish Balay   if (bs <= 0) bs = 1;
62a2d1c673SSatish Balay 
634c1ff481SSatish Balay   stash->bs         = bs;
649417f4adSLois Curfman McInnes   stash->nmax       = 0;
65434d7ff9SSatish Balay   stash->oldnmax    = 0;
669417f4adSLois Curfman McInnes   stash->n          = 0;
674c1ff481SSatish Balay   stash->reallocs   = -1;
6875cae7c1SHong Zhang   stash->space_head = 0;
6975cae7c1SHong Zhang   stash->space      = 0;
709417f4adSLois Curfman McInnes 
71bc5ccf88SSatish Balay   stash->send_waits  = 0;
72bc5ccf88SSatish Balay   stash->recv_waits  = 0;
73a2d1c673SSatish Balay   stash->send_status = 0;
74bc5ccf88SSatish Balay   stash->nsends      = 0;
75bc5ccf88SSatish Balay   stash->nrecvs      = 0;
76bc5ccf88SSatish Balay   stash->svalues     = 0;
77bc5ccf88SSatish Balay   stash->rvalues     = 0;
78563fb871SSatish Balay   stash->rindices    = 0;
79a2d1c673SSatish Balay   stash->nprocessed  = 0;
8067318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
81*d7d60843SJed Brown   stash->blocktype   = MPI_DATATYPE_NULL;
828865f1eaSKarl Rupp 
830298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
84ac2b2aa0SJed Brown   ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
85ac2b2aa0SJed Brown   if (flg) {
86*d7d60843SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_BTS;
87*d7d60843SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88*d7d60843SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_BTS;
89*d7d60843SJed Brown     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90ac2b2aa0SJed Brown   } else {
91ac2b2aa0SJed Brown     stash->ScatterBegin   = MatStashScatterBegin_Ref;
92ac2b2aa0SJed Brown     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
93ac2b2aa0SJed Brown     stash->ScatterEnd     = MatStashScatterEnd_Ref;
94ac2b2aa0SJed Brown     stash->ScatterDestroy = NULL;
95ac2b2aa0SJed Brown   }
963a40ed3dSBarry Smith   PetscFunctionReturn(0);
979417f4adSLois Curfman McInnes }
989417f4adSLois Curfman McInnes 
994c1ff481SSatish Balay /*
1008798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
1014c1ff481SSatish Balay */
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
104dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
1059417f4adSLois Curfman McInnes {
106dfbe8321SBarry Smith   PetscErrorCode ierr;
107a2d1c673SSatish Balay 
108bc5ccf88SSatish Balay   PetscFunctionBegin;
1096bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
110ac2b2aa0SJed Brown   if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}
1118865f1eaSKarl Rupp 
11282740460SHong Zhang   stash->space = 0;
1138865f1eaSKarl Rupp 
114533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
115bc5ccf88SSatish Balay   PetscFunctionReturn(0);
116bc5ccf88SSatish Balay }
117bc5ccf88SSatish Balay 
1184c1ff481SSatish Balay /*
11967318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
1204c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
12167318a8aSJed Brown    all the memory used for message passing is cleaned up. This
1224c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1234c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1244c1ff481SSatish Balay    so that the same value can be used the next time through.
1254c1ff481SSatish Balay */
1264a2ae208SSatish Balay #undef __FUNCT__
1274a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
128dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129bc5ccf88SSatish Balay {
1306849ba73SBarry Smith   PetscErrorCode ierr;
131ac2b2aa0SJed Brown 
132ac2b2aa0SJed Brown   PetscFunctionBegin;
133ac2b2aa0SJed Brown   ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
134ac2b2aa0SJed Brown   PetscFunctionReturn(0);
135ac2b2aa0SJed Brown }
136ac2b2aa0SJed Brown 
137ac2b2aa0SJed Brown #undef __FUNCT__
138ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterEnd_Ref"
139ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
140ac2b2aa0SJed Brown {
141ac2b2aa0SJed Brown   PetscErrorCode ierr;
142533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
143a2d1c673SSatish Balay   MPI_Status     *send_status;
144a2d1c673SSatish Balay 
1453a40ed3dSBarry Smith   PetscFunctionBegin;
146533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
147a2d1c673SSatish Balay   /* wait on sends */
148a2d1c673SSatish Balay   if (nsends) {
149785e854fSJed Brown     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
150a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
151606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
152a2d1c673SSatish Balay   }
153a2d1c673SSatish Balay 
154c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
155434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
156434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
157b9b97703SBarry Smith   if (stash->n) {
15894b769a5SSatish Balay     bs2     = stash->bs*stash->bs;
1598a9378f0SSatish Balay     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
160434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
161b9b97703SBarry Smith   }
162434d7ff9SSatish Balay 
163d07ff455SSatish Balay   stash->nmax       = 0;
164d07ff455SSatish Balay   stash->n          = 0;
1654c1ff481SSatish Balay   stash->reallocs   = -1;
166a2d1c673SSatish Balay   stash->nprocessed = 0;
1678865f1eaSKarl Rupp 
1686bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
1698865f1eaSKarl Rupp 
17082740460SHong Zhang   stash->space = 0;
1718865f1eaSKarl Rupp 
172606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
173606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
174c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
175c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
176606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
177c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
178563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1793a40ed3dSBarry Smith   PetscFunctionReturn(0);
1809417f4adSLois Curfman McInnes }
1819417f4adSLois Curfman McInnes 
1824c1ff481SSatish Balay /*
1838798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1844c1ff481SSatish Balay 
1854c1ff481SSatish Balay    Input Parameters:
1864c1ff481SSatish Balay    stash    - the stash
18794b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1884c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1894c1ff481SSatish Balay 
1904c1ff481SSatish Balay */
1914a2ae208SSatish Balay #undef __FUNCT__
1924a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
193c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
19497530c3fSBarry Smith {
195c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
19694b769a5SSatish Balay 
1973a40ed3dSBarry Smith   PetscFunctionBegin;
1981ecfd215SBarry Smith   if (nstash) *nstash = stash->n*bs2;
1991ecfd215SBarry Smith   if (reallocs) {
200434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
201434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
2021ecfd215SBarry Smith   }
203bc5ccf88SSatish Balay   PetscFunctionReturn(0);
204bc5ccf88SSatish Balay }
2054c1ff481SSatish Balay 
2064c1ff481SSatish Balay /*
2078798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
2084c1ff481SSatish Balay 
2094c1ff481SSatish Balay    Input Parameters:
2104c1ff481SSatish Balay    stash  - the stash
2114c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
2124c1ff481SSatish Balay             this value is used while allocating memory.
2134c1ff481SSatish Balay */
2144a2ae208SSatish Balay #undef __FUNCT__
2154a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
216c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
217bc5ccf88SSatish Balay {
218bc5ccf88SSatish Balay   PetscFunctionBegin;
219434d7ff9SSatish Balay   stash->umax = max;
2203a40ed3dSBarry Smith   PetscFunctionReturn(0);
22197530c3fSBarry Smith }
22297530c3fSBarry Smith 
2238798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2244c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2254c1ff481SSatish Balay    being inserted into the stash.
2264c1ff481SSatish Balay 
2274c1ff481SSatish Balay    Input Parameters:
2284c1ff481SSatish Balay    stash - the stash
2294c1ff481SSatish Balay    incr  - the minimum increase requested
2304c1ff481SSatish Balay 
2314c1ff481SSatish Balay    Notes:
2324c1ff481SSatish Balay    This routine doubles the currently used memory.
2334c1ff481SSatish Balay  */
2344a2ae208SSatish Balay #undef __FUNCT__
2354a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
236c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2379417f4adSLois Curfman McInnes {
2386849ba73SBarry Smith   PetscErrorCode ierr;
2395bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2409417f4adSLois Curfman McInnes 
2413a40ed3dSBarry Smith   PetscFunctionBegin;
2429417f4adSLois Curfman McInnes   /* allocate a larger stash */
243c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
244434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
245434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
246c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
247434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
248434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
249434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2504c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
251d07ff455SSatish Balay 
25275cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
25375cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
254b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
255b087b6d6SSatish Balay     stash->space_head = stash->space;
25675cae7c1SHong Zhang   }
257b087b6d6SSatish Balay 
258bc5ccf88SSatish Balay   stash->reallocs++;
25975cae7c1SHong Zhang   stash->nmax = newnmax;
260bc5ccf88SSatish Balay   PetscFunctionReturn(0);
261bc5ccf88SSatish Balay }
262bc5ccf88SSatish Balay /*
2638798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2644c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2654c1ff481SSatish Balay   can be inserted with a single call to this function.
2664c1ff481SSatish Balay 
2674c1ff481SSatish Balay   Input Parameters:
2684c1ff481SSatish Balay   stash  - the stash
2694c1ff481SSatish Balay   row    - the global row correspoiding to the values
2704c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2714c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2724c1ff481SSatish Balay   values - the values inserted
273bc5ccf88SSatish Balay */
2744a2ae208SSatish Balay #undef __FUNCT__
2754a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
276ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
277bc5ccf88SSatish Balay {
278dfbe8321SBarry Smith   PetscErrorCode     ierr;
279b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
28075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
281bc5ccf88SSatish Balay 
282bc5ccf88SSatish Balay   PetscFunctionBegin;
2834c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
28475cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
2858798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2869417f4adSLois Curfman McInnes   }
28775cae7c1SHong Zhang   space = stash->space;
28875cae7c1SHong Zhang   k     = space->local_used;
2894c1ff481SSatish Balay   for (i=0; i<n; i++) {
29088c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
29175cae7c1SHong Zhang     space->idx[k] = row;
29275cae7c1SHong Zhang     space->idy[k] = idxn[i];
29375cae7c1SHong Zhang     space->val[k] = values[i];
29475cae7c1SHong Zhang     k++;
295b400d20cSBarry Smith     cnt++;
2969417f4adSLois Curfman McInnes   }
297b400d20cSBarry Smith   stash->n               += cnt;
298b400d20cSBarry Smith   space->local_used      += cnt;
299b400d20cSBarry Smith   space->local_remaining -= cnt;
300a2d1c673SSatish Balay   PetscFunctionReturn(0);
301a2d1c673SSatish Balay }
30275cae7c1SHong Zhang 
3034c1ff481SSatish Balay /*
3048798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
3054c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
3064c1ff481SSatish Balay   can be inserted with a single call to this function.
307a2d1c673SSatish Balay 
3084c1ff481SSatish Balay   Input Parameters:
3094c1ff481SSatish Balay   stash   - the stash
3104c1ff481SSatish Balay   row     - the global row correspoiding to the values
3114c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
3124c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
3134c1ff481SSatish Balay   values  - the values inserted
3144c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
3154c1ff481SSatish Balay             this happens because the input is columnoriented.
3164c1ff481SSatish Balay */
3174a2ae208SSatish Balay #undef __FUNCT__
3184a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
319ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
320a2d1c673SSatish Balay {
321dfbe8321SBarry Smith   PetscErrorCode     ierr;
32250e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
32375cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
324a2d1c673SSatish Balay 
3254c1ff481SSatish Balay   PetscFunctionBegin;
3264c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
32775cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3288798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3294c1ff481SSatish Balay   }
33075cae7c1SHong Zhang   space = stash->space;
33175cae7c1SHong Zhang   k     = space->local_used;
3324c1ff481SSatish Balay   for (i=0; i<n; i++) {
33388c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
33475cae7c1SHong Zhang     space->idx[k] = row;
33575cae7c1SHong Zhang     space->idy[k] = idxn[i];
33675cae7c1SHong Zhang     space->val[k] = values[i*stepval];
33775cae7c1SHong Zhang     k++;
338b400d20cSBarry Smith     cnt++;
3394c1ff481SSatish Balay   }
340b400d20cSBarry Smith   stash->n               += cnt;
341b400d20cSBarry Smith   space->local_used      += cnt;
342b400d20cSBarry Smith   space->local_remaining -= cnt;
3434c1ff481SSatish Balay   PetscFunctionReturn(0);
3444c1ff481SSatish Balay }
3454c1ff481SSatish Balay 
3464c1ff481SSatish Balay /*
3478798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3484c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3494c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3504c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3514c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3524c1ff481SSatish Balay 
3534c1ff481SSatish Balay   Input Parameters:
3544c1ff481SSatish Balay   stash  - the stash
3554c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3564c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3574c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3584c1ff481SSatish Balay            values. Each block is of size bs*bs.
3594c1ff481SSatish Balay   values - the values inserted
3604c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3614c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3624c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3634c1ff481SSatish Balay */
3644a2ae208SSatish Balay #undef __FUNCT__
3654a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
36654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3674c1ff481SSatish Balay {
368dfbe8321SBarry Smith   PetscErrorCode     ierr;
36975cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
37054f21887SBarry Smith   const PetscScalar  *vals;
37154f21887SBarry Smith   PetscScalar        *array;
37275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
373a2d1c673SSatish Balay 
374a2d1c673SSatish Balay   PetscFunctionBegin;
37575cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
3768798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
377a2d1c673SSatish Balay   }
37875cae7c1SHong Zhang   space = stash->space;
37975cae7c1SHong Zhang   l     = space->local_used;
38075cae7c1SHong Zhang   bs2   = bs*bs;
3814c1ff481SSatish Balay   for (i=0; i<n; i++) {
38275cae7c1SHong Zhang     space->idx[l] = row;
38375cae7c1SHong Zhang     space->idy[l] = idxn[i];
38475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
38575cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
38675cae7c1SHong Zhang        funtion call */
38775cae7c1SHong Zhang     array = space->val + bs2*l;
38875cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
38975cae7c1SHong Zhang     for (j=0; j<bs; j++) {
39075cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
39175cae7c1SHong Zhang       array++;
39275cae7c1SHong Zhang       vals += cmax*bs;
39375cae7c1SHong Zhang     }
39475cae7c1SHong Zhang     l++;
395a2d1c673SSatish Balay   }
3965bd3b8fbSHong Zhang   stash->n               += n;
39775cae7c1SHong Zhang   space->local_used      += n;
39875cae7c1SHong Zhang   space->local_remaining -= n;
3994c1ff481SSatish Balay   PetscFunctionReturn(0);
4004c1ff481SSatish Balay }
4014c1ff481SSatish Balay 
4024c1ff481SSatish Balay /*
4038798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
4044c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
4054c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
4064c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
4074c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
4084c1ff481SSatish Balay 
4094c1ff481SSatish Balay   Input Parameters:
4104c1ff481SSatish Balay   stash  - the stash
4114c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
4124c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
4134c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
4144c1ff481SSatish Balay            values. Each block is of size bs*bs.
4154c1ff481SSatish Balay   values - the values inserted
4164c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
4174c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
4184c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
4194c1ff481SSatish Balay */
4204a2ae208SSatish Balay #undef __FUNCT__
4214a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
42254f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4234c1ff481SSatish Balay {
424dfbe8321SBarry Smith   PetscErrorCode     ierr;
42575cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
42654f21887SBarry Smith   const PetscScalar  *vals;
42754f21887SBarry Smith   PetscScalar        *array;
42875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4294c1ff481SSatish Balay 
4304c1ff481SSatish Balay   PetscFunctionBegin;
43175cae7c1SHong Zhang   if (!space || space->local_remaining < n) {
4328798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4334c1ff481SSatish Balay   }
43475cae7c1SHong Zhang   space = stash->space;
43575cae7c1SHong Zhang   l     = space->local_used;
43675cae7c1SHong Zhang   bs2   = bs*bs;
4374c1ff481SSatish Balay   for (i=0; i<n; i++) {
43875cae7c1SHong Zhang     space->idx[l] = row;
43975cae7c1SHong Zhang     space->idy[l] = idxn[i];
44075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
44175cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
44275cae7c1SHong Zhang      funtion call */
44375cae7c1SHong Zhang     array = space->val + bs2*l;
44475cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
44575cae7c1SHong Zhang     for (j=0; j<bs; j++) {
4468865f1eaSKarl Rupp       for (k=0; k<bs; k++) array[k] = vals[k];
44775cae7c1SHong Zhang       array += bs;
44875cae7c1SHong Zhang       vals  += rmax*bs;
44975cae7c1SHong Zhang     }
4505bd3b8fbSHong Zhang     l++;
451a2d1c673SSatish Balay   }
4525bd3b8fbSHong Zhang   stash->n               += n;
45375cae7c1SHong Zhang   space->local_used      += n;
45475cae7c1SHong Zhang   space->local_remaining -= n;
4553a40ed3dSBarry Smith   PetscFunctionReturn(0);
4569417f4adSLois Curfman McInnes }
4574c1ff481SSatish Balay /*
4588798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4594c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4604c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4614c1ff481SSatish Balay   processors.
462bc5ccf88SSatish Balay 
4634c1ff481SSatish Balay   Input Parameters:
4644c1ff481SSatish Balay   stash  - the stash
4654c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4664c1ff481SSatish Balay            for each node.
4674c1ff481SSatish Balay 
4684c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4694c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4704c1ff481SSatish Balay   the proper global indices.
4714c1ff481SSatish Balay */
4724a2ae208SSatish Balay #undef __FUNCT__
4734a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4741e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
475bc5ccf88SSatish Balay {
476ac2b2aa0SJed Brown   PetscErrorCode ierr;
477ac2b2aa0SJed Brown 
478ac2b2aa0SJed Brown   PetscFunctionBegin;
479ac2b2aa0SJed Brown   ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
480ac2b2aa0SJed Brown   PetscFunctionReturn(0);
481ac2b2aa0SJed Brown }
482ac2b2aa0SJed Brown 
483ac2b2aa0SJed Brown #undef __FUNCT__
484ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterBegin_Ref"
485ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
486ac2b2aa0SJed Brown {
487c1ac3661SBarry Smith   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
488fe09c992SBarry Smith   PetscInt           size=stash->size,nsends;
4896849ba73SBarry Smith   PetscErrorCode     ierr;
49075cae7c1SHong Zhang   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
49154f21887SBarry Smith   PetscScalar        **rvalues,*svalues;
492bc5ccf88SSatish Balay   MPI_Comm           comm = stash->comm;
493563fb871SSatish Balay   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
49476ec1555SBarry Smith   PetscMPIInt        *sizes,*nlengths,nreceives;
4955bd3b8fbSHong Zhang   PetscInt           *sp_idx,*sp_idy;
49654f21887SBarry Smith   PetscScalar        *sp_val;
4975bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
498bc5ccf88SSatish Balay 
499bc5ccf88SSatish Balay   PetscFunctionBegin;
5004c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
50175cae7c1SHong Zhang 
502bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
503037dbc42SBarry Smith   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
5041795a4d1SJed Brown   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
505037dbc42SBarry Smith   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
506a2d1c673SSatish Balay 
50775cae7c1SHong Zhang   i       = j    = 0;
5087357eb19SBarry Smith   lastidx = -1;
5095bd3b8fbSHong Zhang   space   = stash->space_head;
5100298fd71SBarry Smith   while (space != NULL) {
51175cae7c1SHong Zhang     space_next = space->next;
5125bd3b8fbSHong Zhang     sp_idx     = space->idx;
51375cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
5147357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
5155bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
5167357eb19SBarry Smith       lastidx = idx;
5177357eb19SBarry Smith       for (; j<size; j++) {
5184c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
519563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
520bc5ccf88SSatish Balay         }
521bc5ccf88SSatish Balay       }
52275cae7c1SHong Zhang       i++;
52375cae7c1SHong Zhang     }
52475cae7c1SHong Zhang     space = space_next;
525bc5ccf88SSatish Balay   }
526563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
527563fb871SSatish Balay   for (i=0, nsends=0; i<size; i++) {
5288865f1eaSKarl Rupp     if (nlengths[i]) {
52976ec1555SBarry Smith       sizes[i] = 1; nsends++;
5308865f1eaSKarl Rupp     }
531563fb871SSatish Balay   }
532bc5ccf88SSatish Balay 
53354f21887SBarry Smith   {PetscMPIInt *onodes,*olengths;
534563fb871SSatish Balay    /* Determine the number of messages to expect, their lengths, from from-ids */
53576ec1555SBarry Smith    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
536563fb871SSatish Balay    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
537563fb871SSatish Balay    /* since clubbing row,col - lengths are multiplied by 2 */
538563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] *=2;
539563fb871SSatish Balay    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
540563fb871SSatish Balay    /* values are size 'bs2' lengths (and remove earlier factor 2 */
541563fb871SSatish Balay    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
542563fb871SSatish Balay    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
543563fb871SSatish Balay    ierr = PetscFree(onodes);CHKERRQ(ierr);
5448865f1eaSKarl Rupp    ierr = PetscFree(olengths);CHKERRQ(ierr);}
545bc5ccf88SSatish Balay 
546bc5ccf88SSatish Balay   /* do sends:
547bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
548bc5ccf88SSatish Balay          the ith processor
549bc5ccf88SSatish Balay   */
550dcca6d9dSJed Brown   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
551785e854fSJed Brown   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
552dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
553a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
554bc5ccf88SSatish Balay   startv[0] = 0; starti[0] = 0;
555bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
556563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
557533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
558bc5ccf88SSatish Balay   }
55975cae7c1SHong Zhang 
56075cae7c1SHong Zhang   i     = 0;
5615bd3b8fbSHong Zhang   space = stash->space_head;
5620298fd71SBarry Smith   while (space != NULL) {
56375cae7c1SHong Zhang     space_next = space->next;
5645bd3b8fbSHong Zhang     sp_idx     = space->idx;
5655bd3b8fbSHong Zhang     sp_idy     = space->idy;
5665bd3b8fbSHong Zhang     sp_val     = space->val;
56775cae7c1SHong Zhang     for (l=0; l<space->local_used; l++) {
568bc5ccf88SSatish Balay       j = owner[i];
569a2d1c673SSatish Balay       if (bs2 == 1) {
5705bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
571a2d1c673SSatish Balay       } else {
572c1ac3661SBarry Smith         PetscInt    k;
57354f21887SBarry Smith         PetscScalar *buf1,*buf2;
5744c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
575b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5768865f1eaSKarl Rupp         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
577a2d1c673SSatish Balay       }
5785bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5795bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
580bc5ccf88SSatish Balay       startv[j]++;
581bc5ccf88SSatish Balay       starti[j]++;
58275cae7c1SHong Zhang       i++;
58375cae7c1SHong Zhang     }
58475cae7c1SHong Zhang     space = space_next;
585bc5ccf88SSatish Balay   }
586bc5ccf88SSatish Balay   startv[0] = 0;
5878865f1eaSKarl Rupp   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
588e5d0e772SSatish Balay 
589bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
59076ec1555SBarry Smith     if (sizes[i]) {
591563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
592a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
593bc5ccf88SSatish Balay     }
594b85c94c3SSatish Balay   }
5956cf91177SBarry Smith #if defined(PETSC_USE_INFO)
59693157e10SBarry Smith   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
597e5d0e772SSatish Balay   for (i=0; i<size; i++) {
59876ec1555SBarry Smith     if (sizes[i]) {
59930c47e72SSatish Balay       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
600e5d0e772SSatish Balay     }
601e5d0e772SSatish Balay   }
602e5d0e772SSatish Balay #endif
603c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
604606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
605c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
60676ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
607a2d1c673SSatish Balay 
608563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
609785e854fSJed Brown   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
610563fb871SSatish Balay 
611563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
612563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
613563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
614563fb871SSatish Balay   }
615563fb871SSatish Balay   stash->recv_waits = recv_waits;
6168865f1eaSKarl Rupp 
617563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
618563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
619563fb871SSatish Balay 
620c05d87d6SBarry Smith   stash->svalues         = svalues;
621c05d87d6SBarry Smith   stash->sindices        = sindices;
622c05d87d6SBarry Smith   stash->rvalues         = rvalues;
623c05d87d6SBarry Smith   stash->rindices        = rindices;
624c05d87d6SBarry Smith   stash->send_waits      = send_waits;
625c05d87d6SBarry Smith   stash->nsends          = nsends;
626c05d87d6SBarry Smith   stash->nrecvs          = nreceives;
62767318a8aSJed Brown   stash->reproduce_count = 0;
628bc5ccf88SSatish Balay   PetscFunctionReturn(0);
629bc5ccf88SSatish Balay }
630bc5ccf88SSatish Balay 
631a2d1c673SSatish Balay /*
6328798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
6338798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6344c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6354c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6364c1ff481SSatish Balay 
6374c1ff481SSatish Balay    Input Parameters:
6384c1ff481SSatish Balay    stash - the stash
6394c1ff481SSatish Balay 
6404c1ff481SSatish Balay    Output Parameters:
6414c1ff481SSatish Balay    nvals - the number of entries in the current message.
6424c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6434c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6444c1ff481SSatish Balay    vals  - the values
6454c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6464c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6474c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
648a2d1c673SSatish Balay */
6494a2ae208SSatish Balay #undef __FUNCT__
6504a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
65154f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
652bc5ccf88SSatish Balay {
6536849ba73SBarry Smith   PetscErrorCode ierr;
654ac2b2aa0SJed Brown 
655ac2b2aa0SJed Brown   PetscFunctionBegin;
656ac2b2aa0SJed Brown   ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
657ac2b2aa0SJed Brown   PetscFunctionReturn(0);
658ac2b2aa0SJed Brown }
659ac2b2aa0SJed Brown 
660ac2b2aa0SJed Brown #undef __FUNCT__
661ac2b2aa0SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_Ref"
662ac2b2aa0SJed Brown static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
663ac2b2aa0SJed Brown {
664ac2b2aa0SJed Brown   PetscErrorCode ierr;
665533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
666fe09c992SBarry Smith   PetscInt       bs2;
667a2d1c673SSatish Balay   MPI_Status     recv_status;
668ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
669bc5ccf88SSatish Balay 
670bc5ccf88SSatish Balay   PetscFunctionBegin;
671a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
672a2d1c673SSatish Balay   /* Return if no more messages to process */
6738865f1eaSKarl Rupp   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
674a2d1c673SSatish Balay 
6754c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
67667318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
677a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
678a2d1c673SSatish Balay   while (!match_found) {
67967318a8aSJed Brown     if (stash->reproduce) {
68067318a8aSJed Brown       i    = stash->reproduce_count++;
68167318a8aSJed Brown       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
68267318a8aSJed Brown     } else {
683a2d1c673SSatish Balay       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
68467318a8aSJed Brown     }
685e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
686533163c2SBarry Smith 
68767318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
688a2d1c673SSatish Balay     if (i % 2) {
689a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
6908865f1eaSKarl Rupp 
691c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
6928865f1eaSKarl Rupp 
693a2d1c673SSatish Balay       *nvals = *nvals/bs2;
694563fb871SSatish Balay     } else {
695563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
6968865f1eaSKarl Rupp 
697563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
6988865f1eaSKarl Rupp 
699563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
700bc5ccf88SSatish Balay     }
701a2d1c673SSatish Balay 
702cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
703c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
704c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
705a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
706563fb871SSatish Balay       *rows = stash->rindices[i2];
707a2d1c673SSatish Balay       *cols = *rows + *nvals;
708563fb871SSatish Balay       *vals = stash->rvalues[i1];
709a2d1c673SSatish Balay       *flg  = 1;
710a2d1c673SSatish Balay       stash->nprocessed++;
71135d8aa7fSBarry Smith       match_found = PETSC_TRUE;
712bc5ccf88SSatish Balay     }
713bc5ccf88SSatish Balay   }
714bc5ccf88SSatish Balay   PetscFunctionReturn(0);
715bc5ccf88SSatish Balay }
716*d7d60843SJed Brown 
717*d7d60843SJed Brown typedef struct {
718*d7d60843SJed Brown   PetscInt row;
719*d7d60843SJed Brown   PetscInt col;
720*d7d60843SJed Brown   PetscScalar vals[1];          /* Actually an array of length bs2 */
721*d7d60843SJed Brown } MatStashBlock;
722*d7d60843SJed Brown 
723*d7d60843SJed Brown #undef __FUNCT__
724*d7d60843SJed Brown #define __FUNCT__ "MatStashSortCompress_Private"
725*d7d60843SJed Brown static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
726*d7d60843SJed Brown {
727*d7d60843SJed Brown   PetscErrorCode ierr;
728*d7d60843SJed Brown   PetscMatStashSpace space;
729*d7d60843SJed Brown   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
730*d7d60843SJed Brown   PetscScalar **valptr;
731*d7d60843SJed Brown 
732*d7d60843SJed Brown   PetscFunctionBegin;
733*d7d60843SJed Brown   ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
734*d7d60843SJed Brown   for (space=stash->space_head,cnt=0; space; space=space->next) {
735*d7d60843SJed Brown     for (i=0; i<space->local_used; i++) {
736*d7d60843SJed Brown       row[cnt] = space->idx[i];
737*d7d60843SJed Brown       col[cnt] = space->idy[i];
738*d7d60843SJed Brown       valptr[cnt] = &space->val[i*bs2];
739*d7d60843SJed Brown       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
740*d7d60843SJed Brown       cnt++;
741*d7d60843SJed Brown     }
742*d7d60843SJed Brown   }
743*d7d60843SJed Brown   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
744*d7d60843SJed Brown   ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
745*d7d60843SJed Brown   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
746*d7d60843SJed Brown   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
747*d7d60843SJed Brown     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
748*d7d60843SJed Brown       PetscInt colstart;
749*d7d60843SJed Brown       ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
750*d7d60843SJed Brown       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
751*d7d60843SJed Brown         PetscInt j,l;
752*d7d60843SJed Brown         MatStashBlock *block;
753*d7d60843SJed Brown         ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
754*d7d60843SJed Brown         block->row = row[rowstart];
755*d7d60843SJed Brown         block->col = col[colstart];
756*d7d60843SJed Brown         ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
757*d7d60843SJed Brown         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
758*d7d60843SJed Brown           if (insertmode == ADD_VALUES) {
759*d7d60843SJed Brown             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
760*d7d60843SJed Brown           } else {
761*d7d60843SJed Brown             ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
762*d7d60843SJed Brown           }
763*d7d60843SJed Brown         }
764*d7d60843SJed Brown         colstart = j;
765*d7d60843SJed Brown       }
766*d7d60843SJed Brown       rowstart = i;
767*d7d60843SJed Brown     }
768*d7d60843SJed Brown   }
769*d7d60843SJed Brown   ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
770*d7d60843SJed Brown   PetscFunctionReturn(0);
771*d7d60843SJed Brown }
772*d7d60843SJed Brown 
773*d7d60843SJed Brown #undef __FUNCT__
774*d7d60843SJed Brown #define __FUNCT__ "MatStashBlockTypeSetUp"
775*d7d60843SJed Brown static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
776*d7d60843SJed Brown {
777*d7d60843SJed Brown   PetscErrorCode ierr;
778*d7d60843SJed Brown 
779*d7d60843SJed Brown   PetscFunctionBegin;
780*d7d60843SJed Brown   if (stash->blocktype == MPI_DATATYPE_NULL) {
781*d7d60843SJed Brown     PetscInt     bs2 = PetscSqr(stash->bs);
782*d7d60843SJed Brown     PetscMPIInt  blocklens[2];
783*d7d60843SJed Brown     MPI_Aint     displs[2];
784*d7d60843SJed Brown     MPI_Datatype types[2],stype;
785*d7d60843SJed Brown 
786*d7d60843SJed Brown     stash->blocktype_size = offsetof(MatStashBlock,vals) + bs2*sizeof(PetscScalar);
787*d7d60843SJed Brown     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
788*d7d60843SJed Brown       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
789*d7d60843SJed Brown     }
790*d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
791*d7d60843SJed Brown     ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
792*d7d60843SJed Brown     ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
793*d7d60843SJed Brown     blocklens[0] = 2;
794*d7d60843SJed Brown     blocklens[1] = bs2;
795*d7d60843SJed Brown     displs[0] = offsetof(MatStashBlock,row);
796*d7d60843SJed Brown     displs[1] = offsetof(MatStashBlock,vals);
797*d7d60843SJed Brown     types[0] = MPIU_INT;
798*d7d60843SJed Brown     types[1] = MPIU_SCALAR;
799*d7d60843SJed Brown     ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
800*d7d60843SJed Brown     ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
801*d7d60843SJed Brown     ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
802*d7d60843SJed Brown     ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
803*d7d60843SJed Brown     ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
804*d7d60843SJed Brown   }
805*d7d60843SJed Brown   PetscFunctionReturn(0);
806*d7d60843SJed Brown }
807*d7d60843SJed Brown 
808*d7d60843SJed Brown #undef __FUNCT__
809*d7d60843SJed Brown #define __FUNCT__ "MatStashBTSSend_Private"
810*d7d60843SJed Brown /* Callback invoked after target rank has initiatied receive of rendezvous message.
811*d7d60843SJed Brown  * Here we post the main sends.
812*d7d60843SJed Brown  */
813*d7d60843SJed Brown static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
814*d7d60843SJed Brown {
815*d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
816*d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)sdata;
817*d7d60843SJed Brown   PetscErrorCode ierr;
818*d7d60843SJed Brown 
819*d7d60843SJed Brown   PetscFunctionBegin;
820*d7d60843SJed Brown   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
821*d7d60843SJed Brown   ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
822*d7d60843SJed Brown   stash->sendframes[rankid].count = hdr->count;
823*d7d60843SJed Brown   stash->sendframes[rankid].pending = 1;
824*d7d60843SJed Brown   PetscFunctionReturn(0);
825*d7d60843SJed Brown }
826*d7d60843SJed Brown 
827*d7d60843SJed Brown #undef __FUNCT__
828*d7d60843SJed Brown #define __FUNCT__ "MatStashBTSRecv_Private"
829*d7d60843SJed Brown /* Callback invoked by target after receiving rendezvous message.
830*d7d60843SJed Brown  * Here we post the main recvs.
831*d7d60843SJed Brown  */
832*d7d60843SJed Brown static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
833*d7d60843SJed Brown {
834*d7d60843SJed Brown   MatStash *stash = (MatStash*)ctx;
835*d7d60843SJed Brown   MatStashHeader *hdr = (MatStashHeader*)rdata;
836*d7d60843SJed Brown   MatStashFrame *frame;
837*d7d60843SJed Brown   PetscErrorCode ierr;
838*d7d60843SJed Brown 
839*d7d60843SJed Brown   PetscFunctionBegin;
840*d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
841*d7d60843SJed Brown   ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
842*d7d60843SJed Brown   ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
843*d7d60843SJed Brown   frame->count = hdr->count;
844*d7d60843SJed Brown   frame->pending = 1;
845*d7d60843SJed Brown   PetscFunctionReturn(0);
846*d7d60843SJed Brown }
847*d7d60843SJed Brown 
848*d7d60843SJed Brown #undef __FUNCT__
849*d7d60843SJed Brown #define __FUNCT__ "MatStashScatterBegin_BTS"
850*d7d60843SJed Brown /*
851*d7d60843SJed Brown  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
852*d7d60843SJed Brown  */
853*d7d60843SJed Brown static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
854*d7d60843SJed Brown {
855*d7d60843SJed Brown   PetscErrorCode ierr;
856*d7d60843SJed Brown   size_t nblocks;
857*d7d60843SJed Brown   char *sendblocks;
858*d7d60843SJed Brown 
859*d7d60843SJed Brown   PetscFunctionBegin;
860*d7d60843SJed Brown   ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
861*d7d60843SJed Brown   ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
862*d7d60843SJed Brown   ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
863*d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
864*d7d60843SJed Brown   {
865*d7d60843SJed Brown     PetscInt i,rowstart,sendno;
866*d7d60843SJed Brown 
867*d7d60843SJed Brown     /* Count number of send ranks and allocate for sends */
868*d7d60843SJed Brown     stash->nsendranks = 0;
869*d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
870*d7d60843SJed Brown       PetscInt lastowner,owner;
871*d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
872*d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
873*d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
874*d7d60843SJed Brown       lastowner = owner;
875*d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
876*d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
877*d7d60843SJed Brown         if (sendblock_i->row == sendblock_rowstart->row) continue;
878*d7d60843SJed Brown         ierr = PetscFindInt(sendblock_i->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
879*d7d60843SJed Brown         if (owner < 0) owner = -(owner+2);
880*d7d60843SJed Brown         if (owner != lastowner) break;
881*d7d60843SJed Brown       }
882*d7d60843SJed Brown       stash->nsendranks++;
883*d7d60843SJed Brown       rowstart = i;
884*d7d60843SJed Brown     }
885*d7d60843SJed Brown     ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);
886*d7d60843SJed Brown 
887*d7d60843SJed Brown     /* Set up sendhdrs and sendframes */
888*d7d60843SJed Brown     sendno = 0;
889*d7d60843SJed Brown     for (rowstart=0; rowstart<nblocks; ) {
890*d7d60843SJed Brown       PetscInt owner;
891*d7d60843SJed Brown       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
892*d7d60843SJed Brown       ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
893*d7d60843SJed Brown       if (owner < 0) owner = -(owner+2);
894*d7d60843SJed Brown       stash->sendranks[sendno] = owner;
895*d7d60843SJed Brown       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
896*d7d60843SJed Brown         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
897*d7d60843SJed Brown         if (sendblock_i->row == sendblock_rowstart->row) continue;
898*d7d60843SJed Brown         ierr = PetscFindInt(sendblock_i->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
899*d7d60843SJed Brown         if (owner < 0) owner = -(owner+2);
900*d7d60843SJed Brown         if (owner != stash->sendranks[sendno]) break;
901*d7d60843SJed Brown       }
902*d7d60843SJed Brown       stash->sendframes[sendno].buffer = sendblock_rowstart;
903*d7d60843SJed Brown       stash->sendframes[sendno].pending = 0;
904*d7d60843SJed Brown       stash->sendhdr[sendno].count = i - rowstart;
905*d7d60843SJed Brown       stash->sendhdr[sendno].insertmode = mat->insertmode;
906*d7d60843SJed Brown       sendno++;
907*d7d60843SJed Brown       rowstart = i;
908*d7d60843SJed Brown     }
909*d7d60843SJed Brown     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
910*d7d60843SJed Brown   }
911*d7d60843SJed Brown 
912*d7d60843SJed Brown   ierr = PetscCommBuildTwoSidedFReq(stash->comm,2,MPIU_INT,stash->nsendranks,stash->sendranks,stash->sendhdr,
913*d7d60843SJed Brown                                     &stash->nrecvranks,&stash->recvranks,&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
914*d7d60843SJed Brown                                     MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
915*d7d60843SJed Brown 
916*d7d60843SJed Brown   ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
917*d7d60843SJed Brown   ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
918*d7d60843SJed Brown   stash->recvframe_active = NULL;
919*d7d60843SJed Brown   stash->recvframe_i      = 0;
920*d7d60843SJed Brown   stash->some_i           = 0;
921*d7d60843SJed Brown   stash->some_count       = 0;
922*d7d60843SJed Brown   stash->recvcount        = 0;
923*d7d60843SJed Brown   stash->use_status       = PETSC_FALSE; /* Use count from header instead of from message. */
924*d7d60843SJed Brown 
925*d7d60843SJed Brown   PetscFunctionReturn(0);
926*d7d60843SJed Brown }
927*d7d60843SJed Brown 
928*d7d60843SJed Brown #undef __FUNCT__
929*d7d60843SJed Brown #define __FUNCT__ "MatStashScatterGetMesg_BTS"
930*d7d60843SJed Brown static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
931*d7d60843SJed Brown {
932*d7d60843SJed Brown   PetscErrorCode ierr;
933*d7d60843SJed Brown   MatStashBlock *block;
934*d7d60843SJed Brown 
935*d7d60843SJed Brown   PetscFunctionBegin;
936*d7d60843SJed Brown   *flg = 0;
937*d7d60843SJed Brown   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
938*d7d60843SJed Brown     if (stash->some_i == stash->some_count) {
939*d7d60843SJed Brown       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
940*d7d60843SJed Brown       ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
941*d7d60843SJed Brown       stash->some_i = 0;
942*d7d60843SJed Brown     }
943*d7d60843SJed Brown     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
944*d7d60843SJed Brown     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
945*d7d60843SJed Brown     if (stash->use_status) { /* Count what was actually sent */
946*d7d60843SJed Brown       ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
947*d7d60843SJed Brown     }
948*d7d60843SJed Brown     stash->some_i++;
949*d7d60843SJed Brown     stash->recvcount++;
950*d7d60843SJed Brown     stash->recvframe_i = 0;
951*d7d60843SJed Brown   }
952*d7d60843SJed Brown   *n = 1;
953*d7d60843SJed Brown   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
954*d7d60843SJed Brown   *row = &block->row;
955*d7d60843SJed Brown   *col = &block->col;
956*d7d60843SJed Brown   *val = block->vals;
957*d7d60843SJed Brown   stash->recvframe_i++;
958*d7d60843SJed Brown   *flg = 1;
959*d7d60843SJed Brown   PetscFunctionReturn(0);
960*d7d60843SJed Brown }
961*d7d60843SJed Brown 
962*d7d60843SJed Brown #undef __FUNCT__
963*d7d60843SJed Brown #define __FUNCT__ "MatStashScatterEnd_BTS"
964*d7d60843SJed Brown static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
965*d7d60843SJed Brown {
966*d7d60843SJed Brown   PetscErrorCode ierr;
967*d7d60843SJed Brown 
968*d7d60843SJed Brown   PetscFunctionBegin;
969*d7d60843SJed Brown   ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
970*d7d60843SJed Brown   ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
971*d7d60843SJed Brown 
972*d7d60843SJed Brown   /* Now update nmaxold to be app 10% more than max n used, this way the
973*d7d60843SJed Brown      wastage of space is reduced the next time this stash is used.
974*d7d60843SJed Brown      Also update the oldmax, only if it increases */
975*d7d60843SJed Brown   if (stash->n) {
976*d7d60843SJed Brown     PetscInt bs2     = stash->bs*stash->bs;
977*d7d60843SJed Brown     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
978*d7d60843SJed Brown     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
979*d7d60843SJed Brown   }
980*d7d60843SJed Brown 
981*d7d60843SJed Brown   stash->nmax       = 0;
982*d7d60843SJed Brown   stash->n          = 0;
983*d7d60843SJed Brown   stash->reallocs   = -1;
984*d7d60843SJed Brown   stash->nprocessed = 0;
985*d7d60843SJed Brown 
986*d7d60843SJed Brown   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
987*d7d60843SJed Brown 
988*d7d60843SJed Brown   stash->space = 0;
989*d7d60843SJed Brown 
990*d7d60843SJed Brown   PetscFunctionReturn(0);
991*d7d60843SJed Brown }
992*d7d60843SJed Brown 
993*d7d60843SJed Brown #undef __FUNCT__
994*d7d60843SJed Brown #define __FUNCT__ "MatStashScatterDestroy_BTS"
995*d7d60843SJed Brown static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
996*d7d60843SJed Brown {
997*d7d60843SJed Brown   PetscErrorCode ierr;
998*d7d60843SJed Brown 
999*d7d60843SJed Brown   PetscFunctionBegin;
1000*d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
1001*d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
1002*d7d60843SJed Brown   stash->recvframes = NULL;
1003*d7d60843SJed Brown   ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
1004*d7d60843SJed Brown   if (stash->blocktype != MPI_DATATYPE_NULL) {
1005*d7d60843SJed Brown     ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
1006*d7d60843SJed Brown   }
1007*d7d60843SJed Brown   stash->nsendranks = 0;
1008*d7d60843SJed Brown   stash->nrecvranks = 0;
1009*d7d60843SJed Brown   ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
1010*d7d60843SJed Brown   ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
1011*d7d60843SJed Brown   ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
1012*d7d60843SJed Brown   ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
1013*d7d60843SJed Brown   ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
1014*d7d60843SJed Brown   ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
1015*d7d60843SJed Brown   PetscFunctionReturn(0);
1016*d7d60843SJed Brown }
1017