xref: /petsc/src/mat/utils/matstash.c (revision 533163c2cd1b19f0f8b5307c6699b5cf12eec670)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
37c4f633dSBarry Smith #include "private/matimpl.h"
45bd3b8fbSHong Zhang 
5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
64c1ff481SSatish Balay 
79417f4adSLois Curfman McInnes /*
88798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
94c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
104c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
119417f4adSLois Curfman McInnes 
124c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
134c1ff481SSatish Balay 
144c1ff481SSatish Balay   Input Parameters:
154c1ff481SSatish Balay   comm - communicator, required for scatters.
164c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Output Parameters:
194c1ff481SSatish Balay   stash    - the newly created stash
209417f4adSLois Curfman McInnes */
214a2ae208SSatish Balay #undef __FUNCT__
224a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
23c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
249417f4adSLois Curfman McInnes {
25dfbe8321SBarry Smith   PetscErrorCode ierr;
26*533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
27f1af5d2fSBarry Smith   PetscTruth     flg;
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31752ec6e0SSatish Balay   stash->comm = comm;
32752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36*533163c2SBarry Smith   ierr  = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr);
37*533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
38*533163c2SBarry Smith 
39bc5ccf88SSatish Balay 
40434d7ff9SSatish Balay   nopt = stash->size;
41d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
42b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43434d7ff9SSatish Balay   if (flg) {
44434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
45434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
46434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
47f4ab19daSSatish Balay     else                          max = 0; /* Use default */
48434d7ff9SSatish Balay     stash->umax = max;
49434d7ff9SSatish Balay   } else {
50434d7ff9SSatish Balay     stash->umax = 0;
51434d7ff9SSatish Balay   }
52606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
534c1ff481SSatish Balay   if (bs <= 0) bs = 1;
54a2d1c673SSatish Balay 
554c1ff481SSatish Balay   stash->bs       = bs;
569417f4adSLois Curfman McInnes   stash->nmax     = 0;
57434d7ff9SSatish Balay   stash->oldnmax  = 0;
589417f4adSLois Curfman McInnes   stash->n        = 0;
594c1ff481SSatish Balay   stash->reallocs = -1;
6075cae7c1SHong Zhang   stash->space_head = 0;
6175cae7c1SHong Zhang   stash->space      = 0;
629417f4adSLois Curfman McInnes 
63bc5ccf88SSatish Balay   stash->send_waits  = 0;
64bc5ccf88SSatish Balay   stash->recv_waits  = 0;
65a2d1c673SSatish Balay   stash->send_status = 0;
66bc5ccf88SSatish Balay   stash->nsends      = 0;
67bc5ccf88SSatish Balay   stash->nrecvs      = 0;
68bc5ccf88SSatish Balay   stash->svalues     = 0;
69bc5ccf88SSatish Balay   stash->rvalues     = 0;
70563fb871SSatish Balay   stash->rindices    = 0;
71a2d1c673SSatish Balay   stash->nprocessed  = 0;
723a40ed3dSBarry Smith   PetscFunctionReturn(0);
739417f4adSLois Curfman McInnes }
749417f4adSLois Curfman McInnes 
754c1ff481SSatish Balay /*
768798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
774c1ff481SSatish Balay */
784a2ae208SSatish Balay #undef __FUNCT__
794a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
80dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
819417f4adSLois Curfman McInnes {
82dfbe8321SBarry Smith   PetscErrorCode ierr;
83a2d1c673SSatish Balay 
84bc5ccf88SSatish Balay   PetscFunctionBegin;
8575cae7c1SHong Zhang   if (stash->space_head){
8675cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
8775cae7c1SHong Zhang     stash->space_head = 0;
8882740460SHong Zhang     stash->space      = 0;
8975cae7c1SHong Zhang   }
90*533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
91bc5ccf88SSatish Balay   PetscFunctionReturn(0);
92bc5ccf88SSatish Balay }
93bc5ccf88SSatish Balay 
944c1ff481SSatish Balay /*
958798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
964c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
974c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
984c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
994c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1004c1ff481SSatish Balay    so that the same value can be used the next time through.
1014c1ff481SSatish Balay */
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
104dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
105bc5ccf88SSatish Balay {
1066849ba73SBarry Smith   PetscErrorCode ierr;
107*533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
108a2d1c673SSatish Balay   MPI_Status     *send_status;
109a2d1c673SSatish Balay 
1103a40ed3dSBarry Smith   PetscFunctionBegin;
111*533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
112a2d1c673SSatish Balay   /* wait on sends */
113a2d1c673SSatish Balay   if (nsends) {
11482502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
117a2d1c673SSatish Balay   }
118a2d1c673SSatish Balay 
119c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
120434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
121434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
122b9b97703SBarry Smith   if (stash->n) {
12394b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1248a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126b9b97703SBarry Smith   }
127434d7ff9SSatish Balay 
128d07ff455SSatish Balay   stash->nmax       = 0;
129d07ff455SSatish Balay   stash->n          = 0;
1304c1ff481SSatish Balay   stash->reallocs   = -1;
131a2d1c673SSatish Balay   stash->nprocessed = 0;
13275cae7c1SHong Zhang   if (stash->space_head){
13375cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
13475cae7c1SHong Zhang     stash->space_head = 0;
13582740460SHong Zhang     stash->space      = 0;
13675cae7c1SHong Zhang   }
137606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138606d414cSSatish Balay   stash->send_waits = 0;
139606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
140606d414cSSatish Balay   stash->recv_waits = 0;
141c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
142606d414cSSatish Balay   stash->svalues = 0;
143c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
144606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
145606d414cSSatish Balay   stash->rvalues = 0;
146c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
147563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
148563fb871SSatish Balay   stash->rindices = 0;
1493a40ed3dSBarry Smith   PetscFunctionReturn(0);
1509417f4adSLois Curfman McInnes }
1519417f4adSLois Curfman McInnes 
1524c1ff481SSatish Balay /*
1538798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1544c1ff481SSatish Balay 
1554c1ff481SSatish Balay    Input Parameters:
1564c1ff481SSatish Balay    stash    - the stash
15794b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1584c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1594c1ff481SSatish Balay 
1604c1ff481SSatish Balay */
1614a2ae208SSatish Balay #undef __FUNCT__
1624a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
163c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
16497530c3fSBarry Smith {
165c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
16694b769a5SSatish Balay 
1673a40ed3dSBarry Smith   PetscFunctionBegin;
1681ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1691ecfd215SBarry Smith   if (reallocs) {
170434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
171434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1721ecfd215SBarry Smith   }
173bc5ccf88SSatish Balay   PetscFunctionReturn(0);
174bc5ccf88SSatish Balay }
1754c1ff481SSatish Balay 
1764c1ff481SSatish Balay /*
1778798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1784c1ff481SSatish Balay 
1794c1ff481SSatish Balay    Input Parameters:
1804c1ff481SSatish Balay    stash  - the stash
1814c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1824c1ff481SSatish Balay             this value is used while allocating memory.
1834c1ff481SSatish Balay */
1844a2ae208SSatish Balay #undef __FUNCT__
1854a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
186c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
187bc5ccf88SSatish Balay {
188bc5ccf88SSatish Balay   PetscFunctionBegin;
189434d7ff9SSatish Balay   stash->umax = max;
1903a40ed3dSBarry Smith   PetscFunctionReturn(0);
19197530c3fSBarry Smith }
19297530c3fSBarry Smith 
1938798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1944c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1954c1ff481SSatish Balay    being inserted into the stash.
1964c1ff481SSatish Balay 
1974c1ff481SSatish Balay    Input Parameters:
1984c1ff481SSatish Balay    stash - the stash
1994c1ff481SSatish Balay    incr  - the minimum increase requested
2004c1ff481SSatish Balay 
2014c1ff481SSatish Balay    Notes:
2024c1ff481SSatish Balay    This routine doubles the currently used memory.
2034c1ff481SSatish Balay  */
2044a2ae208SSatish Balay #undef __FUNCT__
2054a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
206c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2079417f4adSLois Curfman McInnes {
2086849ba73SBarry Smith   PetscErrorCode ierr;
2095bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2109417f4adSLois Curfman McInnes 
2113a40ed3dSBarry Smith   PetscFunctionBegin;
2129417f4adSLois Curfman McInnes   /* allocate a larger stash */
213c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
214434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
215434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
216c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
217434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
218434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
219434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2204c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
221d07ff455SSatish Balay 
22275cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
22375cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
224b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
225b087b6d6SSatish Balay     stash->space_head = stash->space;
22675cae7c1SHong Zhang   }
227b087b6d6SSatish Balay 
228bc5ccf88SSatish Balay   stash->reallocs++;
22975cae7c1SHong Zhang   stash->nmax = newnmax;
230bc5ccf88SSatish Balay   PetscFunctionReturn(0);
231bc5ccf88SSatish Balay }
232bc5ccf88SSatish Balay /*
2338798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2344c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2354c1ff481SSatish Balay   can be inserted with a single call to this function.
2364c1ff481SSatish Balay 
2374c1ff481SSatish Balay   Input Parameters:
2384c1ff481SSatish Balay   stash  - the stash
2394c1ff481SSatish Balay   row    - the global row correspoiding to the values
2404c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2414c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2424c1ff481SSatish Balay   values - the values inserted
243bc5ccf88SSatish Balay */
2444a2ae208SSatish Balay #undef __FUNCT__
2454a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
246b400d20cSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries)
247bc5ccf88SSatish Balay {
248dfbe8321SBarry Smith   PetscErrorCode     ierr;
249b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
25075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
251bc5ccf88SSatish Balay 
252bc5ccf88SSatish Balay   PetscFunctionBegin;
2534c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
25475cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2558798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2569417f4adSLois Curfman McInnes   }
25775cae7c1SHong Zhang   space = stash->space;
25875cae7c1SHong Zhang   k     = space->local_used;
2594c1ff481SSatish Balay   for (i=0; i<n; i++) {
26088c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
26175cae7c1SHong Zhang     space->idx[k] = row;
26275cae7c1SHong Zhang     space->idy[k] = idxn[i];
26375cae7c1SHong Zhang     space->val[k] = values[i];
26475cae7c1SHong Zhang     k++;
265b400d20cSBarry Smith     cnt++;
2669417f4adSLois Curfman McInnes   }
267b400d20cSBarry Smith   stash->n               += cnt;
268b400d20cSBarry Smith   space->local_used      += cnt;
269b400d20cSBarry Smith   space->local_remaining -= cnt;
270a2d1c673SSatish Balay   PetscFunctionReturn(0);
271a2d1c673SSatish Balay }
27275cae7c1SHong Zhang 
2734c1ff481SSatish Balay /*
2748798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2754c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2764c1ff481SSatish Balay   can be inserted with a single call to this function.
277a2d1c673SSatish Balay 
2784c1ff481SSatish Balay   Input Parameters:
2794c1ff481SSatish Balay   stash   - the stash
2804c1ff481SSatish Balay   row     - the global row correspoiding to the values
2814c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2824c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2834c1ff481SSatish Balay   values  - the values inserted
2844c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2854c1ff481SSatish Balay             this happens because the input is columnoriented.
2864c1ff481SSatish Balay */
2874a2ae208SSatish Balay #undef __FUNCT__
2884a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
289b400d20cSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries)
290a2d1c673SSatish Balay {
291dfbe8321SBarry Smith   PetscErrorCode     ierr;
29250e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
29375cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
294a2d1c673SSatish Balay 
2954c1ff481SSatish Balay   PetscFunctionBegin;
2964c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
29775cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2988798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2994c1ff481SSatish Balay   }
30075cae7c1SHong Zhang   space = stash->space;
30175cae7c1SHong Zhang   k = space->local_used;
3024c1ff481SSatish Balay   for (i=0; i<n; i++) {
30388c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
30475cae7c1SHong Zhang     space->idx[k] = row;
30575cae7c1SHong Zhang     space->idy[k] = idxn[i];
30675cae7c1SHong Zhang     space->val[k] = values[i*stepval];
30775cae7c1SHong Zhang     k++;
308b400d20cSBarry Smith     cnt++;
3094c1ff481SSatish Balay   }
310b400d20cSBarry Smith   stash->n               += cnt;
311b400d20cSBarry Smith   space->local_used      += cnt;
312b400d20cSBarry Smith   space->local_remaining -= cnt;
3134c1ff481SSatish Balay   PetscFunctionReturn(0);
3144c1ff481SSatish Balay }
3154c1ff481SSatish Balay 
3164c1ff481SSatish Balay /*
3178798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3184c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3194c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3204c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3214c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3224c1ff481SSatish Balay 
3234c1ff481SSatish Balay   Input Parameters:
3244c1ff481SSatish Balay   stash  - the stash
3254c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3264c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3274c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3284c1ff481SSatish Balay            values. Each block is of size bs*bs.
3294c1ff481SSatish Balay   values - the values inserted
3304c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3314c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3324c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3334c1ff481SSatish Balay */
3344a2ae208SSatish Balay #undef __FUNCT__
3354a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
33654f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3374c1ff481SSatish Balay {
338dfbe8321SBarry Smith   PetscErrorCode     ierr;
33975cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
34054f21887SBarry Smith   const PetscScalar  *vals;
34154f21887SBarry Smith   PetscScalar        *array;
34275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
343a2d1c673SSatish Balay 
344a2d1c673SSatish Balay   PetscFunctionBegin;
34575cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3468798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
347a2d1c673SSatish Balay   }
34875cae7c1SHong Zhang   space = stash->space;
34975cae7c1SHong Zhang   l     = space->local_used;
35075cae7c1SHong Zhang   bs2   = bs*bs;
3514c1ff481SSatish Balay   for (i=0; i<n; i++) {
35275cae7c1SHong Zhang     space->idx[l] = row;
35375cae7c1SHong Zhang     space->idy[l] = idxn[i];
35475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
35575cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
35675cae7c1SHong Zhang        funtion call */
35775cae7c1SHong Zhang     array = space->val + bs2*l;
35875cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
35975cae7c1SHong Zhang     for (j=0; j<bs; j++) {
36075cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
36175cae7c1SHong Zhang       array++;
36275cae7c1SHong Zhang       vals  += cmax*bs;
36375cae7c1SHong Zhang     }
36475cae7c1SHong Zhang     l++;
365a2d1c673SSatish Balay   }
3665bd3b8fbSHong Zhang   stash->n               += n;
36775cae7c1SHong Zhang   space->local_used      += n;
36875cae7c1SHong Zhang   space->local_remaining -= n;
3694c1ff481SSatish Balay   PetscFunctionReturn(0);
3704c1ff481SSatish Balay }
3714c1ff481SSatish Balay 
3724c1ff481SSatish Balay /*
3738798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3744c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3754c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3764c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3774c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3784c1ff481SSatish Balay 
3794c1ff481SSatish Balay   Input Parameters:
3804c1ff481SSatish Balay   stash  - the stash
3814c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3824c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3834c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3844c1ff481SSatish Balay            values. Each block is of size bs*bs.
3854c1ff481SSatish Balay   values - the values inserted
3864c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3874c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3884c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3894c1ff481SSatish Balay */
3904a2ae208SSatish Balay #undef __FUNCT__
3914a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
39254f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3934c1ff481SSatish Balay {
394dfbe8321SBarry Smith   PetscErrorCode     ierr;
39575cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
39654f21887SBarry Smith   const PetscScalar  *vals;
39754f21887SBarry Smith   PetscScalar        *array;
39875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
3994c1ff481SSatish Balay 
4004c1ff481SSatish Balay   PetscFunctionBegin;
40175cae7c1SHong Zhang   if (!space || space->local_remaining < n){
4028798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4034c1ff481SSatish Balay   }
40475cae7c1SHong Zhang   space = stash->space;
40575cae7c1SHong Zhang   l     = space->local_used;
40675cae7c1SHong Zhang   bs2   = bs*bs;
4074c1ff481SSatish Balay   for (i=0; i<n; i++) {
40875cae7c1SHong Zhang     space->idx[l] = row;
40975cae7c1SHong Zhang     space->idy[l] = idxn[i];
41075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
41175cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
41275cae7c1SHong Zhang      funtion call */
41375cae7c1SHong Zhang     array = space->val + bs2*l;
41475cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
41575cae7c1SHong Zhang     for (j=0; j<bs; j++) {
41675cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
41775cae7c1SHong Zhang       array += bs;
41875cae7c1SHong Zhang       vals  += rmax*bs;
41975cae7c1SHong Zhang     }
4205bd3b8fbSHong Zhang     l++;
421a2d1c673SSatish Balay   }
4225bd3b8fbSHong Zhang   stash->n               += n;
42375cae7c1SHong Zhang   space->local_used      += n;
42475cae7c1SHong Zhang   space->local_remaining -= n;
4253a40ed3dSBarry Smith   PetscFunctionReturn(0);
4269417f4adSLois Curfman McInnes }
4274c1ff481SSatish Balay /*
4288798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4294c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4304c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4314c1ff481SSatish Balay   processors.
432bc5ccf88SSatish Balay 
4334c1ff481SSatish Balay   Input Parameters:
4344c1ff481SSatish Balay   stash  - the stash
4354c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4364c1ff481SSatish Balay            for each node.
4374c1ff481SSatish Balay 
4384c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4394c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4404c1ff481SSatish Balay   the proper global indices.
4414c1ff481SSatish Balay */
4424a2ae208SSatish Balay #undef __FUNCT__
4434a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4441e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
445bc5ccf88SSatish Balay {
446c1ac3661SBarry Smith   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
447fe09c992SBarry Smith   PetscInt          size=stash->size,nsends;
4486849ba73SBarry Smith   PetscErrorCode    ierr;
44975cae7c1SHong Zhang   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
45054f21887SBarry Smith   PetscScalar       **rvalues,*svalues;
451bc5ccf88SSatish Balay   MPI_Comm          comm = stash->comm;
452563fb871SSatish Balay   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
453fe09c992SBarry Smith   PetscMPIInt       *nprocs,*nlengths,nreceives;
4545bd3b8fbSHong Zhang   PetscInt          *sp_idx,*sp_idy;
45554f21887SBarry Smith   PetscScalar       *sp_val;
4565bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
457bc5ccf88SSatish Balay 
458bc5ccf88SSatish Balay   PetscFunctionBegin;
4594c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
46075cae7c1SHong Zhang 
461bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
462c05d87d6SBarry Smith   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
463c05d87d6SBarry Smith   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
464c05d87d6SBarry Smith   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
465c05d87d6SBarry Smith   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
466c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
467a2d1c673SSatish Balay 
46875cae7c1SHong Zhang   i = j    = 0;
4697357eb19SBarry Smith   lastidx  = -1;
4705bd3b8fbSHong Zhang   space    = stash->space_head;
47175cae7c1SHong Zhang   while (space != PETSC_NULL){
47275cae7c1SHong Zhang     space_next = space->next;
4735bd3b8fbSHong Zhang     sp_idx     = space->idx;
47475cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4757357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4765bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4777357eb19SBarry Smith       lastidx = idx;
4787357eb19SBarry Smith       for (; j<size; j++) {
4794c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
480563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
481bc5ccf88SSatish Balay         }
482bc5ccf88SSatish Balay       }
48375cae7c1SHong Zhang       i++;
48475cae7c1SHong Zhang     }
48575cae7c1SHong Zhang     space = space_next;
486bc5ccf88SSatish Balay   }
487563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
488563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
489563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
490563fb871SSatish Balay   }
491bc5ccf88SSatish Balay 
49254f21887SBarry Smith   {PetscMPIInt  *onodes,*olengths;
493563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
494563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
495563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
496563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
497563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
498563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
499563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
500563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
501563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
502563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
503563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
504bc5ccf88SSatish Balay   }
505bc5ccf88SSatish Balay 
506bc5ccf88SSatish Balay   /* do sends:
507bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
508bc5ccf88SSatish Balay          the ith processor
509bc5ccf88SSatish Balay   */
510c05d87d6SBarry Smith   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
511*533163c2SBarry Smith   ierr     = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
512c05d87d6SBarry Smith   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
513a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
514bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
515bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
516563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
517*533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
518bc5ccf88SSatish Balay   }
51975cae7c1SHong Zhang 
52075cae7c1SHong Zhang   i     = 0;
5215bd3b8fbSHong Zhang   space = stash->space_head;
52275cae7c1SHong Zhang   while (space != PETSC_NULL){
52375cae7c1SHong Zhang     space_next = space->next;
5245bd3b8fbSHong Zhang     sp_idx = space->idx;
5255bd3b8fbSHong Zhang     sp_idy = space->idy;
5265bd3b8fbSHong Zhang     sp_val = space->val;
52775cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
528bc5ccf88SSatish Balay       j = owner[i];
529a2d1c673SSatish Balay       if (bs2 == 1) {
5305bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
531a2d1c673SSatish Balay       } else {
532c1ac3661SBarry Smith         PetscInt     k;
53354f21887SBarry Smith         PetscScalar *buf1,*buf2;
5344c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
535b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5364c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
537a2d1c673SSatish Balay       }
5385bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5395bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
540bc5ccf88SSatish Balay       startv[j]++;
541bc5ccf88SSatish Balay       starti[j]++;
54275cae7c1SHong Zhang       i++;
54375cae7c1SHong Zhang     }
54475cae7c1SHong Zhang     space = space_next;
545bc5ccf88SSatish Balay   }
546bc5ccf88SSatish Balay   startv[0] = 0;
547563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
548e5d0e772SSatish Balay 
549bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
550563fb871SSatish Balay     if (nprocs[i]) {
551563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
552a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
553bc5ccf88SSatish Balay     }
554b85c94c3SSatish Balay   }
5556cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5561e2582c4SBarry Smith   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
557e5d0e772SSatish Balay   for (i=0; i<size; i++) {
558e5d0e772SSatish Balay     if (nprocs[i]) {
559a77337e4SBarry Smith       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
560e5d0e772SSatish Balay     }
561e5d0e772SSatish Balay   }
562e5d0e772SSatish Balay #endif
563c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
564606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
565c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
566c05d87d6SBarry Smith   ierr = PetscFree(nprocs);CHKERRQ(ierr);
567a2d1c673SSatish Balay 
568563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
569*533163c2SBarry Smith   ierr  = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
570563fb871SSatish Balay 
571563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
572563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
573563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
574563fb871SSatish Balay   }
575563fb871SSatish Balay   stash->recv_waits = recv_waits;
576563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
577563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
578563fb871SSatish Balay 
579c05d87d6SBarry Smith   stash->svalues     = svalues;
580c05d87d6SBarry Smith   stash->sindices    = sindices;
581c05d87d6SBarry Smith   stash->rvalues     = rvalues;
582c05d87d6SBarry Smith   stash->rindices    = rindices;
583c05d87d6SBarry Smith   stash->send_waits  = send_waits;
584c05d87d6SBarry Smith   stash->nsends      = nsends;
585c05d87d6SBarry Smith   stash->nrecvs      = nreceives;
586bc5ccf88SSatish Balay   PetscFunctionReturn(0);
587bc5ccf88SSatish Balay }
588bc5ccf88SSatish Balay 
589a2d1c673SSatish Balay /*
5908798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5918798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5924c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5934c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5944c1ff481SSatish Balay 
5954c1ff481SSatish Balay    Input Parameters:
5964c1ff481SSatish Balay    stash - the stash
5974c1ff481SSatish Balay 
5984c1ff481SSatish Balay    Output Parameters:
5994c1ff481SSatish Balay    nvals - the number of entries in the current message.
6004c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6014c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6024c1ff481SSatish Balay    vals  - the values
6034c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6044c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6054c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
606a2d1c673SSatish Balay */
6074a2ae208SSatish Balay #undef __FUNCT__
6084a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
60954f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
610bc5ccf88SSatish Balay {
6116849ba73SBarry Smith   PetscErrorCode ierr;
612*533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
613fe09c992SBarry Smith   PetscInt       bs2;
614a2d1c673SSatish Balay   MPI_Status     recv_status;
615b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
616bc5ccf88SSatish Balay 
617bc5ccf88SSatish Balay   PetscFunctionBegin;
618bc5ccf88SSatish Balay 
619a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
620a2d1c673SSatish Balay   /* Return if no more messages to process */
621a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
622a2d1c673SSatish Balay 
6234c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
624a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
625a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
626a2d1c673SSatish Balay   while (!match_found) {
627*533163c2SBarry Smith     CHKMEMQ;
628a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
629*533163c2SBarry Smith     CHKMEMQ;
630*533163c2SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_ERR_PLIB,"Negative MPI source!");
631*533163c2SBarry Smith 
632a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
633a2d1c673SSatish Balay     if (i % 2) {
634a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
635c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
636a2d1c673SSatish Balay       *nvals = *nvals/bs2;
637563fb871SSatish Balay     } else {
638563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
639563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
640563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
641bc5ccf88SSatish Balay     }
642a2d1c673SSatish Balay 
643cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
644c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
645c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
646a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
647563fb871SSatish Balay       *rows       = stash->rindices[i2];
648a2d1c673SSatish Balay       *cols       = *rows + *nvals;
649563fb871SSatish Balay       *vals       = stash->rvalues[i1];
650a2d1c673SSatish Balay       *flg        = 1;
651a2d1c673SSatish Balay       stash->nprocessed ++;
65235d8aa7fSBarry Smith       match_found = PETSC_TRUE;
653bc5ccf88SSatish Balay     }
654bc5ccf88SSatish Balay   }
655bc5ccf88SSatish Balay   PetscFunctionReturn(0);
656bc5ccf88SSatish Balay }
657