xref: /petsc/src/mat/utils/matstash.c (revision c05d87d6c2fa1d0952fe676de20339d09daa79bc)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
37c4f633dSBarry Smith #include "private/matimpl.h"
45bd3b8fbSHong Zhang 
5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
64c1ff481SSatish Balay 
79417f4adSLois Curfman McInnes /*
88798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
94c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
104c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
119417f4adSLois Curfman McInnes 
124c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
134c1ff481SSatish Balay 
144c1ff481SSatish Balay   Input Parameters:
154c1ff481SSatish Balay   comm - communicator, required for scatters.
164c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Output Parameters:
194c1ff481SSatish Balay   stash    - the newly created stash
209417f4adSLois Curfman McInnes */
214a2ae208SSatish Balay #undef __FUNCT__
224a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
23c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
249417f4adSLois Curfman McInnes {
25dfbe8321SBarry Smith   PetscErrorCode ierr;
26c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
27f1af5d2fSBarry Smith   PetscTruth     flg;
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31752ec6e0SSatish Balay   stash->comm = comm;
32752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36bc5ccf88SSatish Balay 
37434d7ff9SSatish Balay   nopt = stash->size;
38d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
39b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
40434d7ff9SSatish Balay   if (flg) {
41434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
42434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
43434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
44f4ab19daSSatish Balay     else                          max = 0; /* Use default */
45434d7ff9SSatish Balay     stash->umax = max;
46434d7ff9SSatish Balay   } else {
47434d7ff9SSatish Balay     stash->umax = 0;
48434d7ff9SSatish Balay   }
49606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
504c1ff481SSatish Balay   if (bs <= 0) bs = 1;
51a2d1c673SSatish Balay 
524c1ff481SSatish Balay   stash->bs       = bs;
539417f4adSLois Curfman McInnes   stash->nmax     = 0;
54434d7ff9SSatish Balay   stash->oldnmax  = 0;
559417f4adSLois Curfman McInnes   stash->n        = 0;
564c1ff481SSatish Balay   stash->reallocs = -1;
5775cae7c1SHong Zhang   stash->space_head = 0;
5875cae7c1SHong Zhang   stash->space      = 0;
599417f4adSLois Curfman McInnes 
60bc5ccf88SSatish Balay   stash->send_waits  = 0;
61bc5ccf88SSatish Balay   stash->recv_waits  = 0;
62a2d1c673SSatish Balay   stash->send_status = 0;
63bc5ccf88SSatish Balay   stash->nsends      = 0;
64bc5ccf88SSatish Balay   stash->nrecvs      = 0;
65bc5ccf88SSatish Balay   stash->svalues     = 0;
66bc5ccf88SSatish Balay   stash->rvalues     = 0;
67563fb871SSatish Balay   stash->rindices    = 0;
68a2d1c673SSatish Balay   stash->nprocessed  = 0;
693a40ed3dSBarry Smith   PetscFunctionReturn(0);
709417f4adSLois Curfman McInnes }
719417f4adSLois Curfman McInnes 
724c1ff481SSatish Balay /*
738798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
744c1ff481SSatish Balay */
754a2ae208SSatish Balay #undef __FUNCT__
764a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
77dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
789417f4adSLois Curfman McInnes {
79dfbe8321SBarry Smith   PetscErrorCode ierr;
80a2d1c673SSatish Balay 
81bc5ccf88SSatish Balay   PetscFunctionBegin;
8275cae7c1SHong Zhang   if (stash->space_head){
8375cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
8475cae7c1SHong Zhang     stash->space_head = 0;
8582740460SHong Zhang     stash->space      = 0;
8675cae7c1SHong Zhang   }
87bc5ccf88SSatish Balay   PetscFunctionReturn(0);
88bc5ccf88SSatish Balay }
89bc5ccf88SSatish Balay 
904c1ff481SSatish Balay /*
918798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
924c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
934c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
944c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
954c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
964c1ff481SSatish Balay    so that the same value can be used the next time through.
974c1ff481SSatish Balay */
984a2ae208SSatish Balay #undef __FUNCT__
994a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
100dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
101bc5ccf88SSatish Balay {
1026849ba73SBarry Smith   PetscErrorCode ierr;
1035bd3b8fbSHong Zhang   PetscInt       nsends=stash->nsends,bs2,oldnmax;
104a2d1c673SSatish Balay   MPI_Status     *send_status;
105a2d1c673SSatish Balay 
1063a40ed3dSBarry Smith   PetscFunctionBegin;
107a2d1c673SSatish Balay   /* wait on sends */
108a2d1c673SSatish Balay   if (nsends) {
10982502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
110a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
111606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
112a2d1c673SSatish Balay   }
113a2d1c673SSatish Balay 
114c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
115434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
116434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
117b9b97703SBarry Smith   if (stash->n) {
11894b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1198a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
120434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
121b9b97703SBarry Smith   }
122434d7ff9SSatish Balay 
123d07ff455SSatish Balay   stash->nmax       = 0;
124d07ff455SSatish Balay   stash->n          = 0;
1254c1ff481SSatish Balay   stash->reallocs   = -1;
126a2d1c673SSatish Balay   stash->nprocessed = 0;
12775cae7c1SHong Zhang   if (stash->space_head){
12875cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
12975cae7c1SHong Zhang     stash->space_head = 0;
13082740460SHong Zhang     stash->space      = 0;
13175cae7c1SHong Zhang   }
132606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
133606d414cSSatish Balay   stash->send_waits = 0;
134606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
135606d414cSSatish Balay   stash->recv_waits = 0;
136*c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
137606d414cSSatish Balay   stash->svalues = 0;
138*c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
139606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
140606d414cSSatish Balay   stash->rvalues = 0;
141*c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
142563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
143563fb871SSatish Balay   stash->rindices = 0;
1443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1459417f4adSLois Curfman McInnes }
1469417f4adSLois Curfman McInnes 
1474c1ff481SSatish Balay /*
1488798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1494c1ff481SSatish Balay 
1504c1ff481SSatish Balay    Input Parameters:
1514c1ff481SSatish Balay    stash    - the stash
15294b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1534c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1544c1ff481SSatish Balay 
1554c1ff481SSatish Balay */
1564a2ae208SSatish Balay #undef __FUNCT__
1574a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
158c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
15997530c3fSBarry Smith {
160c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
16194b769a5SSatish Balay 
1623a40ed3dSBarry Smith   PetscFunctionBegin;
1631ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1641ecfd215SBarry Smith   if (reallocs) {
165434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
166434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1671ecfd215SBarry Smith   }
168bc5ccf88SSatish Balay   PetscFunctionReturn(0);
169bc5ccf88SSatish Balay }
1704c1ff481SSatish Balay 
1714c1ff481SSatish Balay /*
1728798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1734c1ff481SSatish Balay 
1744c1ff481SSatish Balay    Input Parameters:
1754c1ff481SSatish Balay    stash  - the stash
1764c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1774c1ff481SSatish Balay             this value is used while allocating memory.
1784c1ff481SSatish Balay */
1794a2ae208SSatish Balay #undef __FUNCT__
1804a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
181c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
182bc5ccf88SSatish Balay {
183bc5ccf88SSatish Balay   PetscFunctionBegin;
184434d7ff9SSatish Balay   stash->umax = max;
1853a40ed3dSBarry Smith   PetscFunctionReturn(0);
18697530c3fSBarry Smith }
18797530c3fSBarry Smith 
1888798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1894c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1904c1ff481SSatish Balay    being inserted into the stash.
1914c1ff481SSatish Balay 
1924c1ff481SSatish Balay    Input Parameters:
1934c1ff481SSatish Balay    stash - the stash
1944c1ff481SSatish Balay    incr  - the minimum increase requested
1954c1ff481SSatish Balay 
1964c1ff481SSatish Balay    Notes:
1974c1ff481SSatish Balay    This routine doubles the currently used memory.
1984c1ff481SSatish Balay  */
1994a2ae208SSatish Balay #undef __FUNCT__
2004a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
201c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2029417f4adSLois Curfman McInnes {
2036849ba73SBarry Smith   PetscErrorCode ierr;
2045bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2059417f4adSLois Curfman McInnes 
2063a40ed3dSBarry Smith   PetscFunctionBegin;
2079417f4adSLois Curfman McInnes   /* allocate a larger stash */
208c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
209434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
210434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
211c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
212434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
213434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
214434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2154c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
216d07ff455SSatish Balay 
21775cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
21875cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
219b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
220b087b6d6SSatish Balay     stash->space_head = stash->space;
22175cae7c1SHong Zhang   }
222b087b6d6SSatish Balay 
223bc5ccf88SSatish Balay   stash->reallocs++;
22475cae7c1SHong Zhang   stash->nmax = newnmax;
225bc5ccf88SSatish Balay   PetscFunctionReturn(0);
226bc5ccf88SSatish Balay }
227bc5ccf88SSatish Balay /*
2288798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2294c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2304c1ff481SSatish Balay   can be inserted with a single call to this function.
2314c1ff481SSatish Balay 
2324c1ff481SSatish Balay   Input Parameters:
2334c1ff481SSatish Balay   stash  - the stash
2344c1ff481SSatish Balay   row    - the global row correspoiding to the values
2354c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2364c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2374c1ff481SSatish Balay   values - the values inserted
238bc5ccf88SSatish Balay */
2394a2ae208SSatish Balay #undef __FUNCT__
2404a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
241b400d20cSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries)
242bc5ccf88SSatish Balay {
243dfbe8321SBarry Smith   PetscErrorCode     ierr;
244b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
24575cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
246bc5ccf88SSatish Balay 
247bc5ccf88SSatish Balay   PetscFunctionBegin;
2484c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
24975cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2508798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2519417f4adSLois Curfman McInnes   }
25275cae7c1SHong Zhang   space = stash->space;
25375cae7c1SHong Zhang   k     = space->local_used;
2544c1ff481SSatish Balay   for (i=0; i<n; i++) {
25588c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
25675cae7c1SHong Zhang     space->idx[k] = row;
25775cae7c1SHong Zhang     space->idy[k] = idxn[i];
25875cae7c1SHong Zhang     space->val[k] = values[i];
25975cae7c1SHong Zhang     k++;
260b400d20cSBarry Smith     cnt++;
2619417f4adSLois Curfman McInnes   }
262b400d20cSBarry Smith   stash->n               += cnt;
263b400d20cSBarry Smith   space->local_used      += cnt;
264b400d20cSBarry Smith   space->local_remaining -= cnt;
265a2d1c673SSatish Balay   PetscFunctionReturn(0);
266a2d1c673SSatish Balay }
26775cae7c1SHong Zhang 
2684c1ff481SSatish Balay /*
2698798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2704c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2714c1ff481SSatish Balay   can be inserted with a single call to this function.
272a2d1c673SSatish Balay 
2734c1ff481SSatish Balay   Input Parameters:
2744c1ff481SSatish Balay   stash   - the stash
2754c1ff481SSatish Balay   row     - the global row correspoiding to the values
2764c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2774c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2784c1ff481SSatish Balay   values  - the values inserted
2794c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2804c1ff481SSatish Balay             this happens because the input is columnoriented.
2814c1ff481SSatish Balay */
2824a2ae208SSatish Balay #undef __FUNCT__
2834a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
284b400d20cSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries)
285a2d1c673SSatish Balay {
286dfbe8321SBarry Smith   PetscErrorCode     ierr;
28750e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
28875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
289a2d1c673SSatish Balay 
2904c1ff481SSatish Balay   PetscFunctionBegin;
2914c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
29275cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2938798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2944c1ff481SSatish Balay   }
29575cae7c1SHong Zhang   space = stash->space;
29675cae7c1SHong Zhang   k = space->local_used;
2974c1ff481SSatish Balay   for (i=0; i<n; i++) {
29888c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
29975cae7c1SHong Zhang     space->idx[k] = row;
30075cae7c1SHong Zhang     space->idy[k] = idxn[i];
30175cae7c1SHong Zhang     space->val[k] = values[i*stepval];
30275cae7c1SHong Zhang     k++;
303b400d20cSBarry Smith     cnt++;
3044c1ff481SSatish Balay   }
305b400d20cSBarry Smith   stash->n               += cnt;
306b400d20cSBarry Smith   space->local_used      += cnt;
307b400d20cSBarry Smith   space->local_remaining -= cnt;
3084c1ff481SSatish Balay   PetscFunctionReturn(0);
3094c1ff481SSatish Balay }
3104c1ff481SSatish Balay 
3114c1ff481SSatish Balay /*
3128798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3134c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3144c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3154c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3164c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3174c1ff481SSatish Balay 
3184c1ff481SSatish Balay   Input Parameters:
3194c1ff481SSatish Balay   stash  - the stash
3204c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3214c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3224c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3234c1ff481SSatish Balay            values. Each block is of size bs*bs.
3244c1ff481SSatish Balay   values - the values inserted
3254c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3264c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3274c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3284c1ff481SSatish Balay */
3294a2ae208SSatish Balay #undef __FUNCT__
3304a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
33154f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3324c1ff481SSatish Balay {
333dfbe8321SBarry Smith   PetscErrorCode     ierr;
33475cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
33554f21887SBarry Smith   const PetscScalar  *vals;
33654f21887SBarry Smith   PetscScalar        *array;
33775cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
338a2d1c673SSatish Balay 
339a2d1c673SSatish Balay   PetscFunctionBegin;
34075cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3418798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
342a2d1c673SSatish Balay   }
34375cae7c1SHong Zhang   space = stash->space;
34475cae7c1SHong Zhang   l     = space->local_used;
34575cae7c1SHong Zhang   bs2   = bs*bs;
3464c1ff481SSatish Balay   for (i=0; i<n; i++) {
34775cae7c1SHong Zhang     space->idx[l] = row;
34875cae7c1SHong Zhang     space->idy[l] = idxn[i];
34975cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
35075cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
35175cae7c1SHong Zhang        funtion call */
35275cae7c1SHong Zhang     array = space->val + bs2*l;
35375cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
35475cae7c1SHong Zhang     for (j=0; j<bs; j++) {
35575cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
35675cae7c1SHong Zhang       array++;
35775cae7c1SHong Zhang       vals  += cmax*bs;
35875cae7c1SHong Zhang     }
35975cae7c1SHong Zhang     l++;
360a2d1c673SSatish Balay   }
3615bd3b8fbSHong Zhang   stash->n               += n;
36275cae7c1SHong Zhang   space->local_used      += n;
36375cae7c1SHong Zhang   space->local_remaining -= n;
3644c1ff481SSatish Balay   PetscFunctionReturn(0);
3654c1ff481SSatish Balay }
3664c1ff481SSatish Balay 
3674c1ff481SSatish Balay /*
3688798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3694c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3704c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3714c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3724c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3734c1ff481SSatish Balay 
3744c1ff481SSatish Balay   Input Parameters:
3754c1ff481SSatish Balay   stash  - the stash
3764c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3774c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3784c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3794c1ff481SSatish Balay            values. Each block is of size bs*bs.
3804c1ff481SSatish Balay   values - the values inserted
3814c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3824c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3834c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3844c1ff481SSatish Balay */
3854a2ae208SSatish Balay #undef __FUNCT__
3864a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
38754f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3884c1ff481SSatish Balay {
389dfbe8321SBarry Smith   PetscErrorCode     ierr;
39075cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
39154f21887SBarry Smith   const PetscScalar  *vals;
39254f21887SBarry Smith   PetscScalar        *array;
39375cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
3944c1ff481SSatish Balay 
3954c1ff481SSatish Balay   PetscFunctionBegin;
39675cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3978798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3984c1ff481SSatish Balay   }
39975cae7c1SHong Zhang   space = stash->space;
40075cae7c1SHong Zhang   l     = space->local_used;
40175cae7c1SHong Zhang   bs2   = bs*bs;
4024c1ff481SSatish Balay   for (i=0; i<n; i++) {
40375cae7c1SHong Zhang     space->idx[l] = row;
40475cae7c1SHong Zhang     space->idy[l] = idxn[i];
40575cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40675cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
40775cae7c1SHong Zhang      funtion call */
40875cae7c1SHong Zhang     array = space->val + bs2*l;
40975cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
41075cae7c1SHong Zhang     for (j=0; j<bs; j++) {
41175cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
41275cae7c1SHong Zhang       array += bs;
41375cae7c1SHong Zhang       vals  += rmax*bs;
41475cae7c1SHong Zhang     }
4155bd3b8fbSHong Zhang     l++;
416a2d1c673SSatish Balay   }
4175bd3b8fbSHong Zhang   stash->n               += n;
41875cae7c1SHong Zhang   space->local_used      += n;
41975cae7c1SHong Zhang   space->local_remaining -= n;
4203a40ed3dSBarry Smith   PetscFunctionReturn(0);
4219417f4adSLois Curfman McInnes }
4224c1ff481SSatish Balay /*
4238798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4244c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4254c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4264c1ff481SSatish Balay   processors.
427bc5ccf88SSatish Balay 
4284c1ff481SSatish Balay   Input Parameters:
4294c1ff481SSatish Balay   stash  - the stash
4304c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4314c1ff481SSatish Balay            for each node.
4324c1ff481SSatish Balay 
4334c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4344c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4354c1ff481SSatish Balay   the proper global indices.
4364c1ff481SSatish Balay */
4374a2ae208SSatish Balay #undef __FUNCT__
4384a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4391e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
440bc5ccf88SSatish Balay {
441c1ac3661SBarry Smith   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
442fe09c992SBarry Smith   PetscInt          size=stash->size,nsends;
4436849ba73SBarry Smith   PetscErrorCode    ierr;
44475cae7c1SHong Zhang   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
44554f21887SBarry Smith   PetscScalar       **rvalues,*svalues;
446bc5ccf88SSatish Balay   MPI_Comm          comm = stash->comm;
447563fb871SSatish Balay   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
448fe09c992SBarry Smith   PetscMPIInt       *nprocs,*nlengths,nreceives;
4495bd3b8fbSHong Zhang   PetscInt          *sp_idx,*sp_idy;
45054f21887SBarry Smith   PetscScalar       *sp_val;
4515bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
452bc5ccf88SSatish Balay 
453bc5ccf88SSatish Balay   PetscFunctionBegin;
4544c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
45575cae7c1SHong Zhang 
456bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
457*c05d87d6SBarry Smith   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
458*c05d87d6SBarry Smith   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
459*c05d87d6SBarry Smith   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
460*c05d87d6SBarry Smith   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
461c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
462a2d1c673SSatish Balay 
46375cae7c1SHong Zhang   i = j    = 0;
4647357eb19SBarry Smith   lastidx  = -1;
4655bd3b8fbSHong Zhang   space    = stash->space_head;
46675cae7c1SHong Zhang   while (space != PETSC_NULL){
46775cae7c1SHong Zhang     space_next = space->next;
4685bd3b8fbSHong Zhang     sp_idx     = space->idx;
46975cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4707357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4715bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4727357eb19SBarry Smith       lastidx = idx;
4737357eb19SBarry Smith       for (; j<size; j++) {
4744c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
475563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
476bc5ccf88SSatish Balay         }
477bc5ccf88SSatish Balay       }
47875cae7c1SHong Zhang       i++;
47975cae7c1SHong Zhang     }
48075cae7c1SHong Zhang     space = space_next;
481bc5ccf88SSatish Balay   }
482563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
483563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
484563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
485563fb871SSatish Balay   }
486bc5ccf88SSatish Balay 
48754f21887SBarry Smith   {PetscMPIInt  *onodes,*olengths;
488563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
489563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
490563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
491563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
492563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
493563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
494563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
495563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
496563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
497563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
498563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
499bc5ccf88SSatish Balay   }
500bc5ccf88SSatish Balay 
501bc5ccf88SSatish Balay   /* do sends:
502bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
503bc5ccf88SSatish Balay          the ith processor
504bc5ccf88SSatish Balay   */
505*c05d87d6SBarry Smith   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
506b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
507*c05d87d6SBarry Smith   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
508a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
509bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
510bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
511563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
512563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
513bc5ccf88SSatish Balay   }
51475cae7c1SHong Zhang 
51575cae7c1SHong Zhang   i     = 0;
5165bd3b8fbSHong Zhang   space = stash->space_head;
51775cae7c1SHong Zhang   while (space != PETSC_NULL){
51875cae7c1SHong Zhang     space_next = space->next;
5195bd3b8fbSHong Zhang     sp_idx = space->idx;
5205bd3b8fbSHong Zhang     sp_idy = space->idy;
5215bd3b8fbSHong Zhang     sp_val = space->val;
52275cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
523bc5ccf88SSatish Balay       j = owner[i];
524a2d1c673SSatish Balay       if (bs2 == 1) {
5255bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
526a2d1c673SSatish Balay       } else {
527c1ac3661SBarry Smith         PetscInt     k;
52854f21887SBarry Smith         PetscScalar *buf1,*buf2;
5294c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
530b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5314c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
532a2d1c673SSatish Balay       }
5335bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5345bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
535bc5ccf88SSatish Balay       startv[j]++;
536bc5ccf88SSatish Balay       starti[j]++;
53775cae7c1SHong Zhang       i++;
53875cae7c1SHong Zhang     }
53975cae7c1SHong Zhang     space = space_next;
540bc5ccf88SSatish Balay   }
541bc5ccf88SSatish Balay   startv[0] = 0;
542563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
543e5d0e772SSatish Balay 
544bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
545563fb871SSatish Balay     if (nprocs[i]) {
546563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
547a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
548bc5ccf88SSatish Balay     }
549b85c94c3SSatish Balay   }
5506cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5511e2582c4SBarry Smith   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
552e5d0e772SSatish Balay   for (i=0; i<size; i++) {
553e5d0e772SSatish Balay     if (nprocs[i]) {
554a77337e4SBarry Smith       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
555e5d0e772SSatish Balay     }
556e5d0e772SSatish Balay   }
557e5d0e772SSatish Balay #endif
558*c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
559606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
560*c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
561*c05d87d6SBarry Smith   ierr = PetscFree(nprocs);CHKERRQ(ierr);
562a2d1c673SSatish Balay 
563563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
564563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
565563fb871SSatish Balay 
566563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
567563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
568563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
569563fb871SSatish Balay   }
570563fb871SSatish Balay   stash->recv_waits = recv_waits;
571563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
572563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
573563fb871SSatish Balay 
574*c05d87d6SBarry Smith   stash->svalues     = svalues;
575*c05d87d6SBarry Smith   stash->sindices    = sindices;
576*c05d87d6SBarry Smith   stash->rvalues     = rvalues;
577*c05d87d6SBarry Smith   stash->rindices    = rindices;
578*c05d87d6SBarry Smith   stash->send_waits  = send_waits;
579*c05d87d6SBarry Smith   stash->nsends      = nsends;
580*c05d87d6SBarry Smith   stash->nrecvs      = nreceives;
581bc5ccf88SSatish Balay   PetscFunctionReturn(0);
582bc5ccf88SSatish Balay }
583bc5ccf88SSatish Balay 
584a2d1c673SSatish Balay /*
5858798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5868798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5874c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5884c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5894c1ff481SSatish Balay 
5904c1ff481SSatish Balay    Input Parameters:
5914c1ff481SSatish Balay    stash - the stash
5924c1ff481SSatish Balay 
5934c1ff481SSatish Balay    Output Parameters:
5944c1ff481SSatish Balay    nvals - the number of entries in the current message.
5954c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5964c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5974c1ff481SSatish Balay    vals  - the values
5984c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5994c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6004c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
601a2d1c673SSatish Balay */
6024a2ae208SSatish Balay #undef __FUNCT__
6034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
60454f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
605bc5ccf88SSatish Balay {
6066849ba73SBarry Smith   PetscErrorCode ierr;
607*c05d87d6SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2,size;
608fe09c992SBarry Smith   PetscInt       bs2;
609a2d1c673SSatish Balay   MPI_Status     recv_status;
610b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
611bc5ccf88SSatish Balay 
612bc5ccf88SSatish Balay   PetscFunctionBegin;
613bc5ccf88SSatish Balay 
614a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
615a2d1c673SSatish Balay   /* Return if no more messages to process */
616a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
617a2d1c673SSatish Balay 
618*c05d87d6SBarry Smith   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
619*c05d87d6SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&flg_v);CHKERRQ(ierr);
620*c05d87d6SBarry Smith   for (i=0; i<2*size; i++) flg_v[i] = -1;
621*c05d87d6SBarry Smith 
6224c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
623a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
624a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
625a2d1c673SSatish Balay   while (!match_found) {
626a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
627a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
628a2d1c673SSatish Balay     if (i % 2) {
629a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
630c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
631a2d1c673SSatish Balay       *nvals = *nvals/bs2;
632563fb871SSatish Balay     } else {
633563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
634563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
635563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
636bc5ccf88SSatish Balay     }
637a2d1c673SSatish Balay 
638cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
639c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
640c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
641a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
642563fb871SSatish Balay       *rows       = stash->rindices[i2];
643a2d1c673SSatish Balay       *cols       = *rows + *nvals;
644563fb871SSatish Balay       *vals       = stash->rvalues[i1];
645a2d1c673SSatish Balay       *flg        = 1;
646a2d1c673SSatish Balay       stash->nprocessed ++;
64735d8aa7fSBarry Smith       match_found = PETSC_TRUE;
648bc5ccf88SSatish Balay     }
649bc5ccf88SSatish Balay   }
650*c05d87d6SBarry Smith   ierr = PetscFree(flg_v);CHKERRQ(ierr);
651bc5ccf88SSatish Balay   PetscFunctionReturn(0);
652bc5ccf88SSatish Balay }
653