xref: /petsc/src/mat/utils/matstash.c (revision a77337e4d7d5143cd577a9cca2c72dcc9694336d)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
3b9147fbbSdalcinl #include "include/private/matimpl.h"
45bd3b8fbSHong Zhang 
5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
64c1ff481SSatish Balay 
79417f4adSLois Curfman McInnes /*
88798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
94c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
104c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
119417f4adSLois Curfman McInnes 
124c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
134c1ff481SSatish Balay 
144c1ff481SSatish Balay   Input Parameters:
154c1ff481SSatish Balay   comm - communicator, required for scatters.
164c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Output Parameters:
194c1ff481SSatish Balay   stash    - the newly created stash
209417f4adSLois Curfman McInnes */
214a2ae208SSatish Balay #undef __FUNCT__
224a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
23c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
249417f4adSLois Curfman McInnes {
25dfbe8321SBarry Smith   PetscErrorCode ierr;
26c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
27f1af5d2fSBarry Smith   PetscTruth     flg;
28bc5ccf88SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
30bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
31752ec6e0SSatish Balay   stash->comm = comm;
32752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
34a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
35a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
36bc5ccf88SSatish Balay 
37434d7ff9SSatish Balay   nopt = stash->size;
38d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
39b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
40434d7ff9SSatish Balay   if (flg) {
41434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
42434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
43434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
44f4ab19daSSatish Balay     else                          max = 0; /* Use default */
45434d7ff9SSatish Balay     stash->umax = max;
46434d7ff9SSatish Balay   } else {
47434d7ff9SSatish Balay     stash->umax = 0;
48434d7ff9SSatish Balay   }
49606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
504c1ff481SSatish Balay   if (bs <= 0) bs = 1;
51a2d1c673SSatish Balay 
524c1ff481SSatish Balay   stash->bs       = bs;
539417f4adSLois Curfman McInnes   stash->nmax     = 0;
54434d7ff9SSatish Balay   stash->oldnmax  = 0;
559417f4adSLois Curfman McInnes   stash->n        = 0;
564c1ff481SSatish Balay   stash->reallocs = -1;
5775cae7c1SHong Zhang   stash->space_head = 0;
5875cae7c1SHong Zhang   stash->space      = 0;
599417f4adSLois Curfman McInnes 
60bc5ccf88SSatish Balay   stash->send_waits  = 0;
61bc5ccf88SSatish Balay   stash->recv_waits  = 0;
62a2d1c673SSatish Balay   stash->send_status = 0;
63bc5ccf88SSatish Balay   stash->nsends      = 0;
64bc5ccf88SSatish Balay   stash->nrecvs      = 0;
65bc5ccf88SSatish Balay   stash->svalues     = 0;
66bc5ccf88SSatish Balay   stash->rvalues     = 0;
67563fb871SSatish Balay   stash->rindices    = 0;
68a2d1c673SSatish Balay   stash->nprocs      = 0;
69a2d1c673SSatish Balay   stash->nprocessed  = 0;
703a40ed3dSBarry Smith   PetscFunctionReturn(0);
719417f4adSLois Curfman McInnes }
729417f4adSLois Curfman McInnes 
734c1ff481SSatish Balay /*
748798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
754c1ff481SSatish Balay */
764a2ae208SSatish Balay #undef __FUNCT__
774a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
78dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
799417f4adSLois Curfman McInnes {
80dfbe8321SBarry Smith   PetscErrorCode ierr;
81a2d1c673SSatish Balay 
82bc5ccf88SSatish Balay   PetscFunctionBegin;
8375cae7c1SHong Zhang   if (stash->space_head){
8475cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
8575cae7c1SHong Zhang     stash->space_head = 0;
8682740460SHong Zhang     stash->space      = 0;
8775cae7c1SHong Zhang   }
88bc5ccf88SSatish Balay   PetscFunctionReturn(0);
89bc5ccf88SSatish Balay }
90bc5ccf88SSatish Balay 
914c1ff481SSatish Balay /*
928798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
934c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
944c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
954c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
964c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
974c1ff481SSatish Balay    so that the same value can be used the next time through.
984c1ff481SSatish Balay */
994a2ae208SSatish Balay #undef __FUNCT__
1004a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
101dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
102bc5ccf88SSatish Balay {
1036849ba73SBarry Smith   PetscErrorCode ierr;
1045bd3b8fbSHong Zhang   PetscInt       nsends=stash->nsends,bs2,oldnmax;
105a2d1c673SSatish Balay   MPI_Status     *send_status;
106a2d1c673SSatish Balay 
1073a40ed3dSBarry Smith   PetscFunctionBegin;
108a2d1c673SSatish Balay   /* wait on sends */
109a2d1c673SSatish Balay   if (nsends) {
11082502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
111a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
112606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
113a2d1c673SSatish Balay   }
114a2d1c673SSatish Balay 
115c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
116434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
117434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
118b9b97703SBarry Smith   if (stash->n) {
11994b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1208a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
121434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
122b9b97703SBarry Smith   }
123434d7ff9SSatish Balay 
124d07ff455SSatish Balay   stash->nmax       = 0;
125d07ff455SSatish Balay   stash->n          = 0;
1264c1ff481SSatish Balay   stash->reallocs   = -1;
127a2d1c673SSatish Balay   stash->nprocessed = 0;
12875cae7c1SHong Zhang   if (stash->space_head){
12975cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
13075cae7c1SHong Zhang     stash->space_head = 0;
13182740460SHong Zhang     stash->space      = 0;
13275cae7c1SHong Zhang   }
133606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
134606d414cSSatish Balay   stash->send_waits = 0;
135606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
136606d414cSSatish Balay   stash->recv_waits = 0;
137606d414cSSatish Balay   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
138606d414cSSatish Balay   stash->svalues = 0;
139606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
140606d414cSSatish Balay   stash->rvalues = 0;
141563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
142563fb871SSatish Balay   stash->rindices = 0;
143b22afee1SSatish Balay   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
144606d414cSSatish Balay   stash->nprocs = 0;
1453a40ed3dSBarry Smith   PetscFunctionReturn(0);
1469417f4adSLois Curfman McInnes }
1479417f4adSLois Curfman McInnes 
1484c1ff481SSatish Balay /*
1498798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1504c1ff481SSatish Balay 
1514c1ff481SSatish Balay    Input Parameters:
1524c1ff481SSatish Balay    stash    - the stash
15394b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1544c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1554c1ff481SSatish Balay 
1564c1ff481SSatish Balay */
1574a2ae208SSatish Balay #undef __FUNCT__
1584a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
159c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
16097530c3fSBarry Smith {
161c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
16294b769a5SSatish Balay 
1633a40ed3dSBarry Smith   PetscFunctionBegin;
1641ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1651ecfd215SBarry Smith   if (reallocs) {
166434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
167434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1681ecfd215SBarry Smith   }
169bc5ccf88SSatish Balay   PetscFunctionReturn(0);
170bc5ccf88SSatish Balay }
1714c1ff481SSatish Balay 
1724c1ff481SSatish Balay /*
1738798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1744c1ff481SSatish Balay 
1754c1ff481SSatish Balay    Input Parameters:
1764c1ff481SSatish Balay    stash  - the stash
1774c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1784c1ff481SSatish Balay             this value is used while allocating memory.
1794c1ff481SSatish Balay */
1804a2ae208SSatish Balay #undef __FUNCT__
1814a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
182c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
183bc5ccf88SSatish Balay {
184bc5ccf88SSatish Balay   PetscFunctionBegin;
185434d7ff9SSatish Balay   stash->umax = max;
1863a40ed3dSBarry Smith   PetscFunctionReturn(0);
18797530c3fSBarry Smith }
18897530c3fSBarry Smith 
1898798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1904c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1914c1ff481SSatish Balay    being inserted into the stash.
1924c1ff481SSatish Balay 
1934c1ff481SSatish Balay    Input Parameters:
1944c1ff481SSatish Balay    stash - the stash
1954c1ff481SSatish Balay    incr  - the minimum increase requested
1964c1ff481SSatish Balay 
1974c1ff481SSatish Balay    Notes:
1984c1ff481SSatish Balay    This routine doubles the currently used memory.
1994c1ff481SSatish Balay  */
2004a2ae208SSatish Balay #undef __FUNCT__
2014a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
202c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2039417f4adSLois Curfman McInnes {
2046849ba73SBarry Smith   PetscErrorCode ierr;
2055bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2069417f4adSLois Curfman McInnes 
2073a40ed3dSBarry Smith   PetscFunctionBegin;
2089417f4adSLois Curfman McInnes   /* allocate a larger stash */
209c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
210434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
211434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
212c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
213434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
214434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
215434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2164c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
217d07ff455SSatish Balay 
21875cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
21975cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
220b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
221b087b6d6SSatish Balay     stash->space_head = stash->space;
22275cae7c1SHong Zhang   }
223b087b6d6SSatish Balay 
224bc5ccf88SSatish Balay   stash->reallocs++;
22575cae7c1SHong Zhang   stash->nmax = newnmax;
226bc5ccf88SSatish Balay   PetscFunctionReturn(0);
227bc5ccf88SSatish Balay }
228bc5ccf88SSatish Balay /*
2298798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2304c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2314c1ff481SSatish Balay   can be inserted with a single call to this function.
2324c1ff481SSatish Balay 
2334c1ff481SSatish Balay   Input Parameters:
2344c1ff481SSatish Balay   stash  - the stash
2354c1ff481SSatish Balay   row    - the global row correspoiding to the values
2364c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2374c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2384c1ff481SSatish Balay   values - the values inserted
239bc5ccf88SSatish Balay */
2404a2ae208SSatish Balay #undef __FUNCT__
2414a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
24254f21887SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[])
243bc5ccf88SSatish Balay {
244dfbe8321SBarry Smith   PetscErrorCode     ierr;
24575cae7c1SHong Zhang   PetscInt           i,k;
24675cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
247bc5ccf88SSatish Balay 
248bc5ccf88SSatish Balay   PetscFunctionBegin;
2494c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
25075cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2518798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2529417f4adSLois Curfman McInnes   }
25375cae7c1SHong Zhang   space = stash->space;
25475cae7c1SHong Zhang   k     = space->local_used;
2554c1ff481SSatish Balay   for (i=0; i<n; i++) {
25675cae7c1SHong Zhang     space->idx[k] = row;
25775cae7c1SHong Zhang     space->idy[k] = idxn[i];
25875cae7c1SHong Zhang     space->val[k] = values[i];
25975cae7c1SHong Zhang     k++;
2609417f4adSLois Curfman McInnes   }
2615bd3b8fbSHong Zhang   stash->n               += n;
26275cae7c1SHong Zhang   space->local_used      += n;
26375cae7c1SHong Zhang   space->local_remaining -= n;
264a2d1c673SSatish Balay   PetscFunctionReturn(0);
265a2d1c673SSatish Balay }
26675cae7c1SHong Zhang 
2674c1ff481SSatish Balay /*
2688798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2694c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2704c1ff481SSatish Balay   can be inserted with a single call to this function.
271a2d1c673SSatish Balay 
2724c1ff481SSatish Balay   Input Parameters:
2734c1ff481SSatish Balay   stash   - the stash
2744c1ff481SSatish Balay   row     - the global row correspoiding to the values
2754c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2764c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2774c1ff481SSatish Balay   values  - the values inserted
2784c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2794c1ff481SSatish Balay             this happens because the input is columnoriented.
2804c1ff481SSatish Balay */
2814a2ae208SSatish Balay #undef __FUNCT__
2824a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
28354f21887SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval)
284a2d1c673SSatish Balay {
285dfbe8321SBarry Smith   PetscErrorCode     ierr;
28675cae7c1SHong Zhang   PetscInt           i,k;
28775cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
288a2d1c673SSatish Balay 
2894c1ff481SSatish Balay   PetscFunctionBegin;
2904c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
29175cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2928798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2934c1ff481SSatish Balay   }
29475cae7c1SHong Zhang   space = stash->space;
29575cae7c1SHong Zhang   k = space->local_used;
2964c1ff481SSatish Balay   for (i=0; i<n; i++) {
29775cae7c1SHong Zhang     space->idx[k] = row;
29875cae7c1SHong Zhang     space->idy[k] = idxn[i];
29975cae7c1SHong Zhang     space->val[k] = values[i*stepval];
30075cae7c1SHong Zhang     k++;
3014c1ff481SSatish Balay   }
3025bd3b8fbSHong Zhang   stash->n               += n;
30375cae7c1SHong Zhang   space->local_used      += n;
30475cae7c1SHong Zhang   space->local_remaining -= n;
3054c1ff481SSatish Balay   PetscFunctionReturn(0);
3064c1ff481SSatish Balay }
3074c1ff481SSatish Balay 
3084c1ff481SSatish Balay /*
3098798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3104c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3114c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3124c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3134c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3144c1ff481SSatish Balay 
3154c1ff481SSatish Balay   Input Parameters:
3164c1ff481SSatish Balay   stash  - the stash
3174c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3184c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3194c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3204c1ff481SSatish Balay            values. Each block is of size bs*bs.
3214c1ff481SSatish Balay   values - the values inserted
3224c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3234c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3244c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3254c1ff481SSatish Balay */
3264a2ae208SSatish Balay #undef __FUNCT__
3274a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
32854f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3294c1ff481SSatish Balay {
330dfbe8321SBarry Smith   PetscErrorCode     ierr;
33175cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
33254f21887SBarry Smith   const PetscScalar  *vals;
33354f21887SBarry Smith   PetscScalar        *array;
33475cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
335a2d1c673SSatish Balay 
336a2d1c673SSatish Balay   PetscFunctionBegin;
33775cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3388798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
339a2d1c673SSatish Balay   }
34075cae7c1SHong Zhang   space = stash->space;
34175cae7c1SHong Zhang   l     = space->local_used;
34275cae7c1SHong Zhang   bs2   = bs*bs;
3434c1ff481SSatish Balay   for (i=0; i<n; i++) {
34475cae7c1SHong Zhang     space->idx[l] = row;
34575cae7c1SHong Zhang     space->idy[l] = idxn[i];
34675cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
34775cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
34875cae7c1SHong Zhang        funtion call */
34975cae7c1SHong Zhang     array = space->val + bs2*l;
35075cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
35175cae7c1SHong Zhang     for (j=0; j<bs; j++) {
35275cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
35375cae7c1SHong Zhang       array++;
35475cae7c1SHong Zhang       vals  += cmax*bs;
35575cae7c1SHong Zhang     }
35675cae7c1SHong Zhang     l++;
357a2d1c673SSatish Balay   }
3585bd3b8fbSHong Zhang   stash->n               += n;
35975cae7c1SHong Zhang   space->local_used      += n;
36075cae7c1SHong Zhang   space->local_remaining -= n;
3614c1ff481SSatish Balay   PetscFunctionReturn(0);
3624c1ff481SSatish Balay }
3634c1ff481SSatish Balay 
3644c1ff481SSatish Balay /*
3658798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3664c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3674c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3684c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3694c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3704c1ff481SSatish Balay 
3714c1ff481SSatish Balay   Input Parameters:
3724c1ff481SSatish Balay   stash  - the stash
3734c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3744c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3754c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3764c1ff481SSatish Balay            values. Each block is of size bs*bs.
3774c1ff481SSatish Balay   values - the values inserted
3784c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3794c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3804c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3814c1ff481SSatish Balay */
3824a2ae208SSatish Balay #undef __FUNCT__
3834a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
38454f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3854c1ff481SSatish Balay {
386dfbe8321SBarry Smith   PetscErrorCode     ierr;
38775cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
38854f21887SBarry Smith   const PetscScalar  *vals;
38954f21887SBarry Smith   PetscScalar        *array;
39075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
3914c1ff481SSatish Balay 
3924c1ff481SSatish Balay   PetscFunctionBegin;
39375cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3948798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3954c1ff481SSatish Balay   }
39675cae7c1SHong Zhang   space = stash->space;
39775cae7c1SHong Zhang   l     = space->local_used;
39875cae7c1SHong Zhang   bs2   = bs*bs;
3994c1ff481SSatish Balay   for (i=0; i<n; i++) {
40075cae7c1SHong Zhang     space->idx[l] = row;
40175cae7c1SHong Zhang     space->idy[l] = idxn[i];
40275cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40375cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
40475cae7c1SHong Zhang      funtion call */
40575cae7c1SHong Zhang     array = space->val + bs2*l;
40675cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
40775cae7c1SHong Zhang     for (j=0; j<bs; j++) {
40875cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
40975cae7c1SHong Zhang       array += bs;
41075cae7c1SHong Zhang       vals  += rmax*bs;
41175cae7c1SHong Zhang     }
4125bd3b8fbSHong Zhang     l++;
413a2d1c673SSatish Balay   }
4145bd3b8fbSHong Zhang   stash->n               += n;
41575cae7c1SHong Zhang   space->local_used      += n;
41675cae7c1SHong Zhang   space->local_remaining -= n;
4173a40ed3dSBarry Smith   PetscFunctionReturn(0);
4189417f4adSLois Curfman McInnes }
4194c1ff481SSatish Balay /*
4208798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4214c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4224c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4234c1ff481SSatish Balay   processors.
424bc5ccf88SSatish Balay 
4254c1ff481SSatish Balay   Input Parameters:
4264c1ff481SSatish Balay   stash  - the stash
4274c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4284c1ff481SSatish Balay            for each node.
4294c1ff481SSatish Balay 
  Notes: In the case of the blocked stash, the 'owners' array gives the
  ranges in blocked global indices; for the regular stash it gives the
  ranges in the ordinary global indices.
4334c1ff481SSatish Balay */
4344a2ae208SSatish Balay #undef __FUNCT__
4354a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4361e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
437bc5ccf88SSatish Balay {
438c1ac3661SBarry Smith   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
439fe09c992SBarry Smith   PetscInt          size=stash->size,nsends;
4406849ba73SBarry Smith   PetscErrorCode    ierr;
44175cae7c1SHong Zhang   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
44254f21887SBarry Smith   PetscScalar       **rvalues,*svalues;
443bc5ccf88SSatish Balay   MPI_Comm          comm = stash->comm;
444563fb871SSatish Balay   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
445fe09c992SBarry Smith   PetscMPIInt       *nprocs,*nlengths,nreceives;
4465bd3b8fbSHong Zhang   PetscInt          *sp_idx,*sp_idy;
44754f21887SBarry Smith   PetscScalar       *sp_val;
4485bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
449bc5ccf88SSatish Balay 
450bc5ccf88SSatish Balay   PetscFunctionBegin;
4514c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
45275cae7c1SHong Zhang 
453bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
454fe09c992SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
455fe09c992SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
456c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
457a2d1c673SSatish Balay 
458563fb871SSatish Balay   nlengths = nprocs+size;
45975cae7c1SHong Zhang   i = j    = 0;
4607357eb19SBarry Smith   lastidx  = -1;
4615bd3b8fbSHong Zhang   space    = stash->space_head;
46275cae7c1SHong Zhang   while (space != PETSC_NULL){
46375cae7c1SHong Zhang     space_next = space->next;
4645bd3b8fbSHong Zhang     sp_idx     = space->idx;
46575cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4667357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4675bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4687357eb19SBarry Smith       lastidx = idx;
4697357eb19SBarry Smith       for (; j<size; j++) {
4704c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
471563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
472bc5ccf88SSatish Balay         }
473bc5ccf88SSatish Balay       }
47475cae7c1SHong Zhang       i++;
47575cae7c1SHong Zhang     }
47675cae7c1SHong Zhang     space = space_next;
477bc5ccf88SSatish Balay   }
478563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
479563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
480563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
481563fb871SSatish Balay   }
482bc5ccf88SSatish Balay 
48354f21887SBarry Smith   {PetscMPIInt  *onodes,*olengths;
484563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
485563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
486563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
487563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
488563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
489563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
490563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
491563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
492563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
493563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
494563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
495bc5ccf88SSatish Balay   }
496bc5ccf88SSatish Balay 
497bc5ccf88SSatish Balay   /* do sends:
498bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
499bc5ccf88SSatish Balay          the ith processor
500bc5ccf88SSatish Balay   */
501*a77337e4SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
502c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
503b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
504c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
505bc5ccf88SSatish Balay   starti   = startv + size;
506a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
507bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
508bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
509563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
510563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
511bc5ccf88SSatish Balay   }
51275cae7c1SHong Zhang 
51375cae7c1SHong Zhang   i     = 0;
5145bd3b8fbSHong Zhang   space = stash->space_head;
51575cae7c1SHong Zhang   while (space != PETSC_NULL){
51675cae7c1SHong Zhang     space_next = space->next;
5175bd3b8fbSHong Zhang     sp_idx = space->idx;
5185bd3b8fbSHong Zhang     sp_idy = space->idy;
5195bd3b8fbSHong Zhang     sp_val = space->val;
52075cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
521bc5ccf88SSatish Balay       j = owner[i];
522a2d1c673SSatish Balay       if (bs2 == 1) {
5235bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
524a2d1c673SSatish Balay       } else {
525c1ac3661SBarry Smith         PetscInt     k;
52654f21887SBarry Smith         PetscScalar *buf1,*buf2;
5274c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
528b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5294c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
530a2d1c673SSatish Balay       }
5315bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5325bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
533bc5ccf88SSatish Balay       startv[j]++;
534bc5ccf88SSatish Balay       starti[j]++;
53575cae7c1SHong Zhang       i++;
53675cae7c1SHong Zhang     }
53775cae7c1SHong Zhang     space = space_next;
538bc5ccf88SSatish Balay   }
539bc5ccf88SSatish Balay   startv[0] = 0;
540563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
541e5d0e772SSatish Balay 
542bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
543563fb871SSatish Balay     if (nprocs[i]) {
544563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
545*a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
546bc5ccf88SSatish Balay     }
547b85c94c3SSatish Balay   }
5486cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5491e2582c4SBarry Smith   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
550e5d0e772SSatish Balay   for (i=0; i<size; i++) {
551e5d0e772SSatish Balay     if (nprocs[i]) {
552*a77337e4SBarry Smith       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
553e5d0e772SSatish Balay     }
554e5d0e772SSatish Balay   }
555e5d0e772SSatish Balay #endif
556606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
557606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
558a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
559a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
560a2d1c673SSatish Balay   stash->nprocs = nprocs;
561a2d1c673SSatish Balay 
562563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
563563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
564563fb871SSatish Balay 
565563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
566563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
567563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
568563fb871SSatish Balay   }
569563fb871SSatish Balay   stash->recv_waits = recv_waits;
570563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
571563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
572563fb871SSatish Balay 
573bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
574563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
575bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
576bc5ccf88SSatish Balay   PetscFunctionReturn(0);
577bc5ccf88SSatish Balay }
578bc5ccf88SSatish Balay 
579a2d1c673SSatish Balay /*
5808798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5818798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5824c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5834c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5844c1ff481SSatish Balay 
5854c1ff481SSatish Balay    Input Parameters:
5864c1ff481SSatish Balay    stash - the stash
5874c1ff481SSatish Balay 
5884c1ff481SSatish Balay    Output Parameters:
5894c1ff481SSatish Balay    nvals - the number of entries in the current message.
5904c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5914c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5924c1ff481SSatish Balay    vals  - the values
5934c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5944c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5954c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
596a2d1c673SSatish Balay */
5974a2ae208SSatish Balay #undef __FUNCT__
5984a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
59954f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
600bc5ccf88SSatish Balay {
6016849ba73SBarry Smith   PetscErrorCode ierr;
602fe09c992SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2;
603fe09c992SBarry Smith   PetscInt       bs2;
604a2d1c673SSatish Balay   MPI_Status     recv_status;
605b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
606bc5ccf88SSatish Balay 
607bc5ccf88SSatish Balay   PetscFunctionBegin;
608bc5ccf88SSatish Balay 
609a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
610a2d1c673SSatish Balay   /* Return if no more messages to process */
611a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
612a2d1c673SSatish Balay 
613a2d1c673SSatish Balay   flg_v = stash->nprocs;
6144c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
615a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
616a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
617a2d1c673SSatish Balay   while (!match_found) {
618a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
619a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
620a2d1c673SSatish Balay     if (i % 2) {
621*a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
622c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
623a2d1c673SSatish Balay       *nvals = *nvals/bs2;
624563fb871SSatish Balay     } else {
625563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
626563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
627563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
628bc5ccf88SSatish Balay     }
629a2d1c673SSatish Balay 
630a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
631c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
632c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
633a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
634563fb871SSatish Balay       *rows       = stash->rindices[i2];
635a2d1c673SSatish Balay       *cols       = *rows + *nvals;
636563fb871SSatish Balay       *vals       = stash->rvalues[i1];
637a2d1c673SSatish Balay       *flg        = 1;
638a2d1c673SSatish Balay       stash->nprocessed ++;
63935d8aa7fSBarry Smith       match_found = PETSC_TRUE;
640bc5ccf88SSatish Balay     }
641bc5ccf88SSatish Balay   }
642bc5ccf88SSatish Balay   PetscFunctionReturn(0);
643bc5ccf88SSatish Balay }
644