xref: /petsc/src/mat/utils/matstash.c (revision 827404605a15659325321809d292aa22a79cdf6e)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
475cae7c1SHong Zhang #include "src/mat/utils/matstashspace.h"
55bd3b8fbSHong Zhang 
63eda8832SBarry Smith /*
70ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
80ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
93eda8832SBarry Smith */
10bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
114c1ff481SSatish Balay 
129417f4adSLois Curfman McInnes /*
138798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
144c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
154c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
169417f4adSLois Curfman McInnes 
174c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
184c1ff481SSatish Balay 
194c1ff481SSatish Balay   Input Parameters:
204c1ff481SSatish Balay   comm - communicator, required for scatters.
214c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
224c1ff481SSatish Balay 
234c1ff481SSatish Balay   Output Parameters:
244c1ff481SSatish Balay   stash    - the newly created stash
259417f4adSLois Curfman McInnes */
264a2ae208SSatish Balay #undef __FUNCT__
274a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
299417f4adSLois Curfman McInnes {
30dfbe8321SBarry Smith   PetscErrorCode ierr;
31c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
32f1af5d2fSBarry Smith   PetscTruth     flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
37752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41bc5ccf88SSatish Balay 
42434d7ff9SSatish Balay   nopt = stash->size;
43d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45434d7ff9SSatish Balay   if (flg) {
46434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
47434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
48434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
49f4ab19daSSatish Balay     else                          max = 0; /* Use default */
50434d7ff9SSatish Balay     stash->umax = max;
51434d7ff9SSatish Balay   } else {
52434d7ff9SSatish Balay     stash->umax = 0;
53434d7ff9SSatish Balay   }
54606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
554c1ff481SSatish Balay   if (bs <= 0) bs = 1;
56a2d1c673SSatish Balay 
574c1ff481SSatish Balay   stash->bs       = bs;
589417f4adSLois Curfman McInnes   stash->nmax     = 0;
59434d7ff9SSatish Balay   stash->oldnmax  = 0;
609417f4adSLois Curfman McInnes   stash->n        = 0;
614c1ff481SSatish Balay   stash->reallocs = -1;
6275cae7c1SHong Zhang   stash->space_head = 0;
6375cae7c1SHong Zhang   stash->space      = 0;
649417f4adSLois Curfman McInnes 
65bc5ccf88SSatish Balay   stash->send_waits  = 0;
66bc5ccf88SSatish Balay   stash->recv_waits  = 0;
67a2d1c673SSatish Balay   stash->send_status = 0;
68bc5ccf88SSatish Balay   stash->nsends      = 0;
69bc5ccf88SSatish Balay   stash->nrecvs      = 0;
70bc5ccf88SSatish Balay   stash->svalues     = 0;
71bc5ccf88SSatish Balay   stash->rvalues     = 0;
72563fb871SSatish Balay   stash->rindices    = 0;
73a2d1c673SSatish Balay   stash->nprocs      = 0;
74a2d1c673SSatish Balay   stash->nprocessed  = 0;
753a40ed3dSBarry Smith   PetscFunctionReturn(0);
769417f4adSLois Curfman McInnes }
779417f4adSLois Curfman McInnes 
784c1ff481SSatish Balay /*
798798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
804c1ff481SSatish Balay */
814a2ae208SSatish Balay #undef __FUNCT__
824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
849417f4adSLois Curfman McInnes {
85dfbe8321SBarry Smith   PetscErrorCode ierr;
86a2d1c673SSatish Balay 
87bc5ccf88SSatish Balay   PetscFunctionBegin;
8875cae7c1SHong Zhang   if (stash->space_head){
8975cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
9075cae7c1SHong Zhang     stash->space_head = 0;
91*82740460SHong Zhang     stash->space      = 0;
9275cae7c1SHong Zhang   }
93bc5ccf88SSatish Balay   PetscFunctionReturn(0);
94bc5ccf88SSatish Balay }
95bc5ccf88SSatish Balay 
964c1ff481SSatish Balay /*
978798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
984c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
994c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
1004c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1014c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1024c1ff481SSatish Balay    so that the same value can be used the next time through.
1034c1ff481SSatish Balay */
1044a2ae208SSatish Balay #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
106dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107bc5ccf88SSatish Balay {
1086849ba73SBarry Smith   PetscErrorCode ierr;
1095bd3b8fbSHong Zhang   PetscInt       nsends=stash->nsends,bs2,oldnmax;
110a2d1c673SSatish Balay   MPI_Status     *send_status;
111a2d1c673SSatish Balay 
1123a40ed3dSBarry Smith   PetscFunctionBegin;
113a2d1c673SSatish Balay   /* wait on sends */
114a2d1c673SSatish Balay   if (nsends) {
11582502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
116a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
117606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
118a2d1c673SSatish Balay   }
119a2d1c673SSatish Balay 
120c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
121434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
122434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
123b9b97703SBarry Smith   if (stash->n) {
12494b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1258a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
126434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127b9b97703SBarry Smith   }
128434d7ff9SSatish Balay 
129d07ff455SSatish Balay   stash->nmax       = 0;
130d07ff455SSatish Balay   stash->n          = 0;
1314c1ff481SSatish Balay   stash->reallocs   = -1;
132a2d1c673SSatish Balay   stash->nprocessed = 0;
13375cae7c1SHong Zhang   if (stash->space_head){
13475cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
13575cae7c1SHong Zhang     stash->space_head = 0;
136*82740460SHong Zhang     stash->space      = 0;
13775cae7c1SHong Zhang   }
138606d414cSSatish Balay   if (stash->send_waits) {
139606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
140606d414cSSatish Balay     stash->send_waits = 0;
141606d414cSSatish Balay   }
142606d414cSSatish Balay   if (stash->recv_waits) {
143606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
144606d414cSSatish Balay     stash->recv_waits = 0;
145606d414cSSatish Balay   }
146606d414cSSatish Balay   if (stash->svalues) {
147606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
148606d414cSSatish Balay     stash->svalues = 0;
149606d414cSSatish Balay   }
150606d414cSSatish Balay   if (stash->rvalues) {
151606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
152606d414cSSatish Balay     stash->rvalues = 0;
153606d414cSSatish Balay   }
154563fb871SSatish Balay   if (stash->rindices) {
155563fb871SSatish Balay     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
156563fb871SSatish Balay     stash->rindices = 0;
157563fb871SSatish Balay   }
158606d414cSSatish Balay   if (stash->nprocs) {
159b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
160606d414cSSatish Balay     stash->nprocs = 0;
161606d414cSSatish Balay   }
1623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1639417f4adSLois Curfman McInnes }
1649417f4adSLois Curfman McInnes 
1654c1ff481SSatish Balay /*
1668798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1674c1ff481SSatish Balay 
1684c1ff481SSatish Balay    Input Parameters:
1694c1ff481SSatish Balay    stash    - the stash
17094b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1714c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1724c1ff481SSatish Balay 
1734c1ff481SSatish Balay */
1744a2ae208SSatish Balay #undef __FUNCT__
1754a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
176c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
17797530c3fSBarry Smith {
178c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
17994b769a5SSatish Balay 
1803a40ed3dSBarry Smith   PetscFunctionBegin;
1811ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1821ecfd215SBarry Smith   if (reallocs) {
183434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
184434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1851ecfd215SBarry Smith   }
186bc5ccf88SSatish Balay   PetscFunctionReturn(0);
187bc5ccf88SSatish Balay }
1884c1ff481SSatish Balay 
1894c1ff481SSatish Balay /*
1908798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1914c1ff481SSatish Balay 
1924c1ff481SSatish Balay    Input Parameters:
1934c1ff481SSatish Balay    stash  - the stash
1944c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1954c1ff481SSatish Balay             this value is used while allocating memory.
1964c1ff481SSatish Balay */
1974a2ae208SSatish Balay #undef __FUNCT__
1984a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
199c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
200bc5ccf88SSatish Balay {
201bc5ccf88SSatish Balay   PetscFunctionBegin;
202434d7ff9SSatish Balay   stash->umax = max;
2033a40ed3dSBarry Smith   PetscFunctionReturn(0);
20497530c3fSBarry Smith }
20597530c3fSBarry Smith 
2068798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2074c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2084c1ff481SSatish Balay    being inserted into the stash.
2094c1ff481SSatish Balay 
2104c1ff481SSatish Balay    Input Parameters:
2114c1ff481SSatish Balay    stash - the stash
2124c1ff481SSatish Balay    incr  - the minimum increase requested
2134c1ff481SSatish Balay 
2144c1ff481SSatish Balay    Notes:
2154c1ff481SSatish Balay    This routine doubles the currently used memory.
2164c1ff481SSatish Balay  */
2174a2ae208SSatish Balay #undef __FUNCT__
2184a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
219c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2209417f4adSLois Curfman McInnes {
2216849ba73SBarry Smith   PetscErrorCode ierr;
2225bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2239417f4adSLois Curfman McInnes 
2243a40ed3dSBarry Smith   PetscFunctionBegin;
2259417f4adSLois Curfman McInnes   /* allocate a larger stash */
226c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
227434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
228434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
229c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
230434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
231434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
232434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2334c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
234d07ff455SSatish Balay 
23575cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
23675cae7c1SHong Zhang   if (!stash->nmax) { /* new stash or resuing stash->oldnmax */
23775cae7c1SHong Zhang     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr);
23875cae7c1SHong Zhang     stash->space = stash->space_head;
23975cae7c1SHong Zhang   } else {
24075cae7c1SHong Zhang     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
24175cae7c1SHong Zhang   }
242bc5ccf88SSatish Balay   stash->reallocs++;
24375cae7c1SHong Zhang   stash->nmax = newnmax;
244bc5ccf88SSatish Balay   PetscFunctionReturn(0);
245bc5ccf88SSatish Balay }
246bc5ccf88SSatish Balay /*
2478798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2484c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2494c1ff481SSatish Balay   can be inserted with a single call to this function.
2504c1ff481SSatish Balay 
2514c1ff481SSatish Balay   Input Parameters:
2524c1ff481SSatish Balay   stash  - the stash
2534c1ff481SSatish Balay   row    - the global row correspoiding to the values
2544c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2554c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2564c1ff481SSatish Balay   values - the values inserted
257bc5ccf88SSatish Balay */
2584a2ae208SSatish Balay #undef __FUNCT__
2594a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
260c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
261bc5ccf88SSatish Balay {
262dfbe8321SBarry Smith   PetscErrorCode     ierr;
26375cae7c1SHong Zhang   PetscInt           i,k;
26475cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
265bc5ccf88SSatish Balay 
266bc5ccf88SSatish Balay   PetscFunctionBegin;
2674c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
26875cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2698798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2709417f4adSLois Curfman McInnes   }
27175cae7c1SHong Zhang   space = stash->space;
27275cae7c1SHong Zhang   k     = space->local_used;
2734c1ff481SSatish Balay   for (i=0; i<n; i++) {
27475cae7c1SHong Zhang     space->idx[k] = row;
27575cae7c1SHong Zhang     space->idy[k] = idxn[i];
27675cae7c1SHong Zhang     space->val[k] = values[i];
27775cae7c1SHong Zhang     k++;
2789417f4adSLois Curfman McInnes   }
2795bd3b8fbSHong Zhang   stash->n               += n;
28075cae7c1SHong Zhang   space->local_used      += n;
28175cae7c1SHong Zhang   space->local_remaining -= n;
282a2d1c673SSatish Balay   PetscFunctionReturn(0);
283a2d1c673SSatish Balay }
28475cae7c1SHong Zhang 
2854c1ff481SSatish Balay /*
2868798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2874c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2884c1ff481SSatish Balay   can be inserted with a single call to this function.
289a2d1c673SSatish Balay 
2904c1ff481SSatish Balay   Input Parameters:
2914c1ff481SSatish Balay   stash   - the stash
2924c1ff481SSatish Balay   row     - the global row correspoiding to the values
2934c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2944c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2954c1ff481SSatish Balay   values  - the values inserted
2964c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2974c1ff481SSatish Balay             this happens because the input is columnoriented.
2984c1ff481SSatish Balay */
2994a2ae208SSatish Balay #undef __FUNCT__
3004a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
301c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
302a2d1c673SSatish Balay {
303dfbe8321SBarry Smith   PetscErrorCode     ierr;
30475cae7c1SHong Zhang   PetscInt           i,k;
30575cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
306a2d1c673SSatish Balay 
3074c1ff481SSatish Balay   PetscFunctionBegin;
3084c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
30975cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3108798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3114c1ff481SSatish Balay   }
31275cae7c1SHong Zhang   space = stash->space;
31375cae7c1SHong Zhang   k = space->local_used;
3144c1ff481SSatish Balay   for (i=0; i<n; i++) {
31575cae7c1SHong Zhang     space->idx[k] = row;
31675cae7c1SHong Zhang     space->idy[k] = idxn[i];
31775cae7c1SHong Zhang     space->val[k] = values[i*stepval];
31875cae7c1SHong Zhang     k++;
3194c1ff481SSatish Balay   }
3205bd3b8fbSHong Zhang   stash->n               += n;
32175cae7c1SHong Zhang   space->local_used      += n;
32275cae7c1SHong Zhang   space->local_remaining -= n;
3234c1ff481SSatish Balay   PetscFunctionReturn(0);
3244c1ff481SSatish Balay }
3254c1ff481SSatish Balay 
3264c1ff481SSatish Balay /*
3278798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3284c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3294c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3304c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3314c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3324c1ff481SSatish Balay 
3334c1ff481SSatish Balay   Input Parameters:
3344c1ff481SSatish Balay   stash  - the stash
3354c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3364c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3374c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3384c1ff481SSatish Balay            values. Each block is of size bs*bs.
3394c1ff481SSatish Balay   values - the values inserted
3404c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3414c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3424c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3434c1ff481SSatish Balay */
3444a2ae208SSatish Balay #undef __FUNCT__
3454a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
346c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3474c1ff481SSatish Balay {
348dfbe8321SBarry Smith   PetscErrorCode     ierr;
34975cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
350f15d580aSBarry Smith   const MatScalar    *vals;
351f15d580aSBarry Smith   MatScalar          *array;
35275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
353a2d1c673SSatish Balay 
354a2d1c673SSatish Balay   PetscFunctionBegin;
35575cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3568798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
357a2d1c673SSatish Balay   }
35875cae7c1SHong Zhang   space = stash->space;
35975cae7c1SHong Zhang   l     = space->local_used;
36075cae7c1SHong Zhang   bs2   = bs*bs;
3614c1ff481SSatish Balay   for (i=0; i<n; i++) {
36275cae7c1SHong Zhang     space->idx[l] = row;
36375cae7c1SHong Zhang     space->idy[l] = idxn[i];
36475cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
36575cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
36675cae7c1SHong Zhang        funtion call */
36775cae7c1SHong Zhang     array = space->val + bs2*l;
36875cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
36975cae7c1SHong Zhang     for (j=0; j<bs; j++) {
37075cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
37175cae7c1SHong Zhang       array++;
37275cae7c1SHong Zhang       vals  += cmax*bs;
37375cae7c1SHong Zhang     }
37475cae7c1SHong Zhang     l++;
375a2d1c673SSatish Balay   }
3765bd3b8fbSHong Zhang   stash->n               += n;
37775cae7c1SHong Zhang   space->local_used      += n;
37875cae7c1SHong Zhang   space->local_remaining -= n;
3794c1ff481SSatish Balay   PetscFunctionReturn(0);
3804c1ff481SSatish Balay }
3814c1ff481SSatish Balay 
3824c1ff481SSatish Balay /*
3838798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3844c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3854c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3864c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3874c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3884c1ff481SSatish Balay 
3894c1ff481SSatish Balay   Input Parameters:
3904c1ff481SSatish Balay   stash  - the stash
3914c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3924c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3934c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3944c1ff481SSatish Balay            values. Each block is of size bs*bs.
3954c1ff481SSatish Balay   values - the values inserted
3964c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3974c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3984c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3994c1ff481SSatish Balay */
4004a2ae208SSatish Balay #undef __FUNCT__
4014a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
402c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4034c1ff481SSatish Balay {
404dfbe8321SBarry Smith   PetscErrorCode  ierr;
40575cae7c1SHong Zhang   PetscInt        i,j,k,bs2,bs=stash->bs,l;
406f15d580aSBarry Smith   const MatScalar *vals;
407f15d580aSBarry Smith   MatScalar       *array;
40875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4094c1ff481SSatish Balay 
4104c1ff481SSatish Balay   PetscFunctionBegin;
41175cae7c1SHong Zhang   if (!space || space->local_remaining < n){
4128798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4134c1ff481SSatish Balay   }
41475cae7c1SHong Zhang   space = stash->space;
41575cae7c1SHong Zhang   l     = space->local_used;
41675cae7c1SHong Zhang   bs2   = bs*bs;
4174c1ff481SSatish Balay   for (i=0; i<n; i++) {
41875cae7c1SHong Zhang     space->idx[l] = row;
41975cae7c1SHong Zhang     space->idy[l] = idxn[i];
42075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
42175cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
42275cae7c1SHong Zhang      funtion call */
42375cae7c1SHong Zhang     array = space->val + bs2*l;
42475cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
42575cae7c1SHong Zhang     for (j=0; j<bs; j++) {
42675cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
42775cae7c1SHong Zhang       array += bs;
42875cae7c1SHong Zhang       vals  += rmax*bs;
42975cae7c1SHong Zhang     }
4305bd3b8fbSHong Zhang     l++;
431a2d1c673SSatish Balay   }
4325bd3b8fbSHong Zhang   stash->n               += n;
43375cae7c1SHong Zhang   space->local_used      += n;
43475cae7c1SHong Zhang   space->local_remaining -= n;
4353a40ed3dSBarry Smith   PetscFunctionReturn(0);
4369417f4adSLois Curfman McInnes }
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: In the case of the blocked stash, the 'owners' array has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
*/
4524a2ae208SSatish Balay #undef __FUNCT__
4534a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
454c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
455bc5ccf88SSatish Balay {
456c1ac3661SBarry Smith   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
457fe09c992SBarry Smith   PetscInt       size=stash->size,nsends;
4586849ba73SBarry Smith   PetscErrorCode ierr;
45975cae7c1SHong Zhang   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
460563fb871SSatish Balay   MatScalar      **rvalues,*svalues;
461bc5ccf88SSatish Balay   MPI_Comm       comm = stash->comm;
462563fb871SSatish Balay   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
463fe09c992SBarry Smith   PetscMPIInt    *nprocs,*nlengths,nreceives;
4645bd3b8fbSHong Zhang   PetscInt       *sp_idx,*sp_idy;
4655bd3b8fbSHong Zhang   MatScalar      *sp_val;
4665bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
467bc5ccf88SSatish Balay 
468bc5ccf88SSatish Balay   PetscFunctionBegin;
4694c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
47075cae7c1SHong Zhang 
471bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
472fe09c992SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
473fe09c992SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
474c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
475a2d1c673SSatish Balay 
476563fb871SSatish Balay   nlengths = nprocs+size;
47775cae7c1SHong Zhang   i = j    = 0;
4787357eb19SBarry Smith   lastidx  = -1;
4795bd3b8fbSHong Zhang   space    = stash->space_head;
48075cae7c1SHong Zhang   while (space != PETSC_NULL){
48175cae7c1SHong Zhang     space_next = space->next;
4825bd3b8fbSHong Zhang     sp_idx     = space->idx;
48375cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4847357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4855bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4867357eb19SBarry Smith       lastidx = idx;
4877357eb19SBarry Smith       for (; j<size; j++) {
4884c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
489563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
490bc5ccf88SSatish Balay         }
491bc5ccf88SSatish Balay       }
49275cae7c1SHong Zhang       i++;
49375cae7c1SHong Zhang     }
49475cae7c1SHong Zhang     space = space_next;
495bc5ccf88SSatish Balay   }
496563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
497563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
498563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
499563fb871SSatish Balay   }
500bc5ccf88SSatish Balay 
501563fb871SSatish Balay   { int  *onodes,*olengths;
502563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
503563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
504563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
505563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
506563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
507563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
508563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
509563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
510563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
511563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
512563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
513bc5ccf88SSatish Balay   }
514bc5ccf88SSatish Balay 
515bc5ccf88SSatish Balay   /* do sends:
516bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
517bc5ccf88SSatish Balay          the ith processor
518bc5ccf88SSatish Balay   */
519c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
520c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
521b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
522c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
523bc5ccf88SSatish Balay   starti   = startv + size;
524a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
525bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
526bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
527563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
528563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
529bc5ccf88SSatish Balay   }
53075cae7c1SHong Zhang 
53175cae7c1SHong Zhang   i     = 0;
5325bd3b8fbSHong Zhang   space = stash->space_head;
53375cae7c1SHong Zhang   while (space != PETSC_NULL){
53475cae7c1SHong Zhang     space_next = space->next;
5355bd3b8fbSHong Zhang     sp_idx = space->idx;
5365bd3b8fbSHong Zhang     sp_idy = space->idy;
5375bd3b8fbSHong Zhang     sp_val = space->val;
53875cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
539bc5ccf88SSatish Balay       j = owner[i];
540a2d1c673SSatish Balay       if (bs2 == 1) {
5415bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
542a2d1c673SSatish Balay       } else {
543c1ac3661SBarry Smith         PetscInt  k;
5443eda8832SBarry Smith         MatScalar *buf1,*buf2;
5454c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
5465bd3b8fbSHong Zhang         buf2 = space->val + bs2*i;
5474c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
548a2d1c673SSatish Balay       }
5495bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5505bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
551bc5ccf88SSatish Balay       startv[j]++;
552bc5ccf88SSatish Balay       starti[j]++;
55375cae7c1SHong Zhang       i++;
55475cae7c1SHong Zhang     }
55575cae7c1SHong Zhang     space = space_next;
556bc5ccf88SSatish Balay   }
557bc5ccf88SSatish Balay   startv[0] = 0;
558563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
559e5d0e772SSatish Balay 
560bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
561563fb871SSatish Balay     if (nprocs[i]) {
562563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
563563fb871SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
564bc5ccf88SSatish Balay     }
565b85c94c3SSatish Balay   }
5666cf91177SBarry Smith #if defined(PETSC_USE_INFO)
567ae15b995SBarry Smith   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
568e5d0e772SSatish Balay   for (i=0; i<size; i++) {
569e5d0e772SSatish Balay     if (nprocs[i]) {
570ae15b995SBarry Smith       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
571e5d0e772SSatish Balay     }
572e5d0e772SSatish Balay   }
573e5d0e772SSatish Balay #endif
574606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
575606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
576a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
577a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
578a2d1c673SSatish Balay   stash->nprocs = nprocs;
579a2d1c673SSatish Balay 
580563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
581563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
582563fb871SSatish Balay 
583563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
584563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
585563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
586563fb871SSatish Balay   }
587563fb871SSatish Balay   stash->recv_waits = recv_waits;
588563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
589563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
590563fb871SSatish Balay 
591bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
592563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
593bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
594bc5ccf88SSatish Balay   PetscFunctionReturn(0);
595bc5ccf88SSatish Balay }
596bc5ccf88SSatish Balay 
597a2d1c673SSatish Balay /*
5988798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5998798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
6004c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
6014c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6024c1ff481SSatish Balay 
6034c1ff481SSatish Balay    Input Parameters:
6044c1ff481SSatish Balay    stash - the stash
6054c1ff481SSatish Balay 
6064c1ff481SSatish Balay    Output Parameters:
6074c1ff481SSatish Balay    nvals - the number of entries in the current message.
6084c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6094c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6104c1ff481SSatish Balay    vals  - the values
6114c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6124c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6134c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
614a2d1c673SSatish Balay */
6154a2ae208SSatish Balay #undef __FUNCT__
6164a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
617c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
618bc5ccf88SSatish Balay {
6196849ba73SBarry Smith   PetscErrorCode ierr;
620fe09c992SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2;
621fe09c992SBarry Smith   PetscInt       bs2;
622a2d1c673SSatish Balay   MPI_Status     recv_status;
623b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
624bc5ccf88SSatish Balay 
625bc5ccf88SSatish Balay   PetscFunctionBegin;
626bc5ccf88SSatish Balay 
627a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
628a2d1c673SSatish Balay   /* Return if no more messages to process */
629a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
630a2d1c673SSatish Balay 
631a2d1c673SSatish Balay   flg_v = stash->nprocs;
6324c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
633a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
634a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
635a2d1c673SSatish Balay   while (!match_found) {
636a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
637a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
638a2d1c673SSatish Balay     if (i % 2) {
6393eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
640c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
641a2d1c673SSatish Balay       *nvals = *nvals/bs2;
642563fb871SSatish Balay     } else {
643563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
644563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
645563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
646bc5ccf88SSatish Balay     }
647a2d1c673SSatish Balay 
648a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
649c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
650c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
651a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
652563fb871SSatish Balay       *rows       = stash->rindices[i2];
653a2d1c673SSatish Balay       *cols       = *rows + *nvals;
654563fb871SSatish Balay       *vals       = stash->rvalues[i1];
655a2d1c673SSatish Balay       *flg        = 1;
656a2d1c673SSatish Balay       stash->nprocessed ++;
65735d8aa7fSBarry Smith       match_found = PETSC_TRUE;
658bc5ccf88SSatish Balay     }
659bc5ccf88SSatish Balay   }
660bc5ccf88SSatish Balay   PetscFunctionReturn(0);
661bc5ccf88SSatish Balay }
662