xref: /petsc/src/mat/utils/matstash.c (revision 54f218876c6e466de49bbdcd046b3156d70a18ce)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
3b9147fbbSdalcinl #include "include/private/matimpl.h"
45bd3b8fbSHong Zhang 
53eda8832SBarry Smith /*
60ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
70ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
83eda8832SBarry Smith */
9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
104c1ff481SSatish Balay 
119417f4adSLois Curfman McInnes /*
128798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
134c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
144c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
159417f4adSLois Curfman McInnes 
164c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
174c1ff481SSatish Balay 
184c1ff481SSatish Balay   Input Parameters:
194c1ff481SSatish Balay   comm - communicator, required for scatters.
204c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
214c1ff481SSatish Balay 
224c1ff481SSatish Balay   Output Parameters:
234c1ff481SSatish Balay   stash    - the newly created stash
249417f4adSLois Curfman McInnes */
254a2ae208SSatish Balay #undef __FUNCT__
264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
27c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
289417f4adSLois Curfman McInnes {
29dfbe8321SBarry Smith   PetscErrorCode ierr;
30c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
31f1af5d2fSBarry Smith   PetscTruth     flg;
32bc5ccf88SSatish Balay 
333a40ed3dSBarry Smith   PetscFunctionBegin;
34bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
35752ec6e0SSatish Balay   stash->comm = comm;
36752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
37a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
40bc5ccf88SSatish Balay 
41434d7ff9SSatish Balay   nopt = stash->size;
42d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
43b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
44434d7ff9SSatish Balay   if (flg) {
45434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
46434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
47434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
48f4ab19daSSatish Balay     else                          max = 0; /* Use default */
49434d7ff9SSatish Balay     stash->umax = max;
50434d7ff9SSatish Balay   } else {
51434d7ff9SSatish Balay     stash->umax = 0;
52434d7ff9SSatish Balay   }
53606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
544c1ff481SSatish Balay   if (bs <= 0) bs = 1;
55a2d1c673SSatish Balay 
564c1ff481SSatish Balay   stash->bs       = bs;
579417f4adSLois Curfman McInnes   stash->nmax     = 0;
58434d7ff9SSatish Balay   stash->oldnmax  = 0;
599417f4adSLois Curfman McInnes   stash->n        = 0;
604c1ff481SSatish Balay   stash->reallocs = -1;
6175cae7c1SHong Zhang   stash->space_head = 0;
6275cae7c1SHong Zhang   stash->space      = 0;
639417f4adSLois Curfman McInnes 
64bc5ccf88SSatish Balay   stash->send_waits  = 0;
65bc5ccf88SSatish Balay   stash->recv_waits  = 0;
66a2d1c673SSatish Balay   stash->send_status = 0;
67bc5ccf88SSatish Balay   stash->nsends      = 0;
68bc5ccf88SSatish Balay   stash->nrecvs      = 0;
69bc5ccf88SSatish Balay   stash->svalues     = 0;
70bc5ccf88SSatish Balay   stash->rvalues     = 0;
71563fb871SSatish Balay   stash->rindices    = 0;
72a2d1c673SSatish Balay   stash->nprocs      = 0;
73a2d1c673SSatish Balay   stash->nprocessed  = 0;
743a40ed3dSBarry Smith   PetscFunctionReturn(0);
759417f4adSLois Curfman McInnes }
769417f4adSLois Curfman McInnes 
774c1ff481SSatish Balay /*
788798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
794c1ff481SSatish Balay */
804a2ae208SSatish Balay #undef __FUNCT__
814a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
82dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
839417f4adSLois Curfman McInnes {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
85a2d1c673SSatish Balay 
86bc5ccf88SSatish Balay   PetscFunctionBegin;
8775cae7c1SHong Zhang   if (stash->space_head){
8875cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
8975cae7c1SHong Zhang     stash->space_head = 0;
9082740460SHong Zhang     stash->space      = 0;
9175cae7c1SHong Zhang   }
92bc5ccf88SSatish Balay   PetscFunctionReturn(0);
93bc5ccf88SSatish Balay }
94bc5ccf88SSatish Balay 
954c1ff481SSatish Balay /*
968798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
974c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
984c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
994c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1004c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1014c1ff481SSatish Balay    so that the same value can be used the next time through.
1024c1ff481SSatish Balay */
1034a2ae208SSatish Balay #undef __FUNCT__
1044a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
105dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106bc5ccf88SSatish Balay {
1076849ba73SBarry Smith   PetscErrorCode ierr;
1085bd3b8fbSHong Zhang   PetscInt       nsends=stash->nsends,bs2,oldnmax;
109a2d1c673SSatish Balay   MPI_Status     *send_status;
110a2d1c673SSatish Balay 
1113a40ed3dSBarry Smith   PetscFunctionBegin;
112a2d1c673SSatish Balay   /* wait on sends */
113a2d1c673SSatish Balay   if (nsends) {
11482502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
117a2d1c673SSatish Balay   }
118a2d1c673SSatish Balay 
119c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
120434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
121434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
122b9b97703SBarry Smith   if (stash->n) {
12394b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1248a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126b9b97703SBarry Smith   }
127434d7ff9SSatish Balay 
128d07ff455SSatish Balay   stash->nmax       = 0;
129d07ff455SSatish Balay   stash->n          = 0;
1304c1ff481SSatish Balay   stash->reallocs   = -1;
131a2d1c673SSatish Balay   stash->nprocessed = 0;
13275cae7c1SHong Zhang   if (stash->space_head){
13375cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
13475cae7c1SHong Zhang     stash->space_head = 0;
13582740460SHong Zhang     stash->space      = 0;
13675cae7c1SHong Zhang   }
137606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138606d414cSSatish Balay   stash->send_waits = 0;
139606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
140606d414cSSatish Balay   stash->recv_waits = 0;
141606d414cSSatish Balay   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
142606d414cSSatish Balay   stash->svalues = 0;
143606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
144606d414cSSatish Balay   stash->rvalues = 0;
145563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
146563fb871SSatish Balay   stash->rindices = 0;
147b22afee1SSatish Balay   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
148606d414cSSatish Balay   stash->nprocs = 0;
1493a40ed3dSBarry Smith   PetscFunctionReturn(0);
1509417f4adSLois Curfman McInnes }
1519417f4adSLois Curfman McInnes 
1524c1ff481SSatish Balay /*
1538798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1544c1ff481SSatish Balay 
1554c1ff481SSatish Balay    Input Parameters:
1564c1ff481SSatish Balay    stash    - the stash
15794b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1584c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1594c1ff481SSatish Balay 
1604c1ff481SSatish Balay */
1614a2ae208SSatish Balay #undef __FUNCT__
1624a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
163c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
16497530c3fSBarry Smith {
165c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
16694b769a5SSatish Balay 
1673a40ed3dSBarry Smith   PetscFunctionBegin;
1681ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1691ecfd215SBarry Smith   if (reallocs) {
170434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
171434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1721ecfd215SBarry Smith   }
173bc5ccf88SSatish Balay   PetscFunctionReturn(0);
174bc5ccf88SSatish Balay }
1754c1ff481SSatish Balay 
1764c1ff481SSatish Balay /*
1778798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1784c1ff481SSatish Balay 
1794c1ff481SSatish Balay    Input Parameters:
1804c1ff481SSatish Balay    stash  - the stash
1814c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1824c1ff481SSatish Balay             this value is used while allocating memory.
1834c1ff481SSatish Balay */
1844a2ae208SSatish Balay #undef __FUNCT__
1854a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
186c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
187bc5ccf88SSatish Balay {
188bc5ccf88SSatish Balay   PetscFunctionBegin;
189434d7ff9SSatish Balay   stash->umax = max;
1903a40ed3dSBarry Smith   PetscFunctionReturn(0);
19197530c3fSBarry Smith }
19297530c3fSBarry Smith 
1938798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1944c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1954c1ff481SSatish Balay    being inserted into the stash.
1964c1ff481SSatish Balay 
1974c1ff481SSatish Balay    Input Parameters:
1984c1ff481SSatish Balay    stash - the stash
1994c1ff481SSatish Balay    incr  - the minimum increase requested
2004c1ff481SSatish Balay 
2014c1ff481SSatish Balay    Notes:
2024c1ff481SSatish Balay    This routine doubles the currently used memory.
2034c1ff481SSatish Balay  */
2044a2ae208SSatish Balay #undef __FUNCT__
2054a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
206c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2079417f4adSLois Curfman McInnes {
2086849ba73SBarry Smith   PetscErrorCode ierr;
2095bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2109417f4adSLois Curfman McInnes 
2113a40ed3dSBarry Smith   PetscFunctionBegin;
2129417f4adSLois Curfman McInnes   /* allocate a larger stash */
213c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
214434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
215434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
216c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
217434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
218434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
219434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2204c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
221d07ff455SSatish Balay 
22275cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
22375cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
224b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
225b087b6d6SSatish Balay     stash->space_head = stash->space;
22675cae7c1SHong Zhang   }
227b087b6d6SSatish Balay 
228bc5ccf88SSatish Balay   stash->reallocs++;
22975cae7c1SHong Zhang   stash->nmax = newnmax;
230bc5ccf88SSatish Balay   PetscFunctionReturn(0);
231bc5ccf88SSatish Balay }
232bc5ccf88SSatish Balay /*
2338798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2344c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2354c1ff481SSatish Balay   can be inserted with a single call to this function.
2364c1ff481SSatish Balay 
2374c1ff481SSatish Balay   Input Parameters:
2384c1ff481SSatish Balay   stash  - the stash
2394c1ff481SSatish Balay   row    - the global row correspoiding to the values
2404c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2414c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2424c1ff481SSatish Balay   values - the values inserted
243bc5ccf88SSatish Balay */
2444a2ae208SSatish Balay #undef __FUNCT__
2454a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
246*54f21887SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[])
247bc5ccf88SSatish Balay {
248dfbe8321SBarry Smith   PetscErrorCode     ierr;
24975cae7c1SHong Zhang   PetscInt           i,k;
25075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
251bc5ccf88SSatish Balay 
252bc5ccf88SSatish Balay   PetscFunctionBegin;
2534c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
25475cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2558798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2569417f4adSLois Curfman McInnes   }
25775cae7c1SHong Zhang   space = stash->space;
25875cae7c1SHong Zhang   k     = space->local_used;
2594c1ff481SSatish Balay   for (i=0; i<n; i++) {
26075cae7c1SHong Zhang     space->idx[k] = row;
26175cae7c1SHong Zhang     space->idy[k] = idxn[i];
26275cae7c1SHong Zhang     space->val[k] = values[i];
26375cae7c1SHong Zhang     k++;
2649417f4adSLois Curfman McInnes   }
2655bd3b8fbSHong Zhang   stash->n               += n;
26675cae7c1SHong Zhang   space->local_used      += n;
26775cae7c1SHong Zhang   space->local_remaining -= n;
268a2d1c673SSatish Balay   PetscFunctionReturn(0);
269a2d1c673SSatish Balay }
27075cae7c1SHong Zhang 
2714c1ff481SSatish Balay /*
2728798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2734c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2744c1ff481SSatish Balay   can be inserted with a single call to this function.
275a2d1c673SSatish Balay 
2764c1ff481SSatish Balay   Input Parameters:
2774c1ff481SSatish Balay   stash   - the stash
2784c1ff481SSatish Balay   row     - the global row correspoiding to the values
2794c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2804c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2814c1ff481SSatish Balay   values  - the values inserted
2824c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2834c1ff481SSatish Balay             this happens because the input is columnoriented.
2844c1ff481SSatish Balay */
2854a2ae208SSatish Balay #undef __FUNCT__
2864a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
287*54f21887SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval)
288a2d1c673SSatish Balay {
289dfbe8321SBarry Smith   PetscErrorCode     ierr;
29075cae7c1SHong Zhang   PetscInt           i,k;
29175cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
292a2d1c673SSatish Balay 
2934c1ff481SSatish Balay   PetscFunctionBegin;
2944c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
29575cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2968798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2974c1ff481SSatish Balay   }
29875cae7c1SHong Zhang   space = stash->space;
29975cae7c1SHong Zhang   k = space->local_used;
3004c1ff481SSatish Balay   for (i=0; i<n; i++) {
30175cae7c1SHong Zhang     space->idx[k] = row;
30275cae7c1SHong Zhang     space->idy[k] = idxn[i];
30375cae7c1SHong Zhang     space->val[k] = values[i*stepval];
30475cae7c1SHong Zhang     k++;
3054c1ff481SSatish Balay   }
3065bd3b8fbSHong Zhang   stash->n               += n;
30775cae7c1SHong Zhang   space->local_used      += n;
30875cae7c1SHong Zhang   space->local_remaining -= n;
3094c1ff481SSatish Balay   PetscFunctionReturn(0);
3104c1ff481SSatish Balay }
3114c1ff481SSatish Balay 
3124c1ff481SSatish Balay /*
3138798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3144c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3154c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3164c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3174c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3184c1ff481SSatish Balay 
3194c1ff481SSatish Balay   Input Parameters:
3204c1ff481SSatish Balay   stash  - the stash
3214c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3224c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3234c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3244c1ff481SSatish Balay            values. Each block is of size bs*bs.
3254c1ff481SSatish Balay   values - the values inserted
3264c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3274c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3284c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3294c1ff481SSatish Balay */
3304a2ae208SSatish Balay #undef __FUNCT__
3314a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
332*54f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3334c1ff481SSatish Balay {
334dfbe8321SBarry Smith   PetscErrorCode     ierr;
33575cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
336*54f21887SBarry Smith   const PetscScalar  *vals;
337*54f21887SBarry Smith   PetscScalar        *array;
33875cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
339a2d1c673SSatish Balay 
340a2d1c673SSatish Balay   PetscFunctionBegin;
34175cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3428798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
343a2d1c673SSatish Balay   }
34475cae7c1SHong Zhang   space = stash->space;
34575cae7c1SHong Zhang   l     = space->local_used;
34675cae7c1SHong Zhang   bs2   = bs*bs;
3474c1ff481SSatish Balay   for (i=0; i<n; i++) {
34875cae7c1SHong Zhang     space->idx[l] = row;
34975cae7c1SHong Zhang     space->idy[l] = idxn[i];
35075cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
35175cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
35275cae7c1SHong Zhang        funtion call */
35375cae7c1SHong Zhang     array = space->val + bs2*l;
35475cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
35575cae7c1SHong Zhang     for (j=0; j<bs; j++) {
35675cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
35775cae7c1SHong Zhang       array++;
35875cae7c1SHong Zhang       vals  += cmax*bs;
35975cae7c1SHong Zhang     }
36075cae7c1SHong Zhang     l++;
361a2d1c673SSatish Balay   }
3625bd3b8fbSHong Zhang   stash->n               += n;
36375cae7c1SHong Zhang   space->local_used      += n;
36475cae7c1SHong Zhang   space->local_remaining -= n;
3654c1ff481SSatish Balay   PetscFunctionReturn(0);
3664c1ff481SSatish Balay }
3674c1ff481SSatish Balay 
3684c1ff481SSatish Balay /*
3698798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3704c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3714c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3724c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3734c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3744c1ff481SSatish Balay 
3754c1ff481SSatish Balay   Input Parameters:
3764c1ff481SSatish Balay   stash  - the stash
3774c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3784c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3794c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3804c1ff481SSatish Balay            values. Each block is of size bs*bs.
3814c1ff481SSatish Balay   values - the values inserted
3824c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3834c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3844c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3854c1ff481SSatish Balay */
3864a2ae208SSatish Balay #undef __FUNCT__
3874a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
388*54f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3894c1ff481SSatish Balay {
390dfbe8321SBarry Smith   PetscErrorCode     ierr;
39175cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
392*54f21887SBarry Smith   const PetscScalar  *vals;
393*54f21887SBarry Smith   PetscScalar        *array;
39475cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
3954c1ff481SSatish Balay 
3964c1ff481SSatish Balay   PetscFunctionBegin;
39775cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3988798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3994c1ff481SSatish Balay   }
40075cae7c1SHong Zhang   space = stash->space;
40175cae7c1SHong Zhang   l     = space->local_used;
40275cae7c1SHong Zhang   bs2   = bs*bs;
4034c1ff481SSatish Balay   for (i=0; i<n; i++) {
40475cae7c1SHong Zhang     space->idx[l] = row;
40575cae7c1SHong Zhang     space->idy[l] = idxn[i];
40675cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40775cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
40875cae7c1SHong Zhang      funtion call */
40975cae7c1SHong Zhang     array = space->val + bs2*l;
41075cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
41175cae7c1SHong Zhang     for (j=0; j<bs; j++) {
41275cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
41375cae7c1SHong Zhang       array += bs;
41475cae7c1SHong Zhang       vals  += rmax*bs;
41575cae7c1SHong Zhang     }
4165bd3b8fbSHong Zhang     l++;
417a2d1c673SSatish Balay   }
4185bd3b8fbSHong Zhang   stash->n               += n;
41975cae7c1SHong Zhang   space->local_used      += n;
42075cae7c1SHong Zhang   space->local_remaining -= n;
4213a40ed3dSBarry Smith   PetscFunctionReturn(0);
4229417f4adSLois Curfman McInnes }
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owning
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs'+1 which gives the ownership range
           for each node (node j owns indices owners[j] .. owners[j+1]-1).

  Notes: In the case of the blocked stash, the 'owners' array has the
  ranges specified in blocked global indices; for the regular stash they
  are in the proper global indices.
*/
4384a2ae208SSatish Balay #undef __FUNCT__
4394a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4401e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
441bc5ccf88SSatish Balay {
442c1ac3661SBarry Smith   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
443fe09c992SBarry Smith   PetscInt          size=stash->size,nsends;
4446849ba73SBarry Smith   PetscErrorCode    ierr;
44575cae7c1SHong Zhang   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
446*54f21887SBarry Smith   PetscScalar       **rvalues,*svalues;
447bc5ccf88SSatish Balay   MPI_Comm          comm = stash->comm;
448563fb871SSatish Balay   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
449fe09c992SBarry Smith   PetscMPIInt       *nprocs,*nlengths,nreceives;
4505bd3b8fbSHong Zhang   PetscInt          *sp_idx,*sp_idy;
451*54f21887SBarry Smith   PetscScalar       *sp_val;
4525bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
453bc5ccf88SSatish Balay 
454bc5ccf88SSatish Balay   PetscFunctionBegin;
4554c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
45675cae7c1SHong Zhang 
457bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
458fe09c992SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
459fe09c992SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
460c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
461a2d1c673SSatish Balay 
462563fb871SSatish Balay   nlengths = nprocs+size;
46375cae7c1SHong Zhang   i = j    = 0;
4647357eb19SBarry Smith   lastidx  = -1;
4655bd3b8fbSHong Zhang   space    = stash->space_head;
46675cae7c1SHong Zhang   while (space != PETSC_NULL){
46775cae7c1SHong Zhang     space_next = space->next;
4685bd3b8fbSHong Zhang     sp_idx     = space->idx;
46975cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4707357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4715bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4727357eb19SBarry Smith       lastidx = idx;
4737357eb19SBarry Smith       for (; j<size; j++) {
4744c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
475563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
476bc5ccf88SSatish Balay         }
477bc5ccf88SSatish Balay       }
47875cae7c1SHong Zhang       i++;
47975cae7c1SHong Zhang     }
48075cae7c1SHong Zhang     space = space_next;
481bc5ccf88SSatish Balay   }
482563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
483563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
484563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
485563fb871SSatish Balay   }
486bc5ccf88SSatish Balay 
487*54f21887SBarry Smith   {PetscMPIInt  *onodes,*olengths;
488563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
489563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
490563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
491563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
492563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
493563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
494563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
495563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
496563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
497563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
498563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
499bc5ccf88SSatish Balay   }
500bc5ccf88SSatish Balay 
501bc5ccf88SSatish Balay   /* do sends:
502bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
503bc5ccf88SSatish Balay          the ith processor
504bc5ccf88SSatish Balay   */
505c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
506c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
507b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
508c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
509bc5ccf88SSatish Balay   starti   = startv + size;
510a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
511bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
512bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
513563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
514563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
515bc5ccf88SSatish Balay   }
51675cae7c1SHong Zhang 
51775cae7c1SHong Zhang   i     = 0;
5185bd3b8fbSHong Zhang   space = stash->space_head;
51975cae7c1SHong Zhang   while (space != PETSC_NULL){
52075cae7c1SHong Zhang     space_next = space->next;
5215bd3b8fbSHong Zhang     sp_idx = space->idx;
5225bd3b8fbSHong Zhang     sp_idy = space->idy;
5235bd3b8fbSHong Zhang     sp_val = space->val;
52475cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
525bc5ccf88SSatish Balay       j = owner[i];
526a2d1c673SSatish Balay       if (bs2 == 1) {
5275bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
528a2d1c673SSatish Balay       } else {
529c1ac3661SBarry Smith         PetscInt     k;
530*54f21887SBarry Smith         PetscScalar *buf1,*buf2;
5314c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
532b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5334c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
534a2d1c673SSatish Balay       }
5355bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5365bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
537bc5ccf88SSatish Balay       startv[j]++;
538bc5ccf88SSatish Balay       starti[j]++;
53975cae7c1SHong Zhang       i++;
54075cae7c1SHong Zhang     }
54175cae7c1SHong Zhang     space = space_next;
542bc5ccf88SSatish Balay   }
543bc5ccf88SSatish Balay   startv[0] = 0;
544563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
545e5d0e772SSatish Balay 
546bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
547563fb871SSatish Balay     if (nprocs[i]) {
548563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
549563fb871SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
550bc5ccf88SSatish Balay     }
551b85c94c3SSatish Balay   }
5526cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5531e2582c4SBarry Smith   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
554e5d0e772SSatish Balay   for (i=0; i<size; i++) {
555e5d0e772SSatish Balay     if (nprocs[i]) {
5561e2582c4SBarry Smith       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
557e5d0e772SSatish Balay     }
558e5d0e772SSatish Balay   }
559e5d0e772SSatish Balay #endif
560606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
561606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
562a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
563a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
564a2d1c673SSatish Balay   stash->nprocs = nprocs;
565a2d1c673SSatish Balay 
566563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
567563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
568563fb871SSatish Balay 
569563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
570563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
571563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
572563fb871SSatish Balay   }
573563fb871SSatish Balay   stash->recv_waits = recv_waits;
574563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
575563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
576563fb871SSatish Balay 
577bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
578563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
579bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
580bc5ccf88SSatish Balay   PetscFunctionReturn(0);
581bc5ccf88SSatish Balay }
582bc5ccf88SSatish Balay 
583a2d1c673SSatish Balay /*
5848798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5858798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5864c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5874c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5884c1ff481SSatish Balay 
5894c1ff481SSatish Balay    Input Parameters:
5904c1ff481SSatish Balay    stash - the stash
5914c1ff481SSatish Balay 
5924c1ff481SSatish Balay    Output Parameters:
5934c1ff481SSatish Balay    nvals - the number of entries in the current message.
5944c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5954c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5964c1ff481SSatish Balay    vals  - the values
5974c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5984c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5994c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
600a2d1c673SSatish Balay */
6014a2ae208SSatish Balay #undef __FUNCT__
6024a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
603*54f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
604bc5ccf88SSatish Balay {
6056849ba73SBarry Smith   PetscErrorCode ierr;
606fe09c992SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2;
607fe09c992SBarry Smith   PetscInt       bs2;
608a2d1c673SSatish Balay   MPI_Status     recv_status;
609b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
610bc5ccf88SSatish Balay 
611bc5ccf88SSatish Balay   PetscFunctionBegin;
612bc5ccf88SSatish Balay 
613a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
614a2d1c673SSatish Balay   /* Return if no more messages to process */
615a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
616a2d1c673SSatish Balay 
617a2d1c673SSatish Balay   flg_v = stash->nprocs;
6184c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
619a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
620a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
621a2d1c673SSatish Balay   while (!match_found) {
622a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
623a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
624a2d1c673SSatish Balay     if (i % 2) {
6253eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
626c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
627a2d1c673SSatish Balay       *nvals = *nvals/bs2;
628563fb871SSatish Balay     } else {
629563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
630563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
631563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
632bc5ccf88SSatish Balay     }
633a2d1c673SSatish Balay 
634a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
635c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
636c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
637a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
638563fb871SSatish Balay       *rows       = stash->rindices[i2];
639a2d1c673SSatish Balay       *cols       = *rows + *nvals;
640563fb871SSatish Balay       *vals       = stash->rvalues[i1];
641a2d1c673SSatish Balay       *flg        = 1;
642a2d1c673SSatish Balay       stash->nprocessed ++;
64335d8aa7fSBarry Smith       match_found = PETSC_TRUE;
644bc5ccf88SSatish Balay     }
645bc5ccf88SSatish Balay   }
646bc5ccf88SSatish Balay   PetscFunctionReturn(0);
647bc5ccf88SSatish Balay }
648