xref: /petsc/src/mat/utils/matstash.c (revision 5bd3b8fb56ed570a25d2634c59a90f90516c76e3)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
475cae7c1SHong Zhang #include "src/mat/utils/matstashspace.h"
5*5bd3b8fbSHong Zhang 
63eda8832SBarry Smith /*
70ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
80ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
93eda8832SBarry Smith */
10bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
114c1ff481SSatish Balay 
129417f4adSLois Curfman McInnes /*
138798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
144c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
154c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
169417f4adSLois Curfman McInnes 
174c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
184c1ff481SSatish Balay 
194c1ff481SSatish Balay   Input Parameters:
204c1ff481SSatish Balay   comm - communicator, required for scatters.
214c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
224c1ff481SSatish Balay 
234c1ff481SSatish Balay   Output Parameters:
244c1ff481SSatish Balay   stash    - the newly created stash
259417f4adSLois Curfman McInnes */
264a2ae208SSatish Balay #undef __FUNCT__
274a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
299417f4adSLois Curfman McInnes {
30dfbe8321SBarry Smith   PetscErrorCode ierr;
31c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
32f1af5d2fSBarry Smith   PetscTruth     flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
37752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41bc5ccf88SSatish Balay 
42434d7ff9SSatish Balay   nopt = stash->size;
43d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45434d7ff9SSatish Balay   if (flg) {
46434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
47434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
48434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
49f4ab19daSSatish Balay     else                          max = 0; /* Use default */
50434d7ff9SSatish Balay     stash->umax = max;
51434d7ff9SSatish Balay   } else {
52434d7ff9SSatish Balay     stash->umax = 0;
53434d7ff9SSatish Balay   }
54606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
554c1ff481SSatish Balay   if (bs <= 0) bs = 1;
56a2d1c673SSatish Balay 
574c1ff481SSatish Balay   stash->bs       = bs;
589417f4adSLois Curfman McInnes   stash->nmax     = 0;
59434d7ff9SSatish Balay   stash->oldnmax  = 0;
609417f4adSLois Curfman McInnes   stash->n        = 0;
614c1ff481SSatish Balay   stash->reallocs = -1;
6275cae7c1SHong Zhang   stash->space_head = 0;
6375cae7c1SHong Zhang   stash->space      = 0;
649417f4adSLois Curfman McInnes 
65bc5ccf88SSatish Balay   stash->send_waits  = 0;
66bc5ccf88SSatish Balay   stash->recv_waits  = 0;
67a2d1c673SSatish Balay   stash->send_status = 0;
68bc5ccf88SSatish Balay   stash->nsends      = 0;
69bc5ccf88SSatish Balay   stash->nrecvs      = 0;
70bc5ccf88SSatish Balay   stash->svalues     = 0;
71bc5ccf88SSatish Balay   stash->rvalues     = 0;
72563fb871SSatish Balay   stash->rindices    = 0;
73a2d1c673SSatish Balay   stash->nprocs      = 0;
74a2d1c673SSatish Balay   stash->nprocessed  = 0;
753a40ed3dSBarry Smith   PetscFunctionReturn(0);
769417f4adSLois Curfman McInnes }
779417f4adSLois Curfman McInnes 
784c1ff481SSatish Balay /*
798798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
804c1ff481SSatish Balay */
814a2ae208SSatish Balay #undef __FUNCT__
824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
849417f4adSLois Curfman McInnes {
85dfbe8321SBarry Smith   PetscErrorCode ierr;
86a2d1c673SSatish Balay 
87bc5ccf88SSatish Balay   PetscFunctionBegin;
8875cae7c1SHong Zhang   if (stash->space_head){
8975cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
9075cae7c1SHong Zhang     stash->space_head = 0;
9175cae7c1SHong Zhang   }
92bc5ccf88SSatish Balay   PetscFunctionReturn(0);
93bc5ccf88SSatish Balay }
94bc5ccf88SSatish Balay 
954c1ff481SSatish Balay /*
968798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
974c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
984c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
994c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1004c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1014c1ff481SSatish Balay    so that the same value can be used the next time through.
1024c1ff481SSatish Balay */
1034a2ae208SSatish Balay #undef __FUNCT__
1044a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
105dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106bc5ccf88SSatish Balay {
1076849ba73SBarry Smith   PetscErrorCode ierr;
108*5bd3b8fbSHong Zhang   PetscInt       nsends=stash->nsends,bs2,oldnmax;
109a2d1c673SSatish Balay   MPI_Status     *send_status;
110a2d1c673SSatish Balay 
1113a40ed3dSBarry Smith   PetscFunctionBegin;
112a2d1c673SSatish Balay   /* wait on sends */
113a2d1c673SSatish Balay   if (nsends) {
11482502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
115a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
116606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
117a2d1c673SSatish Balay   }
118a2d1c673SSatish Balay 
119c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
120434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
121434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
122b9b97703SBarry Smith   if (stash->n) {
12394b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1248a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126b9b97703SBarry Smith   }
127434d7ff9SSatish Balay 
128d07ff455SSatish Balay   stash->nmax       = 0;
129d07ff455SSatish Balay   stash->n          = 0;
1304c1ff481SSatish Balay   stash->reallocs   = -1;
131a2d1c673SSatish Balay   stash->nprocessed = 0;
13275cae7c1SHong Zhang   if (stash->space_head){
13375cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
13475cae7c1SHong Zhang     stash->space_head = 0;
13575cae7c1SHong Zhang   }
136606d414cSSatish Balay   if (stash->send_waits) {
137606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
138606d414cSSatish Balay     stash->send_waits = 0;
139606d414cSSatish Balay   }
140606d414cSSatish Balay   if (stash->recv_waits) {
141606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
142606d414cSSatish Balay     stash->recv_waits = 0;
143606d414cSSatish Balay   }
144606d414cSSatish Balay   if (stash->svalues) {
145606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
146606d414cSSatish Balay     stash->svalues = 0;
147606d414cSSatish Balay   }
148606d414cSSatish Balay   if (stash->rvalues) {
149606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
150606d414cSSatish Balay     stash->rvalues = 0;
151606d414cSSatish Balay   }
152563fb871SSatish Balay   if (stash->rindices) {
153563fb871SSatish Balay     ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
154563fb871SSatish Balay     stash->rindices = 0;
155563fb871SSatish Balay   }
156606d414cSSatish Balay   if (stash->nprocs) {
157b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
158606d414cSSatish Balay     stash->nprocs = 0;
159606d414cSSatish Balay   }
1603a40ed3dSBarry Smith   PetscFunctionReturn(0);
1619417f4adSLois Curfman McInnes }
1629417f4adSLois Curfman McInnes 
1634c1ff481SSatish Balay /*
1648798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1654c1ff481SSatish Balay 
1664c1ff481SSatish Balay    Input Parameters:
1674c1ff481SSatish Balay    stash    - the stash
16894b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1694c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1704c1ff481SSatish Balay 
1714c1ff481SSatish Balay */
1724a2ae208SSatish Balay #undef __FUNCT__
1734a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
174c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
17597530c3fSBarry Smith {
176c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
17794b769a5SSatish Balay 
1783a40ed3dSBarry Smith   PetscFunctionBegin;
1791ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1801ecfd215SBarry Smith   if (reallocs) {
181434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
182434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1831ecfd215SBarry Smith   }
184bc5ccf88SSatish Balay   PetscFunctionReturn(0);
185bc5ccf88SSatish Balay }
1864c1ff481SSatish Balay 
1874c1ff481SSatish Balay /*
1888798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1894c1ff481SSatish Balay 
1904c1ff481SSatish Balay    Input Parameters:
1914c1ff481SSatish Balay    stash  - the stash
1924c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1934c1ff481SSatish Balay             this value is used while allocating memory.
1944c1ff481SSatish Balay */
1954a2ae208SSatish Balay #undef __FUNCT__
1964a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
197c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
198bc5ccf88SSatish Balay {
199bc5ccf88SSatish Balay   PetscFunctionBegin;
200434d7ff9SSatish Balay   stash->umax = max;
2013a40ed3dSBarry Smith   PetscFunctionReturn(0);
20297530c3fSBarry Smith }
20397530c3fSBarry Smith 
2048798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2054c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2064c1ff481SSatish Balay    being inserted into the stash.
2074c1ff481SSatish Balay 
2084c1ff481SSatish Balay    Input Parameters:
2094c1ff481SSatish Balay    stash - the stash
2104c1ff481SSatish Balay    incr  - the minimum increase requested
2114c1ff481SSatish Balay 
2124c1ff481SSatish Balay    Notes:
2134c1ff481SSatish Balay    This routine doubles the currently used memory.
2144c1ff481SSatish Balay  */
2154a2ae208SSatish Balay #undef __FUNCT__
2164a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
217c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2189417f4adSLois Curfman McInnes {
2196849ba73SBarry Smith   PetscErrorCode ierr;
220*5bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2219417f4adSLois Curfman McInnes 
2223a40ed3dSBarry Smith   PetscFunctionBegin;
2239417f4adSLois Curfman McInnes   /* allocate a larger stash */
224c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
225434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
226434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
227c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
228434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
229434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
230434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2314c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
232d07ff455SSatish Balay 
23375cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
23475cae7c1SHong Zhang   if (!stash->nmax) { /* new stash or resuing stash->oldnmax */
23575cae7c1SHong Zhang     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr);
23675cae7c1SHong Zhang     stash->space = stash->space_head;
23775cae7c1SHong Zhang   } else {
23875cae7c1SHong Zhang     ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
23975cae7c1SHong Zhang   }
240bc5ccf88SSatish Balay   stash->reallocs++;
24175cae7c1SHong Zhang   stash->nmax = newnmax;
242bc5ccf88SSatish Balay   PetscFunctionReturn(0);
243bc5ccf88SSatish Balay }
244bc5ccf88SSatish Balay /*
2458798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2464c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2474c1ff481SSatish Balay   can be inserted with a single call to this function.
2484c1ff481SSatish Balay 
2494c1ff481SSatish Balay   Input Parameters:
2504c1ff481SSatish Balay   stash  - the stash
2514c1ff481SSatish Balay   row    - the global row correspoiding to the values
2524c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2534c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2544c1ff481SSatish Balay   values - the values inserted
255bc5ccf88SSatish Balay */
2564a2ae208SSatish Balay #undef __FUNCT__
2574a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
258c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
259bc5ccf88SSatish Balay {
260dfbe8321SBarry Smith   PetscErrorCode     ierr;
26175cae7c1SHong Zhang   PetscInt           i,k;
26275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
263bc5ccf88SSatish Balay 
264bc5ccf88SSatish Balay   PetscFunctionBegin;
2654c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
26675cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2678798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2689417f4adSLois Curfman McInnes   }
26975cae7c1SHong Zhang   space = stash->space;
27075cae7c1SHong Zhang   k     = space->local_used;
2714c1ff481SSatish Balay   for (i=0; i<n; i++) {
27275cae7c1SHong Zhang     space->idx[k] = row;
27375cae7c1SHong Zhang     space->idy[k] = idxn[i];
27475cae7c1SHong Zhang     space->val[k] = values[i];
27575cae7c1SHong Zhang     k++;
2769417f4adSLois Curfman McInnes   }
277*5bd3b8fbSHong Zhang   stash->n               += n;
27875cae7c1SHong Zhang   space->local_used      += n;
27975cae7c1SHong Zhang   space->local_remaining -= n;
280a2d1c673SSatish Balay   PetscFunctionReturn(0);
281a2d1c673SSatish Balay }
28275cae7c1SHong Zhang 
2834c1ff481SSatish Balay /*
2848798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2854c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2864c1ff481SSatish Balay   can be inserted with a single call to this function.
287a2d1c673SSatish Balay 
2884c1ff481SSatish Balay   Input Parameters:
2894c1ff481SSatish Balay   stash   - the stash
2904c1ff481SSatish Balay   row     - the global row correspoiding to the values
2914c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2924c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2934c1ff481SSatish Balay   values  - the values inserted
2944c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2954c1ff481SSatish Balay             this happens because the input is columnoriented.
2964c1ff481SSatish Balay */
2974a2ae208SSatish Balay #undef __FUNCT__
2984a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
299c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
300a2d1c673SSatish Balay {
301dfbe8321SBarry Smith   PetscErrorCode     ierr;
30275cae7c1SHong Zhang   PetscInt           i,k;
30375cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
304a2d1c673SSatish Balay 
3054c1ff481SSatish Balay   PetscFunctionBegin;
3064c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
30775cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3088798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3094c1ff481SSatish Balay   }
31075cae7c1SHong Zhang   space = stash->space;
31175cae7c1SHong Zhang   k = space->local_used;
3124c1ff481SSatish Balay   for (i=0; i<n; i++) {
31375cae7c1SHong Zhang     space->idx[k] = row;
31475cae7c1SHong Zhang     space->idy[k] = idxn[i];
31575cae7c1SHong Zhang     space->val[k] = values[i*stepval];
31675cae7c1SHong Zhang     k++;
3174c1ff481SSatish Balay   }
318*5bd3b8fbSHong Zhang   stash->n               += n;
31975cae7c1SHong Zhang   space->local_used      += n;
32075cae7c1SHong Zhang   space->local_remaining -= n;
3214c1ff481SSatish Balay   PetscFunctionReturn(0);
3224c1ff481SSatish Balay }
3234c1ff481SSatish Balay 
3244c1ff481SSatish Balay /*
3258798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3264c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3274c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3284c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3294c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3304c1ff481SSatish Balay 
3314c1ff481SSatish Balay   Input Parameters:
3324c1ff481SSatish Balay   stash  - the stash
3334c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3344c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3354c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3364c1ff481SSatish Balay            values. Each block is of size bs*bs.
3374c1ff481SSatish Balay   values - the values inserted
3384c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3394c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3404c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3414c1ff481SSatish Balay */
3424a2ae208SSatish Balay #undef __FUNCT__
3434a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
344c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3454c1ff481SSatish Balay {
346dfbe8321SBarry Smith   PetscErrorCode     ierr;
34775cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
348f15d580aSBarry Smith   const MatScalar    *vals;
349f15d580aSBarry Smith   MatScalar          *array;
35075cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
351a2d1c673SSatish Balay 
352a2d1c673SSatish Balay   PetscFunctionBegin;
35375cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3548798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
355a2d1c673SSatish Balay   }
35675cae7c1SHong Zhang   space = stash->space;
35775cae7c1SHong Zhang   l     = space->local_used;
35875cae7c1SHong Zhang   bs2   = bs*bs;
3594c1ff481SSatish Balay   for (i=0; i<n; i++) {
36075cae7c1SHong Zhang     space->idx[l] = row;
36175cae7c1SHong Zhang     space->idy[l] = idxn[i];
36275cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
36375cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
36475cae7c1SHong Zhang        funtion call */
36575cae7c1SHong Zhang     array = space->val + bs2*l;
36675cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
36775cae7c1SHong Zhang     for (j=0; j<bs; j++) {
36875cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
36975cae7c1SHong Zhang       array++;
37075cae7c1SHong Zhang       vals  += cmax*bs;
37175cae7c1SHong Zhang     }
37275cae7c1SHong Zhang     l++;
373a2d1c673SSatish Balay   }
374*5bd3b8fbSHong Zhang   stash->n               += n;
37575cae7c1SHong Zhang   space->local_used      += n;
37675cae7c1SHong Zhang   space->local_remaining -= n;
3774c1ff481SSatish Balay   PetscFunctionReturn(0);
3784c1ff481SSatish Balay }
3794c1ff481SSatish Balay 
3804c1ff481SSatish Balay /*
3818798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3824c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3834c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3844c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3854c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3864c1ff481SSatish Balay 
3874c1ff481SSatish Balay   Input Parameters:
3884c1ff481SSatish Balay   stash  - the stash
3894c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3904c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3914c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3924c1ff481SSatish Balay            values. Each block is of size bs*bs.
3934c1ff481SSatish Balay   values - the values inserted
3944c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3954c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3964c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3974c1ff481SSatish Balay */
3984a2ae208SSatish Balay #undef __FUNCT__
3994a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
400c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
4014c1ff481SSatish Balay {
402dfbe8321SBarry Smith   PetscErrorCode  ierr;
40375cae7c1SHong Zhang   PetscInt        i,j,k,bs2,bs=stash->bs,l;
404f15d580aSBarry Smith   const MatScalar *vals;
405f15d580aSBarry Smith   MatScalar       *array;
40675cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
4074c1ff481SSatish Balay 
4084c1ff481SSatish Balay   PetscFunctionBegin;
40975cae7c1SHong Zhang   if (!space || space->local_remaining < n){
4108798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4114c1ff481SSatish Balay   }
41275cae7c1SHong Zhang   space = stash->space;
41375cae7c1SHong Zhang   l     = space->local_used;
41475cae7c1SHong Zhang   bs2   = bs*bs;
4154c1ff481SSatish Balay   for (i=0; i<n; i++) {
41675cae7c1SHong Zhang     space->idx[l] = row;
41775cae7c1SHong Zhang     space->idy[l] = idxn[i];
41875cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
41975cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
42075cae7c1SHong Zhang      funtion call */
42175cae7c1SHong Zhang     array = space->val + bs2*l;
42275cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
42375cae7c1SHong Zhang     for (j=0; j<bs; j++) {
42475cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
42575cae7c1SHong Zhang       array += bs;
42675cae7c1SHong Zhang       vals  += rmax*bs;
42775cae7c1SHong Zhang     }
428*5bd3b8fbSHong Zhang     l++;
429a2d1c673SSatish Balay   }
430*5bd3b8fbSHong Zhang   stash->n               += n;
43175cae7c1SHong Zhang   space->local_used      += n;
43275cae7c1SHong Zhang   space->local_remaining -= n;
4333a40ed3dSBarry Smith   PetscFunctionReturn(0);
4349417f4adSLois Curfman McInnes }
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array, indexed from 0 to 'no-of-procs', which gives the
           ownership range for each node: node j owns the indices in
           [owners[j], owners[j+1]).

  Notes: In the case of the blocked stash, the 'owners' array has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
*/
4504a2ae208SSatish Balay #undef __FUNCT__
4514a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
452c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
453bc5ccf88SSatish Balay {
454c1ac3661SBarry Smith   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
455fe09c992SBarry Smith   PetscInt       size=stash->size,nsends;
4566849ba73SBarry Smith   PetscErrorCode ierr;
45775cae7c1SHong Zhang   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
458563fb871SSatish Balay   MatScalar      **rvalues,*svalues;
459bc5ccf88SSatish Balay   MPI_Comm       comm = stash->comm;
460563fb871SSatish Balay   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
461fe09c992SBarry Smith   PetscMPIInt    *nprocs,*nlengths,nreceives;
462*5bd3b8fbSHong Zhang   PetscInt       *sp_idx,*sp_idy;
463*5bd3b8fbSHong Zhang   MatScalar      *sp_val;
464*5bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
465bc5ccf88SSatish Balay 
466bc5ccf88SSatish Balay   PetscFunctionBegin;
4674c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
46875cae7c1SHong Zhang 
469bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
470fe09c992SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
471fe09c992SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
472c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
473a2d1c673SSatish Balay 
474563fb871SSatish Balay   nlengths = nprocs+size;
47575cae7c1SHong Zhang   i = j    = 0;
4767357eb19SBarry Smith   lastidx  = -1;
477*5bd3b8fbSHong Zhang   space    = stash->space_head;
47875cae7c1SHong Zhang   while (space != PETSC_NULL){
47975cae7c1SHong Zhang     space_next = space->next;
480*5bd3b8fbSHong Zhang     sp_idx     = space->idx;
48175cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4827357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
483*5bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4847357eb19SBarry Smith       lastidx = idx;
4857357eb19SBarry Smith       for (; j<size; j++) {
4864c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
487563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
488bc5ccf88SSatish Balay         }
489bc5ccf88SSatish Balay       }
49075cae7c1SHong Zhang       i++;
49175cae7c1SHong Zhang     }
49275cae7c1SHong Zhang     space = space_next;
493bc5ccf88SSatish Balay   }
494563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
495563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
496563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
497563fb871SSatish Balay   }
498bc5ccf88SSatish Balay 
499563fb871SSatish Balay   { int  *onodes,*olengths;
500563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
501563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
502563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
503563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
504563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
505563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
506563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
507563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
508563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
509563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
510563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
511bc5ccf88SSatish Balay   }
512bc5ccf88SSatish Balay 
513bc5ccf88SSatish Balay   /* do sends:
514bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
515bc5ccf88SSatish Balay          the ith processor
516bc5ccf88SSatish Balay   */
517c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
518c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
519b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
520c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
521bc5ccf88SSatish Balay   starti   = startv + size;
522a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
523bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
524bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
525563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
526563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
527bc5ccf88SSatish Balay   }
52875cae7c1SHong Zhang 
52975cae7c1SHong Zhang   i     = 0;
530*5bd3b8fbSHong Zhang   space = stash->space_head;
53175cae7c1SHong Zhang   while (space != PETSC_NULL){
53275cae7c1SHong Zhang     space_next = space->next;
533*5bd3b8fbSHong Zhang     sp_idx = space->idx;
534*5bd3b8fbSHong Zhang     sp_idy = space->idy;
535*5bd3b8fbSHong Zhang     sp_val = space->val;
53675cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
537bc5ccf88SSatish Balay       j = owner[i];
538a2d1c673SSatish Balay       if (bs2 == 1) {
539*5bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
540a2d1c673SSatish Balay       } else {
541c1ac3661SBarry Smith         PetscInt  k;
5423eda8832SBarry Smith         MatScalar *buf1,*buf2;
5434c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
544*5bd3b8fbSHong Zhang         buf2 = space->val + bs2*i;
5454c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
546a2d1c673SSatish Balay       }
547*5bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
548*5bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
549bc5ccf88SSatish Balay       startv[j]++;
550bc5ccf88SSatish Balay       starti[j]++;
55175cae7c1SHong Zhang       i++;
55275cae7c1SHong Zhang     }
55375cae7c1SHong Zhang     space = space_next;
554bc5ccf88SSatish Balay   }
555bc5ccf88SSatish Balay   startv[0] = 0;
556563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
557e5d0e772SSatish Balay 
558bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
559563fb871SSatish Balay     if (nprocs[i]) {
560563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
561563fb871SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
562bc5ccf88SSatish Balay     }
563b85c94c3SSatish Balay   }
5645bcf5ddbSSatish Balay #if defined(PETSC_USE_VERBOSE)
56509f3b4e5SSatish Balay   ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: No of messages: %d \n",nsends));CHKERRQ(ierr);
566e5d0e772SSatish Balay   for (i=0; i<size; i++) {
567e5d0e772SSatish Balay     if (nprocs[i]) {
56809f3b4e5SSatish Balay       ierr = PetscVerboseInfo((0,"MatStashScatterBegin_Private: Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
569e5d0e772SSatish Balay     }
570e5d0e772SSatish Balay   }
571e5d0e772SSatish Balay #endif
572606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
573606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
574a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
575a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
576a2d1c673SSatish Balay   stash->nprocs = nprocs;
577a2d1c673SSatish Balay 
578563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
579563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
580563fb871SSatish Balay 
581563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
582563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
583563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
584563fb871SSatish Balay   }
585563fb871SSatish Balay   stash->recv_waits = recv_waits;
586563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
587563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
588563fb871SSatish Balay 
589bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
590563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
591bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
592bc5ccf88SSatish Balay   PetscFunctionReturn(0);
593bc5ccf88SSatish Balay }
594bc5ccf88SSatish Balay 
595a2d1c673SSatish Balay /*
5968798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5978798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5984c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5994c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
6004c1ff481SSatish Balay 
6014c1ff481SSatish Balay    Input Parameters:
6024c1ff481SSatish Balay    stash - the stash
6034c1ff481SSatish Balay 
6044c1ff481SSatish Balay    Output Parameters:
6054c1ff481SSatish Balay    nvals - the number of entries in the current message.
6064c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
6074c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
6084c1ff481SSatish Balay    vals  - the values
6094c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
6104c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6114c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
612a2d1c673SSatish Balay */
6134a2ae208SSatish Balay #undef __FUNCT__
6144a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
615c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
616bc5ccf88SSatish Balay {
6176849ba73SBarry Smith   PetscErrorCode ierr;
618fe09c992SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2;
619fe09c992SBarry Smith   PetscInt       bs2;
620a2d1c673SSatish Balay   MPI_Status     recv_status;
621b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
622bc5ccf88SSatish Balay 
623bc5ccf88SSatish Balay   PetscFunctionBegin;
624bc5ccf88SSatish Balay 
625a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
626a2d1c673SSatish Balay   /* Return if no more messages to process */
627a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
628a2d1c673SSatish Balay 
629a2d1c673SSatish Balay   flg_v = stash->nprocs;
6304c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
631a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
632a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
633a2d1c673SSatish Balay   while (!match_found) {
634a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
635a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
636a2d1c673SSatish Balay     if (i % 2) {
6373eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
638c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
639a2d1c673SSatish Balay       *nvals = *nvals/bs2;
640563fb871SSatish Balay     } else {
641563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
642563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
643563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
644bc5ccf88SSatish Balay     }
645a2d1c673SSatish Balay 
646a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
647c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
648c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
649a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
650563fb871SSatish Balay       *rows       = stash->rindices[i2];
651a2d1c673SSatish Balay       *cols       = *rows + *nvals;
652563fb871SSatish Balay       *vals       = stash->rvalues[i1];
653a2d1c673SSatish Balay       *flg        = 1;
654a2d1c673SSatish Balay       stash->nprocessed ++;
65535d8aa7fSBarry Smith       match_found = PETSC_TRUE;
656bc5ccf88SSatish Balay     }
657bc5ccf88SSatish Balay   }
658bc5ccf88SSatish Balay   PetscFunctionReturn(0);
659bc5ccf88SSatish Balay }
660