xref: /petsc/src/mat/utils/matstash.c (revision b45d2f2cb7e031d9c0de5873eca80614ca7b863b)
12d5177cdSBarry Smith 
2*b45d2f2cSJed Brown #include <petsc-private/matimpl.h>
35bd3b8fbSHong Zhang 
4bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
54c1ff481SSatish Balay 
69417f4adSLois Curfman McInnes /*
78798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
84c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
94c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
109417f4adSLois Curfman McInnes 
114c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
124c1ff481SSatish Balay 
134c1ff481SSatish Balay   Input Parameters:
144c1ff481SSatish Balay   comm - communicator, required for scatters.
154c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
164c1ff481SSatish Balay 
174c1ff481SSatish Balay   Output Parameters:
184c1ff481SSatish Balay   stash    - the newly created stash
199417f4adSLois Curfman McInnes */
204a2ae208SSatish Balay #undef __FUNCT__
214a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
22c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
239417f4adSLois Curfman McInnes {
24dfbe8321SBarry Smith   PetscErrorCode ierr;
25533163c2SBarry Smith   PetscInt       max,*opt,nopt,i;
26ace3abfcSBarry Smith   PetscBool      flg;
27bc5ccf88SSatish Balay 
283a40ed3dSBarry Smith   PetscFunctionBegin;
29bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
30752ec6e0SSatish Balay   stash->comm = comm;
31752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
32a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
33a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
34a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
35533163c2SBarry Smith   ierr  = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr);
36533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
37533163c2SBarry Smith 
38bc5ccf88SSatish Balay 
39434d7ff9SSatish Balay   nopt = stash->size;
40d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
41b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
42434d7ff9SSatish Balay   if (flg) {
43434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
44434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
45434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
46f4ab19daSSatish Balay     else                          max = 0; /* Use default */
47434d7ff9SSatish Balay     stash->umax = max;
48434d7ff9SSatish Balay   } else {
49434d7ff9SSatish Balay     stash->umax = 0;
50434d7ff9SSatish Balay   }
51606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
524c1ff481SSatish Balay   if (bs <= 0) bs = 1;
53a2d1c673SSatish Balay 
544c1ff481SSatish Balay   stash->bs       = bs;
559417f4adSLois Curfman McInnes   stash->nmax     = 0;
56434d7ff9SSatish Balay   stash->oldnmax  = 0;
579417f4adSLois Curfman McInnes   stash->n        = 0;
584c1ff481SSatish Balay   stash->reallocs = -1;
5975cae7c1SHong Zhang   stash->space_head = 0;
6075cae7c1SHong Zhang   stash->space      = 0;
619417f4adSLois Curfman McInnes 
62bc5ccf88SSatish Balay   stash->send_waits  = 0;
63bc5ccf88SSatish Balay   stash->recv_waits  = 0;
64a2d1c673SSatish Balay   stash->send_status = 0;
65bc5ccf88SSatish Balay   stash->nsends      = 0;
66bc5ccf88SSatish Balay   stash->nrecvs      = 0;
67bc5ccf88SSatish Balay   stash->svalues     = 0;
68bc5ccf88SSatish Balay   stash->rvalues     = 0;
69563fb871SSatish Balay   stash->rindices    = 0;
70a2d1c673SSatish Balay   stash->nprocessed  = 0;
7167318a8aSJed Brown 
7267318a8aSJed Brown   stash->reproduce   = PETSC_FALSE;
73acfcf0e5SJed Brown   ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr);
743a40ed3dSBarry Smith   PetscFunctionReturn(0);
759417f4adSLois Curfman McInnes }
769417f4adSLois Curfman McInnes 
774c1ff481SSatish Balay /*
788798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
794c1ff481SSatish Balay */
804a2ae208SSatish Balay #undef __FUNCT__
814a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
82dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
839417f4adSLois Curfman McInnes {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
85a2d1c673SSatish Balay 
86bc5ccf88SSatish Balay   PetscFunctionBegin;
876bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
8882740460SHong Zhang   stash->space = 0;
89533163c2SBarry Smith   ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
90bc5ccf88SSatish Balay   PetscFunctionReturn(0);
91bc5ccf88SSatish Balay }
92bc5ccf88SSatish Balay 
934c1ff481SSatish Balay /*
9467318a8aSJed Brown    MatStashScatterEnd_Private - This is called as the final stage of
954c1ff481SSatish Balay    scatter. The final stages of message passing is done here, and
9667318a8aSJed Brown    all the memory used for message passing is cleaned up. This
974c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
984c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
994c1ff481SSatish Balay    so that the same value can be used the next time through.
1004c1ff481SSatish Balay */
1014a2ae208SSatish Balay #undef __FUNCT__
1024a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
103dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
104bc5ccf88SSatish Balay {
1056849ba73SBarry Smith   PetscErrorCode ierr;
106533163c2SBarry Smith   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
107a2d1c673SSatish Balay   MPI_Status     *send_status;
108a2d1c673SSatish Balay 
1093a40ed3dSBarry Smith   PetscFunctionBegin;
110533163c2SBarry Smith   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
111a2d1c673SSatish Balay   /* wait on sends */
112a2d1c673SSatish Balay   if (nsends) {
11382502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
114a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
115606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
116a2d1c673SSatish Balay   }
117a2d1c673SSatish Balay 
118c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
119434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
120434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
121b9b97703SBarry Smith   if (stash->n) {
12294b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1238a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
124434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
125b9b97703SBarry Smith   }
126434d7ff9SSatish Balay 
127d07ff455SSatish Balay   stash->nmax       = 0;
128d07ff455SSatish Balay   stash->n          = 0;
1294c1ff481SSatish Balay   stash->reallocs   = -1;
130a2d1c673SSatish Balay   stash->nprocessed = 0;
1316bf464f9SBarry Smith   ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
13282740460SHong Zhang   stash->space      = 0;
133606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
134606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
135c05d87d6SBarry Smith   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
136c05d87d6SBarry Smith   ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
137606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
138c05d87d6SBarry Smith   ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
139563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
1403a40ed3dSBarry Smith   PetscFunctionReturn(0);
1419417f4adSLois Curfman McInnes }
1429417f4adSLois Curfman McInnes 
1434c1ff481SSatish Balay /*
1448798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1454c1ff481SSatish Balay 
1464c1ff481SSatish Balay    Input Parameters:
1474c1ff481SSatish Balay    stash    - the stash
14894b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1494c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1504c1ff481SSatish Balay 
1514c1ff481SSatish Balay */
1524a2ae208SSatish Balay #undef __FUNCT__
1534a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
154c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
15597530c3fSBarry Smith {
156c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
15794b769a5SSatish Balay 
1583a40ed3dSBarry Smith   PetscFunctionBegin;
1591ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1601ecfd215SBarry Smith   if (reallocs) {
161434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
162434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1631ecfd215SBarry Smith   }
164bc5ccf88SSatish Balay   PetscFunctionReturn(0);
165bc5ccf88SSatish Balay }
1664c1ff481SSatish Balay 
1674c1ff481SSatish Balay /*
1688798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1694c1ff481SSatish Balay 
1704c1ff481SSatish Balay    Input Parameters:
1714c1ff481SSatish Balay    stash  - the stash
1724c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1734c1ff481SSatish Balay             this value is used while allocating memory.
1744c1ff481SSatish Balay */
1754a2ae208SSatish Balay #undef __FUNCT__
1764a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
177c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
178bc5ccf88SSatish Balay {
179bc5ccf88SSatish Balay   PetscFunctionBegin;
180434d7ff9SSatish Balay   stash->umax = max;
1813a40ed3dSBarry Smith   PetscFunctionReturn(0);
18297530c3fSBarry Smith }
18397530c3fSBarry Smith 
1848798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1854c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1864c1ff481SSatish Balay    being inserted into the stash.
1874c1ff481SSatish Balay 
1884c1ff481SSatish Balay    Input Parameters:
1894c1ff481SSatish Balay    stash - the stash
1904c1ff481SSatish Balay    incr  - the minimum increase requested
1914c1ff481SSatish Balay 
1924c1ff481SSatish Balay    Notes:
1934c1ff481SSatish Balay    This routine doubles the currently used memory.
1944c1ff481SSatish Balay  */
1954a2ae208SSatish Balay #undef __FUNCT__
1964a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
197c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
1989417f4adSLois Curfman McInnes {
1996849ba73SBarry Smith   PetscErrorCode ierr;
2005bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2019417f4adSLois Curfman McInnes 
2023a40ed3dSBarry Smith   PetscFunctionBegin;
2039417f4adSLois Curfman McInnes   /* allocate a larger stash */
204c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
205434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
206434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
207c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
208434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
209434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
210434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2114c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
212d07ff455SSatish Balay 
21375cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
21475cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
215b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
216b087b6d6SSatish Balay     stash->space_head = stash->space;
21775cae7c1SHong Zhang   }
218b087b6d6SSatish Balay 
219bc5ccf88SSatish Balay   stash->reallocs++;
22075cae7c1SHong Zhang   stash->nmax = newnmax;
221bc5ccf88SSatish Balay   PetscFunctionReturn(0);
222bc5ccf88SSatish Balay }
223bc5ccf88SSatish Balay /*
2248798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2254c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2264c1ff481SSatish Balay   can be inserted with a single call to this function.
2274c1ff481SSatish Balay 
2284c1ff481SSatish Balay   Input Parameters:
2294c1ff481SSatish Balay   stash  - the stash
2304c1ff481SSatish Balay   row    - the global row correspoiding to the values
2314c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2324c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2334c1ff481SSatish Balay   values - the values inserted
234bc5ccf88SSatish Balay */
2354a2ae208SSatish Balay #undef __FUNCT__
2364a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
237ace3abfcSBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool  ignorezeroentries)
238bc5ccf88SSatish Balay {
239dfbe8321SBarry Smith   PetscErrorCode     ierr;
240b400d20cSBarry Smith   PetscInt           i,k,cnt = 0;
24175cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
242bc5ccf88SSatish Balay 
243bc5ccf88SSatish Balay   PetscFunctionBegin;
2444c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
24575cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2468798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2479417f4adSLois Curfman McInnes   }
24875cae7c1SHong Zhang   space = stash->space;
24975cae7c1SHong Zhang   k     = space->local_used;
2504c1ff481SSatish Balay   for (i=0; i<n; i++) {
25188c3974fSBarry Smith     if (ignorezeroentries && (values[i] == 0.0)) continue;
25275cae7c1SHong Zhang     space->idx[k] = row;
25375cae7c1SHong Zhang     space->idy[k] = idxn[i];
25475cae7c1SHong Zhang     space->val[k] = values[i];
25575cae7c1SHong Zhang     k++;
256b400d20cSBarry Smith     cnt++;
2579417f4adSLois Curfman McInnes   }
258b400d20cSBarry Smith   stash->n               += cnt;
259b400d20cSBarry Smith   space->local_used      += cnt;
260b400d20cSBarry Smith   space->local_remaining -= cnt;
261a2d1c673SSatish Balay   PetscFunctionReturn(0);
262a2d1c673SSatish Balay }
26375cae7c1SHong Zhang 
2644c1ff481SSatish Balay /*
2658798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2664c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2674c1ff481SSatish Balay   can be inserted with a single call to this function.
268a2d1c673SSatish Balay 
2694c1ff481SSatish Balay   Input Parameters:
2704c1ff481SSatish Balay   stash   - the stash
2714c1ff481SSatish Balay   row     - the global row correspoiding to the values
2724c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2734c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2744c1ff481SSatish Balay   values  - the values inserted
2754c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2764c1ff481SSatish Balay             this happens because the input is columnoriented.
2774c1ff481SSatish Balay */
2784a2ae208SSatish Balay #undef __FUNCT__
2794a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
280ace3abfcSBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool  ignorezeroentries)
281a2d1c673SSatish Balay {
282dfbe8321SBarry Smith   PetscErrorCode     ierr;
28350e9ab7cSBarry Smith   PetscInt           i,k,cnt = 0;
28475cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
285a2d1c673SSatish Balay 
2864c1ff481SSatish Balay   PetscFunctionBegin;
2874c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
28875cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2898798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2904c1ff481SSatish Balay   }
29175cae7c1SHong Zhang   space = stash->space;
29275cae7c1SHong Zhang   k = space->local_used;
2934c1ff481SSatish Balay   for (i=0; i<n; i++) {
29488c3974fSBarry Smith     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
29575cae7c1SHong Zhang     space->idx[k] = row;
29675cae7c1SHong Zhang     space->idy[k] = idxn[i];
29775cae7c1SHong Zhang     space->val[k] = values[i*stepval];
29875cae7c1SHong Zhang     k++;
299b400d20cSBarry Smith     cnt++;
3004c1ff481SSatish Balay   }
301b400d20cSBarry Smith   stash->n               += cnt;
302b400d20cSBarry Smith   space->local_used      += cnt;
303b400d20cSBarry Smith   space->local_remaining -= cnt;
3044c1ff481SSatish Balay   PetscFunctionReturn(0);
3054c1ff481SSatish Balay }
3064c1ff481SSatish Balay 
3074c1ff481SSatish Balay /*
3088798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3094c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3104c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3114c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3124c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3134c1ff481SSatish Balay 
3144c1ff481SSatish Balay   Input Parameters:
3154c1ff481SSatish Balay   stash  - the stash
3164c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3174c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3184c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3194c1ff481SSatish Balay            values. Each block is of size bs*bs.
3204c1ff481SSatish Balay   values - the values inserted
3214c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3224c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3234c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3244c1ff481SSatish Balay */
3254a2ae208SSatish Balay #undef __FUNCT__
3264a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
32754f21887SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3284c1ff481SSatish Balay {
329dfbe8321SBarry Smith   PetscErrorCode     ierr;
33075cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
33154f21887SBarry Smith   const PetscScalar  *vals;
33254f21887SBarry Smith   PetscScalar        *array;
33375cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
334a2d1c673SSatish Balay 
335a2d1c673SSatish Balay   PetscFunctionBegin;
33675cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3378798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
338a2d1c673SSatish Balay   }
33975cae7c1SHong Zhang   space = stash->space;
34075cae7c1SHong Zhang   l     = space->local_used;
34175cae7c1SHong Zhang   bs2   = bs*bs;
3424c1ff481SSatish Balay   for (i=0; i<n; i++) {
34375cae7c1SHong Zhang     space->idx[l] = row;
34475cae7c1SHong Zhang     space->idy[l] = idxn[i];
34575cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
34675cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
34775cae7c1SHong Zhang        funtion call */
34875cae7c1SHong Zhang     array = space->val + bs2*l;
34975cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
35075cae7c1SHong Zhang     for (j=0; j<bs; j++) {
35175cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
35275cae7c1SHong Zhang       array++;
35375cae7c1SHong Zhang       vals  += cmax*bs;
35475cae7c1SHong Zhang     }
35575cae7c1SHong Zhang     l++;
356a2d1c673SSatish Balay   }
3575bd3b8fbSHong Zhang   stash->n               += n;
35875cae7c1SHong Zhang   space->local_used      += n;
35975cae7c1SHong Zhang   space->local_remaining -= n;
3604c1ff481SSatish Balay   PetscFunctionReturn(0);
3614c1ff481SSatish Balay }
3624c1ff481SSatish Balay 
3634c1ff481SSatish Balay /*
3648798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3654c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3664c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3674c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3684c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3694c1ff481SSatish Balay 
3704c1ff481SSatish Balay   Input Parameters:
3714c1ff481SSatish Balay   stash  - the stash
3724c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3734c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3744c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3754c1ff481SSatish Balay            values. Each block is of size bs*bs.
3764c1ff481SSatish Balay   values - the values inserted
3774c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3784c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3794c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3804c1ff481SSatish Balay */
3814a2ae208SSatish Balay #undef __FUNCT__
3824a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
38354f21887SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3844c1ff481SSatish Balay {
385dfbe8321SBarry Smith   PetscErrorCode     ierr;
38675cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
38754f21887SBarry Smith   const PetscScalar  *vals;
38854f21887SBarry Smith   PetscScalar        *array;
38975cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
3904c1ff481SSatish Balay 
3914c1ff481SSatish Balay   PetscFunctionBegin;
39275cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3938798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3944c1ff481SSatish Balay   }
39575cae7c1SHong Zhang   space = stash->space;
39675cae7c1SHong Zhang   l     = space->local_used;
39775cae7c1SHong Zhang   bs2   = bs*bs;
3984c1ff481SSatish Balay   for (i=0; i<n; i++) {
39975cae7c1SHong Zhang     space->idx[l] = row;
40075cae7c1SHong Zhang     space->idy[l] = idxn[i];
40175cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40275cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
40375cae7c1SHong Zhang      funtion call */
40475cae7c1SHong Zhang     array = space->val + bs2*l;
40575cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
40675cae7c1SHong Zhang     for (j=0; j<bs; j++) {
40775cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
40875cae7c1SHong Zhang       array += bs;
40975cae7c1SHong Zhang       vals  += rmax*bs;
41075cae7c1SHong Zhang     }
4115bd3b8fbSHong Zhang     l++;
412a2d1c673SSatish Balay   }
4135bd3b8fbSHong Zhang   stash->n               += n;
41475cae7c1SHong Zhang   space->local_used      += n;
41575cae7c1SHong Zhang   space->local_remaining -= n;
4163a40ed3dSBarry Smith   PetscFunctionReturn(0);
4179417f4adSLois Curfman McInnes }
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs'+1 which gives the ownership range
           for each node (process j owns indices owners[j] to owners[j+1]-1).

  Notes: In the case of the blocked stash the 'owners' array has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
*/
4334a2ae208SSatish Balay #undef __FUNCT__
4344a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
4351e2582c4SBarry Smith PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
436bc5ccf88SSatish Balay {
437c1ac3661SBarry Smith   PetscInt          *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
438fe09c992SBarry Smith   PetscInt          size=stash->size,nsends;
4396849ba73SBarry Smith   PetscErrorCode    ierr;
44075cae7c1SHong Zhang   PetscInt          count,*sindices,**rindices,i,j,idx,lastidx,l;
44154f21887SBarry Smith   PetscScalar       **rvalues,*svalues;
442bc5ccf88SSatish Balay   MPI_Comm          comm = stash->comm;
443563fb871SSatish Balay   MPI_Request       *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
444fe09c992SBarry Smith   PetscMPIInt       *nprocs,*nlengths,nreceives;
4455bd3b8fbSHong Zhang   PetscInt          *sp_idx,*sp_idy;
44654f21887SBarry Smith   PetscScalar       *sp_val;
4475bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
448bc5ccf88SSatish Balay 
449bc5ccf88SSatish Balay   PetscFunctionBegin;
4504c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
45175cae7c1SHong Zhang 
452bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
453c05d87d6SBarry Smith   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
454c05d87d6SBarry Smith   ierr  = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
455c05d87d6SBarry Smith   ierr  = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr);
456c05d87d6SBarry Smith   ierr  = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
457c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
458a2d1c673SSatish Balay 
45975cae7c1SHong Zhang   i = j    = 0;
4607357eb19SBarry Smith   lastidx  = -1;
4615bd3b8fbSHong Zhang   space    = stash->space_head;
46275cae7c1SHong Zhang   while (space != PETSC_NULL){
46375cae7c1SHong Zhang     space_next = space->next;
4645bd3b8fbSHong Zhang     sp_idx     = space->idx;
46575cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4667357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4675bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4687357eb19SBarry Smith       lastidx = idx;
4697357eb19SBarry Smith       for (; j<size; j++) {
4704c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
471563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
472bc5ccf88SSatish Balay         }
473bc5ccf88SSatish Balay       }
47475cae7c1SHong Zhang       i++;
47575cae7c1SHong Zhang     }
47675cae7c1SHong Zhang     space = space_next;
477bc5ccf88SSatish Balay   }
478563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
479563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
480563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
481563fb871SSatish Balay   }
482bc5ccf88SSatish Balay 
48354f21887SBarry Smith   {PetscMPIInt  *onodes,*olengths;
484563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
485563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
486563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
487563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
488563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
489563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
490563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
491563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
492563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
493563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
494563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
495bc5ccf88SSatish Balay   }
496bc5ccf88SSatish Balay 
497bc5ccf88SSatish Balay   /* do sends:
498bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
499bc5ccf88SSatish Balay          the ith processor
500bc5ccf88SSatish Balay   */
501c05d87d6SBarry Smith   ierr     = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr);
502533163c2SBarry Smith   ierr     = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
503c05d87d6SBarry Smith   ierr     = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr);
504a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
505bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
506bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
507563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
508533163c2SBarry Smith     starti[i] = starti[i-1] + 2*nlengths[i-1];
509bc5ccf88SSatish Balay   }
51075cae7c1SHong Zhang 
51175cae7c1SHong Zhang   i     = 0;
5125bd3b8fbSHong Zhang   space = stash->space_head;
51375cae7c1SHong Zhang   while (space != PETSC_NULL){
51475cae7c1SHong Zhang     space_next = space->next;
5155bd3b8fbSHong Zhang     sp_idx = space->idx;
5165bd3b8fbSHong Zhang     sp_idy = space->idy;
5175bd3b8fbSHong Zhang     sp_val = space->val;
51875cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
519bc5ccf88SSatish Balay       j = owner[i];
520a2d1c673SSatish Balay       if (bs2 == 1) {
5215bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
522a2d1c673SSatish Balay       } else {
523c1ac3661SBarry Smith         PetscInt     k;
52454f21887SBarry Smith         PetscScalar *buf1,*buf2;
5254c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
526b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5274c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
528a2d1c673SSatish Balay       }
5295bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5305bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
531bc5ccf88SSatish Balay       startv[j]++;
532bc5ccf88SSatish Balay       starti[j]++;
53375cae7c1SHong Zhang       i++;
53475cae7c1SHong Zhang     }
53575cae7c1SHong Zhang     space = space_next;
536bc5ccf88SSatish Balay   }
537bc5ccf88SSatish Balay   startv[0] = 0;
538563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
539e5d0e772SSatish Balay 
540bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
541563fb871SSatish Balay     if (nprocs[i]) {
542563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
543a77337e4SBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
544bc5ccf88SSatish Balay     }
545b85c94c3SSatish Balay   }
5466cf91177SBarry Smith #if defined(PETSC_USE_INFO)
5471e2582c4SBarry Smith   ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr);
548e5d0e772SSatish Balay   for (i=0; i<size; i++) {
549e5d0e772SSatish Balay     if (nprocs[i]) {
550a77337e4SBarry Smith       ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
551e5d0e772SSatish Balay     }
552e5d0e772SSatish Balay   }
553e5d0e772SSatish Balay #endif
554c05d87d6SBarry Smith   ierr = PetscFree(nlengths);CHKERRQ(ierr);
555606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
556c05d87d6SBarry Smith   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
557c05d87d6SBarry Smith   ierr = PetscFree(nprocs);CHKERRQ(ierr);
558a2d1c673SSatish Balay 
559563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
560533163c2SBarry Smith   ierr  = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
561563fb871SSatish Balay 
562563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
563563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
564563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
565563fb871SSatish Balay   }
566563fb871SSatish Balay   stash->recv_waits = recv_waits;
567563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
568563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
569563fb871SSatish Balay 
570c05d87d6SBarry Smith   stash->svalues     = svalues;
571c05d87d6SBarry Smith   stash->sindices    = sindices;
572c05d87d6SBarry Smith   stash->rvalues     = rvalues;
573c05d87d6SBarry Smith   stash->rindices    = rindices;
574c05d87d6SBarry Smith   stash->send_waits  = send_waits;
575c05d87d6SBarry Smith   stash->nsends      = nsends;
576c05d87d6SBarry Smith   stash->nrecvs      = nreceives;
57767318a8aSJed Brown   stash->reproduce_count = 0;
578bc5ccf88SSatish Balay   PetscFunctionReturn(0);
579bc5ccf88SSatish Balay }
580bc5ccf88SSatish Balay 
581a2d1c673SSatish Balay /*
5828798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5838798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5844c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5854c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5864c1ff481SSatish Balay 
5874c1ff481SSatish Balay    Input Parameters:
5884c1ff481SSatish Balay    stash - the stash
5894c1ff481SSatish Balay 
5904c1ff481SSatish Balay    Output Parameters:
5914c1ff481SSatish Balay    nvals - the number of entries in the current message.
5924c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5934c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5944c1ff481SSatish Balay    vals  - the values
5954c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5964c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5974c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
598a2d1c673SSatish Balay */
5994a2ae208SSatish Balay #undef __FUNCT__
6004a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
60154f21887SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg)
602bc5ccf88SSatish Balay {
6036849ba73SBarry Smith   PetscErrorCode ierr;
604533163c2SBarry Smith   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
605fe09c992SBarry Smith   PetscInt       bs2;
606a2d1c673SSatish Balay   MPI_Status     recv_status;
607ace3abfcSBarry Smith   PetscBool      match_found = PETSC_FALSE;
608bc5ccf88SSatish Balay 
609bc5ccf88SSatish Balay   PetscFunctionBegin;
610bc5ccf88SSatish Balay 
611a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
612a2d1c673SSatish Balay   /* Return if no more messages to process */
613a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
614a2d1c673SSatish Balay 
6154c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
61667318a8aSJed Brown   /* If a matching pair of receives are found, process them, and return the data to
617a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
618a2d1c673SSatish Balay   while (!match_found) {
619533163c2SBarry Smith     CHKMEMQ;
62067318a8aSJed Brown     if (stash->reproduce) {
62167318a8aSJed Brown       i = stash->reproduce_count++;
62267318a8aSJed Brown       ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
62367318a8aSJed Brown     } else {
624a2d1c673SSatish Balay       ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
62567318a8aSJed Brown     }
626533163c2SBarry Smith     CHKMEMQ;
627e32f2f54SBarry Smith     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
628533163c2SBarry Smith 
62967318a8aSJed Brown     /* Now pack the received message into a structure which is usable by others */
630a2d1c673SSatish Balay     if (i % 2) {
631a77337e4SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
632c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
633a2d1c673SSatish Balay       *nvals = *nvals/bs2;
634563fb871SSatish Balay     } else {
635563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
636563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
637563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
638bc5ccf88SSatish Balay     }
639a2d1c673SSatish Balay 
640cb2b73ccSBarry Smith     /* Check if we have both messages from this proc */
641c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
642c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
643a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
644563fb871SSatish Balay       *rows       = stash->rindices[i2];
645a2d1c673SSatish Balay       *cols       = *rows + *nvals;
646563fb871SSatish Balay       *vals       = stash->rvalues[i1];
647a2d1c673SSatish Balay       *flg        = 1;
648a2d1c673SSatish Balay       stash->nprocessed ++;
64935d8aa7fSBarry Smith       match_found = PETSC_TRUE;
650bc5ccf88SSatish Balay     }
651bc5ccf88SSatish Balay   }
652bc5ccf88SSatish Balay   PetscFunctionReturn(0);
653bc5ccf88SSatish Balay }
654