1*f1af5d2fSBarry Smith /*$Id: matstash.c,v 1.37 1999/10/24 14:02:51 bsmith Exp bsmith $*/ 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 49417f4adSLois Curfman McInnes 5bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 64c1ff481SSatish Balay 79417f4adSLois Curfman McInnes /* 88798bf22SSatish Balay MatStashCreate_Private - Creates a stash ,currently used for all the parallel 94c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 104c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 119417f4adSLois Curfman McInnes 124c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 134c1ff481SSatish Balay 144c1ff481SSatish Balay Input Parameters: 154c1ff481SSatish Balay comm - communicator, required for scatters. 164c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 174c1ff481SSatish Balay 184c1ff481SSatish Balay Output Parameters: 194c1ff481SSatish Balay stash - the newly created stash 209417f4adSLois Curfman McInnes */ 215615d1e5SSatish Balay #undef __FUNC__ 228798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private" 238798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 249417f4adSLois Curfman McInnes { 25*f1af5d2fSBarry Smith int ierr,max,*opt,nopt; 26*f1af5d2fSBarry Smith PetscTruth flg; 27bc5ccf88SSatish Balay 283a40ed3dSBarry Smith PetscFunctionBegin; 29bc5ccf88SSatish Balay /* Require 2 tags, get the second using PetscCommGetNewTag() */ 30bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 31a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 32a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 33a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 34bc5ccf88SSatish Balay 35434d7ff9SSatish Balay nopt = stash->size; 36434d7ff9SSatish Balay opt = (int*) PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt); 37434d7ff9SSatish Balay ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 38434d7ff9SSatish Balay if (flg) { 39434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 40434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 41434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 42f4ab19daSSatish Balay else max = 0; /* Use default */ 43434d7ff9SSatish Balay stash->umax = max; 44434d7ff9SSatish Balay } else { 45434d7ff9SSatish Balay stash->umax = 0; 46434d7ff9SSatish Balay } 47606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 484c1ff481SSatish Balay if (bs <= 0) bs = 1; 49a2d1c673SSatish Balay 504c1ff481SSatish Balay stash->bs = bs; 519417f4adSLois Curfman McInnes stash->nmax = 0; 52434d7ff9SSatish Balay stash->oldnmax = 0; 539417f4adSLois Curfman McInnes stash->n = 0; 544c1ff481SSatish Balay stash->reallocs = -1; 559417f4adSLois Curfman McInnes stash->idx = 0; 569417f4adSLois Curfman McInnes stash->idy = 0; 57bc5ccf88SSatish Balay stash->array = 0; 589417f4adSLois Curfman McInnes 59bc5ccf88SSatish Balay stash->send_waits = 0; 60bc5ccf88SSatish Balay stash->recv_waits = 0; 61a2d1c673SSatish Balay stash->send_status = 0; 62bc5ccf88SSatish Balay stash->nsends = 0; 63bc5ccf88SSatish Balay stash->nrecvs = 0; 64bc5ccf88SSatish Balay stash->svalues = 0; 65bc5ccf88SSatish Balay stash->rvalues = 0; 66bc5ccf88SSatish Balay stash->rmax = 0; 67a2d1c673SSatish Balay stash->nprocs = 0; 68a2d1c673SSatish Balay stash->nprocessed = 0; 693a40ed3dSBarry Smith PetscFunctionReturn(0); 709417f4adSLois Curfman McInnes } 719417f4adSLois Curfman McInnes 724c1ff481SSatish Balay /* 738798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 744c1ff481SSatish Balay */ 755615d1e5SSatish Balay #undef __FUNC__ 768798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private" 778798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash) 789417f4adSLois Curfman McInnes { 79bc5ccf88SSatish Balay int ierr; 80a2d1c673SSatish Balay 81bc5ccf88SSatish Balay PetscFunctionBegin; 82bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr); 83606d414cSSatish Balay if (stash->array) { 84606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 85606d414cSSatish Balay stash->array = 0; 86606d414cSSatish Balay } 87bc5ccf88SSatish Balay PetscFunctionReturn(0); 88bc5ccf88SSatish Balay } 89bc5ccf88SSatish Balay 904c1ff481SSatish Balay /* 918798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 924c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 934c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 944c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 954c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 964c1ff481SSatish Balay so that the same value can be used the next time through. 974c1ff481SSatish Balay */ 98bc5ccf88SSatish Balay #undef __FUNC__ 998798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private" 1008798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash) 101bc5ccf88SSatish Balay { 102434d7ff9SSatish Balay int nsends=stash->nsends,ierr,bs2,oldnmax; 103a2d1c673SSatish Balay MPI_Status *send_status; 104a2d1c673SSatish Balay 1053a40ed3dSBarry Smith PetscFunctionBegin; 106a2d1c673SSatish Balay /* wait on sends */ 107a2d1c673SSatish Balay if (nsends) { 108a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 109a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 110606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 111a2d1c673SSatish Balay } 112a2d1c673SSatish Balay 113c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 114434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 115434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 11694b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1178a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 118434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 119434d7ff9SSatish Balay 120d07ff455SSatish Balay stash->nmax = 0; 121d07ff455SSatish Balay stash->n = 0; 1224c1ff481SSatish Balay stash->reallocs = -1; 123bc5ccf88SSatish Balay stash->rmax = 0; 124a2d1c673SSatish Balay stash->nprocessed = 0; 125bc5ccf88SSatish Balay 126bc5ccf88SSatish Balay if (stash->array) { 127606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 128bc5ccf88SSatish Balay stash->array = 0; 129bc5ccf88SSatish Balay stash->idx = 0; 130bc5ccf88SSatish Balay stash->idy = 0; 131bc5ccf88SSatish Balay } 132606d414cSSatish Balay if (stash->send_waits) { 133606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 134606d414cSSatish Balay stash->send_waits = 0; 135606d414cSSatish Balay } 136606d414cSSatish Balay if (stash->recv_waits) { 137606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 138606d414cSSatish Balay stash->recv_waits = 0; 139606d414cSSatish Balay } 140606d414cSSatish Balay if (stash->svalues) { 141606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 142606d414cSSatish Balay stash->svalues = 0; 143606d414cSSatish Balay } 144606d414cSSatish Balay if (stash->rvalues) { 145606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 146606d414cSSatish Balay stash->rvalues = 0; 147606d414cSSatish Balay } 148606d414cSSatish Balay if (stash->nprocs) { 149606d414cSSatish Balay ierr = PetscFree(stash->nprocs); 150606d414cSSatish Balay stash->nprocs = 0; 151606d414cSSatish Balay } 152bc5ccf88SSatish Balay 1533a40ed3dSBarry Smith PetscFunctionReturn(0); 1549417f4adSLois Curfman McInnes } 1559417f4adSLois Curfman McInnes 1564c1ff481SSatish Balay /* 1578798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1584c1ff481SSatish Balay 1594c1ff481SSatish Balay Input Parameters: 1604c1ff481SSatish Balay stash - the stash 16194b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1624c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1634c1ff481SSatish Balay 1644c1ff481SSatish Balay */ 1655615d1e5SSatish Balay #undef __FUNC__ 1668798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private" 1678798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 16897530c3fSBarry Smith { 16994b769a5SSatish Balay int bs2 = stash->bs*stash->bs; 17094b769a5SSatish Balay 1713a40ed3dSBarry Smith PetscFunctionBegin; 17294b769a5SSatish Balay *nstash = stash->n*bs2; 173434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 174434d7ff9SSatish Balay else *reallocs = stash->reallocs; 175bc5ccf88SSatish Balay PetscFunctionReturn(0); 176bc5ccf88SSatish Balay } 1774c1ff481SSatish Balay 1784c1ff481SSatish Balay 1794c1ff481SSatish Balay /* 1808798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1814c1ff481SSatish Balay 1824c1ff481SSatish Balay Input Parameters: 1834c1ff481SSatish Balay stash - the stash 1844c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1854c1ff481SSatish Balay this value is used while allocating memory. 1864c1ff481SSatish Balay */ 187bc5ccf88SSatish Balay #undef __FUNC__ 1888798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private" 1898798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max) 190bc5ccf88SSatish Balay { 191bc5ccf88SSatish Balay PetscFunctionBegin; 192434d7ff9SSatish Balay stash->umax = max; 1933a40ed3dSBarry Smith PetscFunctionReturn(0); 19497530c3fSBarry Smith } 19597530c3fSBarry Smith 1968798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1974c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1984c1ff481SSatish Balay being inserted into the stash. 1994c1ff481SSatish Balay 2004c1ff481SSatish Balay Input Parameters: 2014c1ff481SSatish Balay stash - the stash 2024c1ff481SSatish Balay incr - the minimum increase requested 2034c1ff481SSatish Balay 2044c1ff481SSatish Balay Notes: 2054c1ff481SSatish Balay This routine doubles the currently used memory. 2064c1ff481SSatish Balay */ 2075615d1e5SSatish Balay #undef __FUNC__ 2088798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private" 2098798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr) 2109417f4adSLois Curfman McInnes { 211549d3d68SSatish Balay int *n_idx,*n_idy,newnmax,bs2,ierr; 212bc5ccf88SSatish Balay Scalar *n_array; 2139417f4adSLois Curfman McInnes 2143a40ed3dSBarry Smith PetscFunctionBegin; 2159417f4adSLois Curfman McInnes /* allocate a larger stash */ 21694b769a5SSatish Balay bs2 = stash->bs*stash->bs; 217c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 218434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 219434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 220c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 221434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 222434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 223434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2244c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 225d07ff455SSatish Balay 226a2d1c673SSatish Balay n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 227a2d1c673SSatish Balay n_idx = (int *) (n_array + bs2*newnmax); 228d07ff455SSatish Balay n_idy = (int *) (n_idx + newnmax); 229549d3d68SSatish Balay ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));CHKERRQ(ierr); 230549d3d68SSatish Balay ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr); 231549d3d68SSatish Balay ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr); 232606d414cSSatish Balay if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 233d07ff455SSatish Balay stash->array = n_array; 234d07ff455SSatish Balay stash->idx = n_idx; 235d07ff455SSatish Balay stash->idy = n_idy; 236d07ff455SSatish Balay stash->nmax = newnmax; 237bc5ccf88SSatish Balay stash->reallocs++; 238bc5ccf88SSatish Balay PetscFunctionReturn(0); 239bc5ccf88SSatish Balay } 240bc5ccf88SSatish Balay /* 2418798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2424c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2434c1ff481SSatish Balay can be inserted with a single call to this function. 2444c1ff481SSatish Balay 2454c1ff481SSatish Balay Input Parameters: 2464c1ff481SSatish Balay stash - the stash 2474c1ff481SSatish Balay row - the global row correspoiding to the values 2484c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2494c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2504c1ff481SSatish Balay values - the values inserted 251bc5ccf88SSatish Balay */ 252bc5ccf88SSatish Balay #undef __FUNC__ 2538798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private" 2548798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 255bc5ccf88SSatish Balay { 256a2d1c673SSatish Balay int ierr,i; 257bc5ccf88SSatish Balay 258bc5ccf88SSatish Balay PetscFunctionBegin; 2594c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2604c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2618798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2629417f4adSLois Curfman McInnes } 2634c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2649417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 265a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 266a2d1c673SSatish Balay stash->array[stash->n] = values[i]; 267a2d1c673SSatish Balay stash->n++; 2689417f4adSLois Curfman McInnes } 269a2d1c673SSatish Balay PetscFunctionReturn(0); 270a2d1c673SSatish Balay } 2714c1ff481SSatish Balay /* 2728798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2734c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2744c1ff481SSatish Balay can be inserted with a single call to this function. 275a2d1c673SSatish Balay 2764c1ff481SSatish Balay Input Parameters: 2774c1ff481SSatish Balay stash - the stash 2784c1ff481SSatish Balay row - the global row correspoiding to the values 2794c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2804c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2814c1ff481SSatish Balay values - the values inserted 2824c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2834c1ff481SSatish Balay this happens because the input is columnoriented. 2844c1ff481SSatish Balay */ 285a2d1c673SSatish Balay #undef __FUNC__ 2868798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private" 2878798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 2884c1ff481SSatish Balay Scalar *values,int stepval) 289a2d1c673SSatish Balay { 2904c1ff481SSatish Balay int ierr,i; 291a2d1c673SSatish Balay 2924c1ff481SSatish Balay PetscFunctionBegin; 2934c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2944c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2958798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2964c1ff481SSatish Balay } 2974c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2984c1ff481SSatish Balay stash->idx[stash->n] = row; 2994c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3004c1ff481SSatish Balay stash->array[stash->n] = values[i*stepval]; 3014c1ff481SSatish Balay stash->n++; 3024c1ff481SSatish Balay } 3034c1ff481SSatish Balay PetscFunctionReturn(0); 3044c1ff481SSatish Balay } 3054c1ff481SSatish Balay 3064c1ff481SSatish Balay /* 3078798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3084c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3094c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3104c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3114c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3124c1ff481SSatish Balay 3134c1ff481SSatish Balay Input Parameters: 3144c1ff481SSatish Balay stash - the stash 3154c1ff481SSatish Balay row - the global block-row correspoiding to the values 3164c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3174c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3184c1ff481SSatish Balay values. Each block is of size bs*bs. 3194c1ff481SSatish Balay values - the values inserted 3204c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3214c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3224c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3234c1ff481SSatish Balay */ 3244c1ff481SSatish Balay #undef __FUNC__ 3258798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private" 3268798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 3274c1ff481SSatish Balay int rmax,int cmax,int idx) 3284c1ff481SSatish Balay { 3294c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3304c1ff481SSatish Balay Scalar *vals,*array; 331a2d1c673SSatish Balay 332a2d1c673SSatish Balay PetscFunctionBegin; 333a2d1c673SSatish Balay bs2 = bs*bs; 3344c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3358798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 336a2d1c673SSatish Balay } 3374c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 338a2d1c673SSatish Balay stash->idx[stash->n] = row; 339a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 340a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 341a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 342a2d1c673SSatish Balay funtion call */ 343a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 344a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 345a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 346a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 347a2d1c673SSatish Balay array += 1; 348a2d1c673SSatish Balay vals += cmax*bs; 349a2d1c673SSatish Balay } 3504c1ff481SSatish Balay stash->n++; 3514c1ff481SSatish Balay } 3524c1ff481SSatish Balay PetscFunctionReturn(0); 3534c1ff481SSatish Balay } 3544c1ff481SSatish Balay 3554c1ff481SSatish Balay /* 3568798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3574c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3584c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3594c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3604c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3614c1ff481SSatish Balay 3624c1ff481SSatish Balay Input Parameters: 3634c1ff481SSatish Balay stash - the stash 3644c1ff481SSatish Balay row - the global block-row correspoiding to the values 3654c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3664c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3674c1ff481SSatish Balay values. Each block is of size bs*bs. 3684c1ff481SSatish Balay values - the values inserted 3694c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3704c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3714c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3724c1ff481SSatish Balay */ 3734c1ff481SSatish Balay #undef __FUNC__ 3748798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private" 3758798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 3764c1ff481SSatish Balay Scalar *values,int rmax,int cmax,int idx) 3774c1ff481SSatish Balay { 3784c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3794c1ff481SSatish Balay Scalar *vals,*array; 3804c1ff481SSatish Balay 3814c1ff481SSatish Balay PetscFunctionBegin; 3824c1ff481SSatish Balay bs2 = bs*bs; 3834c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3848798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3854c1ff481SSatish Balay } 3864c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 3874c1ff481SSatish Balay stash->idx[stash->n] = row; 3884c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3894c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 3904c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 3914c1ff481SSatish Balay funtion call */ 392a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 393a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 394a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 395a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 396a2d1c673SSatish Balay array += bs; 397a2d1c673SSatish Balay vals += rmax*bs; 398a2d1c673SSatish Balay } 399a2d1c673SSatish Balay stash->n++; 4009417f4adSLois Curfman McInnes } 4013a40ed3dSBarry Smith PetscFunctionReturn(0); 4029417f4adSLois Curfman McInnes } 4034c1ff481SSatish Balay /* 4048798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4054c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4064c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4074c1ff481SSatish Balay processors. 408bc5ccf88SSatish Balay 4094c1ff481SSatish Balay Input Parameters: 4104c1ff481SSatish Balay stash - the stash 4114c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4124c1ff481SSatish Balay for each node. 4134c1ff481SSatish Balay 4144c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4154c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4164c1ff481SSatish Balay the proper global indices. 4174c1ff481SSatish Balay */ 418bc5ccf88SSatish Balay #undef __FUNC__ 4198798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private" 4208798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners) 421bc5ccf88SSatish Balay { 422a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 423a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 4244c1ff481SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 425a2d1c673SSatish Balay Scalar *rvalues,*svalues; 426bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 427bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 428bc5ccf88SSatish Balay 429bc5ccf88SSatish Balay PetscFunctionBegin; 430bc5ccf88SSatish Balay 4314c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 432bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 433bc5ccf88SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 434549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 435549d3d68SSatish Balay procs = nprocs + size; 436bc5ccf88SSatish Balay owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) );CHKPTRQ(owner); 437a2d1c673SSatish Balay 438bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 439bc5ccf88SSatish Balay idx = stash->idx[i]; 440bc5ccf88SSatish Balay for ( j=0; j<size; j++ ) { 4414c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 442bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 443bc5ccf88SSatish Balay } 444bc5ccf88SSatish Balay } 445bc5ccf88SSatish Balay } 446bc5ccf88SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 447bc5ccf88SSatish Balay 448bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 4496831982aSBarry Smith work = (int *)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 4506831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 451bc5ccf88SSatish Balay nmax = work[rank]; 4526831982aSBarry Smith nreceives = work[size+rank]; 453606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 454bc5ccf88SSatish Balay /* post receives: 455bc5ccf88SSatish Balay since we don't know how long each individual message is we 456bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 457bc5ccf88SSatish Balay this is a lot of wasted space. 458bc5ccf88SSatish Balay */ 459a2d1c673SSatish Balay rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 460a2d1c673SSatish Balay rindices = (int *) (rvalues + bs2*nreceives*nmax); 461a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 462bc5ccf88SSatish Balay for ( i=0,count=0; i<nreceives; i++ ) { 463a2d1c673SSatish Balay ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 464bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 465bc5ccf88SSatish Balay ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 466bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 467bc5ccf88SSatish Balay } 468bc5ccf88SSatish Balay 469bc5ccf88SSatish Balay /* do sends: 470bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 471bc5ccf88SSatish Balay the ith processor 472bc5ccf88SSatish Balay */ 473a2d1c673SSatish Balay svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 474a2d1c673SSatish Balay sindices = (int *) (svalues + bs2*stash->n); 475549d3d68SSatish Balay send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 476bc5ccf88SSatish Balay startv = (int *) PetscMalloc(2*size*sizeof(int) );CHKPTRQ(startv); 477bc5ccf88SSatish Balay starti = startv + size; 478a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 479bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 480bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { 481bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 482bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 483bc5ccf88SSatish Balay } 484bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 485bc5ccf88SSatish Balay j = owner[i]; 486a2d1c673SSatish Balay if (bs2 == 1) { 487bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 488a2d1c673SSatish Balay } else { 4894c1ff481SSatish Balay int k; 4904c1ff481SSatish Balay Scalar *buf1,*buf2; 4914c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 4924c1ff481SSatish Balay buf2 = stash->array+bs2*i; 4934c1ff481SSatish Balay for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 494a2d1c673SSatish Balay } 495bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 496bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 497bc5ccf88SSatish Balay startv[j]++; 498bc5ccf88SSatish Balay starti[j]++; 499bc5ccf88SSatish Balay } 500bc5ccf88SSatish Balay startv[0] = 0; 501bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 502bc5ccf88SSatish Balay for ( i=0,count=0; i<size; i++ ) { 503bc5ccf88SSatish Balay if (procs[i]) { 504a2d1c673SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 505bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 506bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 507bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 508bc5ccf88SSatish Balay } 509bc5ccf88SSatish Balay } 510606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 511606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 512a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 513a2d1c673SSatish Balay for (i=0; i<2*size; i++ ) nprocs[i] = -1; 514a2d1c673SSatish Balay stash->nprocs = nprocs; 515a2d1c673SSatish Balay 516bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 517bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 518bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 519bc5ccf88SSatish Balay stash->rmax = nmax; 520bc5ccf88SSatish Balay PetscFunctionReturn(0); 521bc5ccf88SSatish Balay } 522bc5ccf88SSatish Balay 523a2d1c673SSatish Balay /* 5248798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5258798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5264c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5274c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5284c1ff481SSatish Balay 5294c1ff481SSatish Balay Input Parameters: 5304c1ff481SSatish Balay stash - the stash 5314c1ff481SSatish Balay 5324c1ff481SSatish Balay Output Parameters: 5334c1ff481SSatish Balay nvals - the number of entries in the current message. 5344c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5354c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5364c1ff481SSatish Balay vals - the values 5374c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5384c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5394c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 540a2d1c673SSatish Balay */ 541bc5ccf88SSatish Balay #undef __FUNC__ 5428798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private" 5438798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 544bc5ccf88SSatish Balay { 545a2d1c673SSatish Balay int i,ierr,size=stash->size,*flg_v,*flg_i; 546a2d1c673SSatish Balay int i1,i2,*rindices,match_found=0,bs2; 547a2d1c673SSatish Balay MPI_Status recv_status; 548bc5ccf88SSatish Balay 549bc5ccf88SSatish Balay PetscFunctionBegin; 550bc5ccf88SSatish Balay 551a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 552a2d1c673SSatish Balay /* Return if no more messages to process */ 553a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 554a2d1c673SSatish Balay 555a2d1c673SSatish Balay flg_v = stash->nprocs; 556a2d1c673SSatish Balay flg_i = flg_v + size; 5574c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 558a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 559a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 560a2d1c673SSatish Balay while (!match_found) { 561a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 562a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 563a2d1c673SSatish Balay if (i % 2) { 564a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 565a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 566a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 567a2d1c673SSatish Balay } else { 568a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 569a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 570a2d1c673SSatish Balay *nvals = *nvals/bs2; 571bc5ccf88SSatish Balay } 572a2d1c673SSatish Balay 573a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 574a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 575a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 576a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 577a2d1c673SSatish Balay rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 578a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 579a2d1c673SSatish Balay *cols = *rows + *nvals; 580a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 581a2d1c673SSatish Balay *flg = 1; 582a2d1c673SSatish Balay stash->nprocessed ++; 583a2d1c673SSatish Balay match_found = 1; 584bc5ccf88SSatish Balay } 585bc5ccf88SSatish Balay } 586bc5ccf88SSatish Balay PetscFunctionReturn(0); 587bc5ccf88SSatish Balay } 588