1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*549d3d68SSatish Balay static char vcid[] = "$Id: matstash.c,v 1.33 1999/03/19 22:42:58 balay Exp balay $"; 32d5177cdSBarry Smith #endif 42d5177cdSBarry Smith 570f55243SBarry Smith #include "src/mat/matimpl.h" 69417f4adSLois Curfman McInnes 7bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 84c1ff481SSatish Balay 99417f4adSLois Curfman McInnes /* 108798bf22SSatish Balay MatStashCreate_Private - Creates a stash ,currently used for all the parallel 114c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 124c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 139417f4adSLois Curfman McInnes 144c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 154c1ff481SSatish Balay 164c1ff481SSatish Balay Input Parameters: 174c1ff481SSatish Balay comm - communicator, required for scatters. 184c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 194c1ff481SSatish Balay 204c1ff481SSatish Balay Output Parameters: 214c1ff481SSatish Balay stash - the newly created stash 229417f4adSLois Curfman McInnes */ 235615d1e5SSatish Balay #undef __FUNC__ 248798bf22SSatish Balay #define __FUNC__ "MatStashCreate_Private" 258798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 269417f4adSLois Curfman McInnes { 27434d7ff9SSatish Balay int ierr,flg,max,*opt,nopt; 28bc5ccf88SSatish Balay 293a40ed3dSBarry Smith PetscFunctionBegin; 30bc5ccf88SSatish Balay /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31bc5ccf88SSatish Balay ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 33a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 34a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 35bc5ccf88SSatish Balay 36434d7ff9SSatish Balay nopt = stash->size; 37434d7ff9SSatish Balay opt = (int*) PetscMalloc(nopt*sizeof(int));CHKPTRQ(opt); 38434d7ff9SSatish Balay ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 39434d7ff9SSatish Balay if (flg) { 40434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 41434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 42434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 43f4ab19daSSatish Balay else max = 0; /* Use default */ 44434d7ff9SSatish Balay stash->umax = max; 45434d7ff9SSatish Balay } else { 46434d7ff9SSatish Balay stash->umax = 0; 47434d7ff9SSatish Balay } 48434d7ff9SSatish Balay PetscFree(opt); 494c1ff481SSatish Balay if (bs <= 0) bs = 1; 50a2d1c673SSatish Balay 514c1ff481SSatish Balay stash->bs = bs; 529417f4adSLois Curfman McInnes stash->nmax = 0; 53434d7ff9SSatish Balay stash->oldnmax = 0; 549417f4adSLois Curfman McInnes stash->n = 0; 554c1ff481SSatish Balay stash->reallocs = -1; 569417f4adSLois Curfman McInnes stash->idx = 0; 579417f4adSLois Curfman McInnes stash->idy = 0; 58bc5ccf88SSatish Balay stash->array = 0; 599417f4adSLois Curfman McInnes 60bc5ccf88SSatish Balay stash->send_waits = 0; 61bc5ccf88SSatish Balay stash->recv_waits = 0; 62a2d1c673SSatish Balay stash->send_status = 0; 63bc5ccf88SSatish Balay stash->nsends = 0; 64bc5ccf88SSatish Balay stash->nrecvs = 0; 65bc5ccf88SSatish Balay stash->svalues = 0; 66bc5ccf88SSatish Balay stash->rvalues = 0; 67bc5ccf88SSatish Balay stash->rmax = 0; 68a2d1c673SSatish Balay stash->nprocs = 0; 69a2d1c673SSatish Balay stash->nprocessed = 0; 703a40ed3dSBarry Smith PetscFunctionReturn(0); 719417f4adSLois Curfman McInnes } 729417f4adSLois Curfman McInnes 734c1ff481SSatish Balay /* 748798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 754c1ff481SSatish Balay */ 765615d1e5SSatish Balay #undef __FUNC__ 778798bf22SSatish Balay #define __FUNC__ "MatStashDestroy_Private" 788798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash) 799417f4adSLois Curfman McInnes { 80bc5ccf88SSatish Balay int ierr; 81a2d1c673SSatish Balay 82bc5ccf88SSatish Balay PetscFunctionBegin; 83bc5ccf88SSatish Balay ierr = PetscCommDestroy_Private(&stash->comm);CHKERRQ(ierr); 84bc5ccf88SSatish Balay if (stash->array) {PetscFree(stash->array); stash->array = 0;} 85bc5ccf88SSatish Balay PetscFunctionReturn(0); 86bc5ccf88SSatish Balay } 87bc5ccf88SSatish Balay 884c1ff481SSatish Balay /* 898798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 904c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 914c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 924c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 934c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 944c1ff481SSatish Balay so that the same value can be used the next time through. 954c1ff481SSatish Balay */ 96bc5ccf88SSatish Balay #undef __FUNC__ 978798bf22SSatish Balay #define __FUNC__ "MatStashScatterEnd_Private" 988798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash) 99bc5ccf88SSatish Balay { 100434d7ff9SSatish Balay int nsends=stash->nsends,ierr,bs2,oldnmax; 101a2d1c673SSatish Balay MPI_Status *send_status; 102a2d1c673SSatish Balay 1033a40ed3dSBarry Smith PetscFunctionBegin; 104a2d1c673SSatish Balay /* wait on sends */ 105a2d1c673SSatish Balay if (nsends) { 106a2d1c673SSatish Balay send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 107a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 108a2d1c673SSatish Balay PetscFree(send_status); 109a2d1c673SSatish Balay } 110a2d1c673SSatish Balay 111c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 112434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 113434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 11494b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1158a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 116434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 117434d7ff9SSatish Balay 118d07ff455SSatish Balay stash->nmax = 0; 119d07ff455SSatish Balay stash->n = 0; 1204c1ff481SSatish Balay stash->reallocs = -1; 121bc5ccf88SSatish Balay stash->rmax = 0; 122a2d1c673SSatish Balay stash->nprocessed = 0; 123bc5ccf88SSatish Balay 124bc5ccf88SSatish Balay if (stash->array) { 125bc5ccf88SSatish Balay PetscFree(stash->array); 126bc5ccf88SSatish Balay stash->array = 0; 127bc5ccf88SSatish Balay stash->idx = 0; 128bc5ccf88SSatish Balay stash->idy = 0; 129bc5ccf88SSatish Balay } 130bc5ccf88SSatish Balay if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 131bc5ccf88SSatish Balay if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 132bc5ccf88SSatish Balay if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 133bc5ccf88SSatish Balay if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 134a2d1c673SSatish Balay if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 135bc5ccf88SSatish Balay 1363a40ed3dSBarry Smith PetscFunctionReturn(0); 1379417f4adSLois Curfman McInnes } 1389417f4adSLois Curfman McInnes 1394c1ff481SSatish Balay /* 1408798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1414c1ff481SSatish Balay 1424c1ff481SSatish Balay Input Parameters: 1434c1ff481SSatish Balay stash - the stash 14494b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1454c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1464c1ff481SSatish Balay 1474c1ff481SSatish Balay */ 1485615d1e5SSatish Balay #undef __FUNC__ 1498798bf22SSatish Balay #define __FUNC__ "MatStashGetInfo_Private" 1508798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 15197530c3fSBarry Smith { 15294b769a5SSatish Balay int bs2 = stash->bs*stash->bs; 15394b769a5SSatish Balay 1543a40ed3dSBarry Smith PetscFunctionBegin; 15594b769a5SSatish Balay *nstash = stash->n*bs2; 156434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 157434d7ff9SSatish Balay else *reallocs = stash->reallocs; 158bc5ccf88SSatish Balay PetscFunctionReturn(0); 159bc5ccf88SSatish Balay } 1604c1ff481SSatish Balay 1614c1ff481SSatish Balay 1624c1ff481SSatish Balay /* 1638798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1644c1ff481SSatish Balay 1654c1ff481SSatish Balay Input Parameters: 1664c1ff481SSatish Balay stash - the stash 1674c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1684c1ff481SSatish Balay this value is used while allocating memory. 1694c1ff481SSatish Balay */ 170bc5ccf88SSatish Balay #undef __FUNC__ 1718798bf22SSatish Balay #define __FUNC__ "MatStashSetInitialSize_Private" 1728798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max) 173bc5ccf88SSatish Balay { 174bc5ccf88SSatish Balay PetscFunctionBegin; 175434d7ff9SSatish Balay stash->umax = max; 1763a40ed3dSBarry Smith PetscFunctionReturn(0); 17797530c3fSBarry Smith } 17897530c3fSBarry Smith 1798798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1804c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1814c1ff481SSatish Balay being inserted into the stash. 1824c1ff481SSatish Balay 1834c1ff481SSatish Balay Input Parameters: 1844c1ff481SSatish Balay stash - the stash 1854c1ff481SSatish Balay incr - the minimum increase requested 1864c1ff481SSatish Balay 1874c1ff481SSatish Balay Notes: 1884c1ff481SSatish Balay This routine doubles the currently used memory. 1894c1ff481SSatish Balay */ 1905615d1e5SSatish Balay #undef __FUNC__ 1918798bf22SSatish Balay #define __FUNC__ "MatStashExpand_Private" 1928798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr) 1939417f4adSLois Curfman McInnes { 194*549d3d68SSatish Balay int *n_idx,*n_idy,newnmax,bs2,ierr; 195bc5ccf88SSatish Balay Scalar *n_array; 1969417f4adSLois Curfman McInnes 1973a40ed3dSBarry Smith PetscFunctionBegin; 1989417f4adSLois Curfman McInnes /* allocate a larger stash */ 19994b769a5SSatish Balay bs2 = stash->bs*stash->bs; 200c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 201434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 202434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 203c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 204434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 205434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 206434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2074c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 208d07ff455SSatish Balay 209a2d1c673SSatish Balay n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 210a2d1c673SSatish Balay n_idx = (int *) (n_array + bs2*newnmax); 211d07ff455SSatish Balay n_idy = (int *) (n_idx + newnmax); 212*549d3d68SSatish Balay ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));CHKERRQ(ierr); 213*549d3d68SSatish Balay ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr); 214*549d3d68SSatish Balay ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr); 2150452661fSBarry Smith if (stash->array) PetscFree(stash->array); 216d07ff455SSatish Balay stash->array = n_array; 217d07ff455SSatish Balay stash->idx = n_idx; 218d07ff455SSatish Balay stash->idy = n_idy; 219d07ff455SSatish Balay stash->nmax = newnmax; 220bc5ccf88SSatish Balay stash->reallocs++; 221bc5ccf88SSatish Balay PetscFunctionReturn(0); 222bc5ccf88SSatish Balay } 223bc5ccf88SSatish Balay /* 2248798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2254c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2264c1ff481SSatish Balay can be inserted with a single call to this function. 2274c1ff481SSatish Balay 2284c1ff481SSatish Balay Input Parameters: 2294c1ff481SSatish Balay stash - the stash 2304c1ff481SSatish Balay row - the global row correspoiding to the values 2314c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2324c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2334c1ff481SSatish Balay values - the values inserted 234bc5ccf88SSatish Balay */ 235bc5ccf88SSatish Balay #undef __FUNC__ 2368798bf22SSatish Balay #define __FUNC__ "MatStashValuesRow_Private" 2378798bf22SSatish Balay int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 238bc5ccf88SSatish Balay { 239a2d1c673SSatish Balay int ierr,i; 240bc5ccf88SSatish Balay 241bc5ccf88SSatish Balay PetscFunctionBegin; 2424c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2434c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2448798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2459417f4adSLois Curfman McInnes } 2464c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2479417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 248a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 249a2d1c673SSatish Balay stash->array[stash->n] = values[i]; 250a2d1c673SSatish Balay stash->n++; 2519417f4adSLois Curfman McInnes } 252a2d1c673SSatish Balay PetscFunctionReturn(0); 253a2d1c673SSatish Balay } 2544c1ff481SSatish Balay /* 2558798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2564c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2574c1ff481SSatish Balay can be inserted with a single call to this function. 258a2d1c673SSatish Balay 2594c1ff481SSatish Balay Input Parameters: 2604c1ff481SSatish Balay stash - the stash 2614c1ff481SSatish Balay row - the global row correspoiding to the values 2624c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2634c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2644c1ff481SSatish Balay values - the values inserted 2654c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2664c1ff481SSatish Balay this happens because the input is columnoriented. 2674c1ff481SSatish Balay */ 268a2d1c673SSatish Balay #undef __FUNC__ 2698798bf22SSatish Balay #define __FUNC__ "MatStashValuesCol_Private" 2708798bf22SSatish Balay int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 2714c1ff481SSatish Balay Scalar *values,int stepval) 272a2d1c673SSatish Balay { 2734c1ff481SSatish Balay int ierr,i; 274a2d1c673SSatish Balay 2754c1ff481SSatish Balay PetscFunctionBegin; 2764c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2774c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2788798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2794c1ff481SSatish Balay } 2804c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 2814c1ff481SSatish Balay stash->idx[stash->n] = row; 2824c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 2834c1ff481SSatish Balay stash->array[stash->n] = values[i*stepval]; 2844c1ff481SSatish Balay stash->n++; 2854c1ff481SSatish Balay } 2864c1ff481SSatish Balay PetscFunctionReturn(0); 2874c1ff481SSatish Balay } 2884c1ff481SSatish Balay 2894c1ff481SSatish Balay /* 2908798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 2914c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 2924c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 2934c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 2944c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 2954c1ff481SSatish Balay 2964c1ff481SSatish Balay Input Parameters: 2974c1ff481SSatish Balay stash - the stash 2984c1ff481SSatish Balay row - the global block-row correspoiding to the values 2994c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3004c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3014c1ff481SSatish Balay values. Each block is of size bs*bs. 3024c1ff481SSatish Balay values - the values inserted 3034c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3044c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3054c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3064c1ff481SSatish Balay */ 3074c1ff481SSatish Balay #undef __FUNC__ 3088798bf22SSatish Balay #define __FUNC__ "MatStashValuesRowBlocked_Private" 3098798bf22SSatish Balay int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 3104c1ff481SSatish Balay int rmax,int cmax,int idx) 3114c1ff481SSatish Balay { 3124c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3134c1ff481SSatish Balay Scalar *vals,*array; 314a2d1c673SSatish Balay 315a2d1c673SSatish Balay PetscFunctionBegin; 316a2d1c673SSatish Balay bs2 = bs*bs; 3174c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3188798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 319a2d1c673SSatish Balay } 3204c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 321a2d1c673SSatish Balay stash->idx[stash->n] = row; 322a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 323a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 324a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 325a2d1c673SSatish Balay funtion call */ 326a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 327a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 328a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 329a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 330a2d1c673SSatish Balay array += 1; 331a2d1c673SSatish Balay vals += cmax*bs; 332a2d1c673SSatish Balay } 3334c1ff481SSatish Balay stash->n++; 3344c1ff481SSatish Balay } 3354c1ff481SSatish Balay PetscFunctionReturn(0); 3364c1ff481SSatish Balay } 3374c1ff481SSatish Balay 3384c1ff481SSatish Balay /* 3398798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3404c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3414c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3424c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3434c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3444c1ff481SSatish Balay 3454c1ff481SSatish Balay Input Parameters: 3464c1ff481SSatish Balay stash - the stash 3474c1ff481SSatish Balay row - the global block-row correspoiding to the values 3484c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3494c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3504c1ff481SSatish Balay values. Each block is of size bs*bs. 3514c1ff481SSatish Balay values - the values inserted 3524c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3534c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3544c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3554c1ff481SSatish Balay */ 3564c1ff481SSatish Balay #undef __FUNC__ 3578798bf22SSatish Balay #define __FUNC__ "MatStashValuesColBlocked_Private" 3588798bf22SSatish Balay int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 3594c1ff481SSatish Balay Scalar *values,int rmax,int cmax,int idx) 3604c1ff481SSatish Balay { 3614c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3624c1ff481SSatish Balay Scalar *vals,*array; 3634c1ff481SSatish Balay 3644c1ff481SSatish Balay PetscFunctionBegin; 3654c1ff481SSatish Balay bs2 = bs*bs; 3664c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3678798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3684c1ff481SSatish Balay } 3694c1ff481SSatish Balay for ( i=0; i<n; i++ ) { 3704c1ff481SSatish Balay stash->idx[stash->n] = row; 3714c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3724c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 3734c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 3744c1ff481SSatish Balay funtion call */ 375a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 376a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 377a2d1c673SSatish Balay for ( j=0; j<bs; j++ ) { 378a2d1c673SSatish Balay for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 379a2d1c673SSatish Balay array += bs; 380a2d1c673SSatish Balay vals += rmax*bs; 381a2d1c673SSatish Balay } 382a2d1c673SSatish Balay stash->n++; 3839417f4adSLois Curfman McInnes } 3843a40ed3dSBarry Smith PetscFunctionReturn(0); 3859417f4adSLois Curfman McInnes } 3864c1ff481SSatish Balay /* 3878798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 3884c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 3894c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 3904c1ff481SSatish Balay processors. 391bc5ccf88SSatish Balay 3924c1ff481SSatish Balay Input Parameters: 3934c1ff481SSatish Balay stash - the stash 3944c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 3954c1ff481SSatish Balay for each node. 3964c1ff481SSatish Balay 3974c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 3984c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 3994c1ff481SSatish Balay the proper global indices. 4004c1ff481SSatish Balay */ 401bc5ccf88SSatish Balay #undef __FUNC__ 4028798bf22SSatish Balay #define __FUNC__ "MatStashScatterBegin_Private" 4038798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners) 404bc5ccf88SSatish Balay { 405a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 406a2d1c673SSatish Balay int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 4074c1ff481SSatish Balay int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 408a2d1c673SSatish Balay Scalar *rvalues,*svalues; 409bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 410bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 411bc5ccf88SSatish Balay 412bc5ccf88SSatish Balay PetscFunctionBegin; 413bc5ccf88SSatish Balay 4144c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 415bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 416bc5ccf88SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 417*549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 418*549d3d68SSatish Balay procs = nprocs + size; 419bc5ccf88SSatish Balay owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) );CHKPTRQ(owner); 420a2d1c673SSatish Balay 421bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 422bc5ccf88SSatish Balay idx = stash->idx[i]; 423bc5ccf88SSatish Balay for ( j=0; j<size; j++ ) { 4244c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 425bc5ccf88SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 426bc5ccf88SSatish Balay } 427bc5ccf88SSatish Balay } 428bc5ccf88SSatish Balay } 429bc5ccf88SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 430bc5ccf88SSatish Balay 431bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 432bc5ccf88SSatish Balay work = (int *)PetscMalloc(size*sizeof(int));CHKPTRQ(work); 433bc5ccf88SSatish Balay ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 434bc5ccf88SSatish Balay nreceives = work[rank]; 435bc5ccf88SSatish Balay ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 436bc5ccf88SSatish Balay nmax = work[rank]; 437bc5ccf88SSatish Balay PetscFree(work); 438bc5ccf88SSatish Balay /* post receives: 439bc5ccf88SSatish Balay since we don't know how long each individual message is we 440bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 441bc5ccf88SSatish Balay this is a lot of wasted space. 442bc5ccf88SSatish Balay */ 443a2d1c673SSatish Balay rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 444a2d1c673SSatish Balay rindices = (int *) (rvalues + bs2*nreceives*nmax); 445a2d1c673SSatish Balay recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 446bc5ccf88SSatish Balay for ( i=0,count=0; i<nreceives; i++ ) { 447a2d1c673SSatish Balay ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 448bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 449bc5ccf88SSatish Balay ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 450bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 451bc5ccf88SSatish Balay } 452bc5ccf88SSatish Balay 453bc5ccf88SSatish Balay /* do sends: 454bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 455bc5ccf88SSatish Balay the ith processor 456bc5ccf88SSatish Balay */ 457a2d1c673SSatish Balay svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 458a2d1c673SSatish Balay sindices = (int *) (svalues + bs2*stash->n); 459*549d3d68SSatish Balay send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 460bc5ccf88SSatish Balay startv = (int *) PetscMalloc(2*size*sizeof(int) );CHKPTRQ(startv); 461bc5ccf88SSatish Balay starti = startv + size; 462a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 463bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 464bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { 465bc5ccf88SSatish Balay startv[i] = startv[i-1] + nprocs[i-1]; 466bc5ccf88SSatish Balay starti[i] = starti[i-1] + nprocs[i-1]*2; 467bc5ccf88SSatish Balay } 468bc5ccf88SSatish Balay for ( i=0; i<stash->n; i++ ) { 469bc5ccf88SSatish Balay j = owner[i]; 470a2d1c673SSatish Balay if (bs2 == 1) { 471bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 472a2d1c673SSatish Balay } else { 4734c1ff481SSatish Balay int k; 4744c1ff481SSatish Balay Scalar *buf1,*buf2; 4754c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 4764c1ff481SSatish Balay buf2 = stash->array+bs2*i; 4774c1ff481SSatish Balay for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 478a2d1c673SSatish Balay } 479bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 480bc5ccf88SSatish Balay sindices[starti[j]+nprocs[j]] = stash->idy[i]; 481bc5ccf88SSatish Balay startv[j]++; 482bc5ccf88SSatish Balay starti[j]++; 483bc5ccf88SSatish Balay } 484bc5ccf88SSatish Balay startv[0] = 0; 485bc5ccf88SSatish Balay for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 486bc5ccf88SSatish Balay for ( i=0,count=0; i<size; i++ ) { 487bc5ccf88SSatish Balay if (procs[i]) { 488a2d1c673SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 489bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 490bc5ccf88SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 491bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 492bc5ccf88SSatish Balay } 493bc5ccf88SSatish Balay } 494bc5ccf88SSatish Balay PetscFree(owner); 495bc5ccf88SSatish Balay PetscFree(startv); 496a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 497a2d1c673SSatish Balay for (i=0; i<2*size; i++ ) nprocs[i] = -1; 498a2d1c673SSatish Balay stash->nprocs = nprocs; 499a2d1c673SSatish Balay 500bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 501bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 502bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 503bc5ccf88SSatish Balay stash->rmax = nmax; 504bc5ccf88SSatish Balay PetscFunctionReturn(0); 505bc5ccf88SSatish Balay } 506bc5ccf88SSatish Balay 507a2d1c673SSatish Balay /* 5088798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5098798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5104c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5114c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5124c1ff481SSatish Balay 5134c1ff481SSatish Balay Input Parameters: 5144c1ff481SSatish Balay stash - the stash 5154c1ff481SSatish Balay 5164c1ff481SSatish Balay Output Parameters: 5174c1ff481SSatish Balay nvals - the number of entries in the current message. 5184c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5194c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5204c1ff481SSatish Balay vals - the values 5214c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5224c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5234c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 524a2d1c673SSatish Balay */ 525bc5ccf88SSatish Balay #undef __FUNC__ 5268798bf22SSatish Balay #define __FUNC__ "MatStashScatterGetMesg_Private" 5278798bf22SSatish Balay int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 528bc5ccf88SSatish Balay { 529a2d1c673SSatish Balay int i,ierr,size=stash->size,*flg_v,*flg_i; 530a2d1c673SSatish Balay int i1,i2,*rindices,match_found=0,bs2; 531a2d1c673SSatish Balay MPI_Status recv_status; 532bc5ccf88SSatish Balay 533bc5ccf88SSatish Balay PetscFunctionBegin; 534bc5ccf88SSatish Balay 535a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 536a2d1c673SSatish Balay /* Return if no more messages to process */ 537a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 538a2d1c673SSatish Balay 539a2d1c673SSatish Balay flg_v = stash->nprocs; 540a2d1c673SSatish Balay flg_i = flg_v + size; 5414c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 542a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 543a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 544a2d1c673SSatish Balay while (!match_found) { 545a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 546a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 547a2d1c673SSatish Balay if (i % 2) { 548a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 549a2d1c673SSatish Balay flg_i[recv_status.MPI_SOURCE] = i/2; 550a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 551a2d1c673SSatish Balay } else { 552a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 553a2d1c673SSatish Balay flg_v[recv_status.MPI_SOURCE] = i/2; 554a2d1c673SSatish Balay *nvals = *nvals/bs2; 555bc5ccf88SSatish Balay } 556a2d1c673SSatish Balay 557a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 558a2d1c673SSatish Balay i1 = flg_v[recv_status.MPI_SOURCE]; 559a2d1c673SSatish Balay i2 = flg_i[recv_status.MPI_SOURCE]; 560a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 561a2d1c673SSatish Balay rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 562a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 563a2d1c673SSatish Balay *cols = *rows + *nvals; 564a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 565a2d1c673SSatish Balay *flg = 1; 566a2d1c673SSatish Balay stash->nprocessed ++; 567a2d1c673SSatish Balay match_found = 1; 568bc5ccf88SSatish Balay } 569bc5ccf88SSatish Balay } 570bc5ccf88SSatish Balay PetscFunctionReturn(0); 571bc5ccf88SSatish Balay } 572