173f4d377SMatthew Knepley /*$Id: matstash.c,v 1.50 2001/03/23 23:22:45 balay Exp $*/ 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 49417f4adSLois Curfman McInnes 53eda8832SBarry Smith /* 60ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 70ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 83eda8832SBarry Smith */ 9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 104c1ff481SSatish Balay 119417f4adSLois Curfman McInnes /* 128798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 134c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 144c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 159417f4adSLois Curfman McInnes 164c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 174c1ff481SSatish Balay 184c1ff481SSatish Balay Input Parameters: 194c1ff481SSatish Balay comm - communicator, required for scatters. 204c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 214c1ff481SSatish Balay 224c1ff481SSatish Balay Output Parameters: 234c1ff481SSatish Balay stash - the newly created stash 249417f4adSLois Curfman McInnes */ 254a2ae208SSatish Balay #undef __FUNCT__ 264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 278798bf22SSatish Balay int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash) 289417f4adSLois Curfman McInnes { 29f1af5d2fSBarry Smith int ierr,max,*opt,nopt; 30f1af5d2fSBarry Smith PetscTruth flg; 31bc5ccf88SSatish Balay 323a40ed3dSBarry Smith PetscFunctionBegin; 33bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 34752ec6e0SSatish Balay stash->comm = comm; 35752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 36a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 37a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 38a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 39bc5ccf88SSatish Balay 40434d7ff9SSatish Balay nopt = stash->size; 4182502324SSatish Balay ierr = PetscMalloc(nopt*sizeof(int),&opt);CHKERRQ(ierr); 42b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 43434d7ff9SSatish Balay if (flg) { 44434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 45434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 46434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 47f4ab19daSSatish Balay else max = 0; /* Use default */ 48434d7ff9SSatish Balay stash->umax = max; 49434d7ff9SSatish Balay } else { 50434d7ff9SSatish Balay stash->umax = 0; 51434d7ff9SSatish Balay } 52606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 534c1ff481SSatish Balay if (bs <= 0) bs = 1; 54a2d1c673SSatish Balay 554c1ff481SSatish Balay stash->bs = bs; 569417f4adSLois Curfman McInnes stash->nmax = 0; 57434d7ff9SSatish Balay stash->oldnmax = 0; 589417f4adSLois Curfman McInnes stash->n = 0; 594c1ff481SSatish Balay stash->reallocs = -1; 609417f4adSLois Curfman McInnes stash->idx = 0; 619417f4adSLois Curfman McInnes stash->idy = 0; 62bc5ccf88SSatish Balay stash->array = 0; 639417f4adSLois Curfman McInnes 64bc5ccf88SSatish Balay stash->send_waits = 0; 65bc5ccf88SSatish Balay stash->recv_waits = 0; 66a2d1c673SSatish Balay stash->send_status = 0; 67bc5ccf88SSatish Balay stash->nsends = 0; 68bc5ccf88SSatish Balay stash->nrecvs = 0; 69bc5ccf88SSatish Balay stash->svalues = 0; 70bc5ccf88SSatish Balay stash->rvalues = 0; 71bc5ccf88SSatish Balay stash->rmax = 0; 72a2d1c673SSatish Balay stash->nprocs = 0; 73a2d1c673SSatish Balay stash->nprocessed = 0; 743a40ed3dSBarry Smith PetscFunctionReturn(0); 759417f4adSLois Curfman McInnes } 769417f4adSLois Curfman McInnes 774c1ff481SSatish Balay /* 788798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 794c1ff481SSatish Balay */ 804a2ae208SSatish Balay #undef __FUNCT__ 814a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 828798bf22SSatish Balay int MatStashDestroy_Private(MatStash *stash) 839417f4adSLois Curfman McInnes { 84bc5ccf88SSatish Balay int ierr; 85a2d1c673SSatish Balay 86bc5ccf88SSatish Balay PetscFunctionBegin; 87606d414cSSatish Balay if (stash->array) { 88606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 89606d414cSSatish Balay stash->array = 0; 90606d414cSSatish Balay } 91bc5ccf88SSatish Balay PetscFunctionReturn(0); 92bc5ccf88SSatish Balay } 93bc5ccf88SSatish Balay 944c1ff481SSatish Balay /* 958798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 964c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 974c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 984c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 994c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1004c1ff481SSatish Balay so that the same value can be used the next time through. 1014c1ff481SSatish Balay */ 1024a2ae208SSatish Balay #undef __FUNCT__ 1034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 1048798bf22SSatish Balay int MatStashScatterEnd_Private(MatStash *stash) 105bc5ccf88SSatish Balay { 106434d7ff9SSatish Balay int nsends=stash->nsends,ierr,bs2,oldnmax; 107a2d1c673SSatish Balay MPI_Status *send_status; 108a2d1c673SSatish Balay 1093a40ed3dSBarry Smith PetscFunctionBegin; 110a2d1c673SSatish Balay /* wait on sends */ 111a2d1c673SSatish Balay if (nsends) { 11282502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 113a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 114606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 115a2d1c673SSatish Balay } 116a2d1c673SSatish Balay 117c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 118434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 119434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 120b9b97703SBarry Smith if (stash->n) { 12194b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1228a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 123434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 124b9b97703SBarry Smith } 125434d7ff9SSatish Balay 126d07ff455SSatish Balay stash->nmax = 0; 127d07ff455SSatish Balay stash->n = 0; 1284c1ff481SSatish Balay stash->reallocs = -1; 129bc5ccf88SSatish Balay stash->rmax = 0; 130a2d1c673SSatish Balay stash->nprocessed = 0; 131bc5ccf88SSatish Balay 132bc5ccf88SSatish Balay if (stash->array) { 133606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 134bc5ccf88SSatish Balay stash->array = 0; 135bc5ccf88SSatish Balay stash->idx = 0; 136bc5ccf88SSatish Balay stash->idy = 0; 137bc5ccf88SSatish Balay } 138606d414cSSatish Balay if (stash->send_waits) { 139606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 140606d414cSSatish Balay stash->send_waits = 0; 141606d414cSSatish Balay } 142606d414cSSatish Balay if (stash->recv_waits) { 143606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 144606d414cSSatish Balay stash->recv_waits = 0; 145606d414cSSatish Balay } 146606d414cSSatish Balay if (stash->svalues) { 147606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 148606d414cSSatish Balay stash->svalues = 0; 149606d414cSSatish Balay } 150606d414cSSatish Balay if (stash->rvalues) { 151606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 152606d414cSSatish Balay stash->rvalues = 0; 153606d414cSSatish Balay } 154606d414cSSatish Balay if (stash->nprocs) { 155b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 156606d414cSSatish Balay stash->nprocs = 0; 157606d414cSSatish Balay } 158bc5ccf88SSatish Balay 1593a40ed3dSBarry Smith PetscFunctionReturn(0); 1609417f4adSLois Curfman McInnes } 1619417f4adSLois Curfman McInnes 1624c1ff481SSatish Balay /* 1638798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1644c1ff481SSatish Balay 1654c1ff481SSatish Balay Input Parameters: 1664c1ff481SSatish Balay stash - the stash 16794b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1684c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1694c1ff481SSatish Balay 1704c1ff481SSatish Balay */ 1714a2ae208SSatish Balay #undef __FUNCT__ 1724a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 1738798bf22SSatish Balay int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs) 17497530c3fSBarry Smith { 17594b769a5SSatish Balay int bs2 = stash->bs*stash->bs; 17694b769a5SSatish Balay 1773a40ed3dSBarry Smith PetscFunctionBegin; 17894b769a5SSatish Balay *nstash = stash->n*bs2; 179434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 180434d7ff9SSatish Balay else *reallocs = stash->reallocs; 181bc5ccf88SSatish Balay PetscFunctionReturn(0); 182bc5ccf88SSatish Balay } 1834c1ff481SSatish Balay 1844c1ff481SSatish Balay 1854c1ff481SSatish Balay /* 1868798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1874c1ff481SSatish Balay 1884c1ff481SSatish Balay Input Parameters: 1894c1ff481SSatish Balay stash - the stash 1904c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1914c1ff481SSatish Balay this value is used while allocating memory. 1924c1ff481SSatish Balay */ 1934a2ae208SSatish Balay #undef __FUNCT__ 1944a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 1958798bf22SSatish Balay int MatStashSetInitialSize_Private(MatStash *stash,int max) 196bc5ccf88SSatish Balay { 197bc5ccf88SSatish Balay PetscFunctionBegin; 198434d7ff9SSatish Balay stash->umax = max; 1993a40ed3dSBarry Smith PetscFunctionReturn(0); 20097530c3fSBarry Smith } 20197530c3fSBarry Smith 2028798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2034c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2044c1ff481SSatish Balay being inserted into the stash. 2054c1ff481SSatish Balay 2064c1ff481SSatish Balay Input Parameters: 2074c1ff481SSatish Balay stash - the stash 2084c1ff481SSatish Balay incr - the minimum increase requested 2094c1ff481SSatish Balay 2104c1ff481SSatish Balay Notes: 2114c1ff481SSatish Balay This routine doubles the currently used memory. 2124c1ff481SSatish Balay */ 2134a2ae208SSatish Balay #undef __FUNCT__ 2144a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 2158798bf22SSatish Balay static int MatStashExpand_Private(MatStash *stash,int incr) 2169417f4adSLois Curfman McInnes { 217549d3d68SSatish Balay int *n_idx,*n_idy,newnmax,bs2,ierr; 2183eda8832SBarry Smith MatScalar *n_array; 2199417f4adSLois Curfman McInnes 2203a40ed3dSBarry Smith PetscFunctionBegin; 2219417f4adSLois Curfman McInnes /* allocate a larger stash */ 22294b769a5SSatish Balay bs2 = stash->bs*stash->bs; 223c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 224434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 225434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 226c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 227434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 228434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 229434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2304c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 231d07ff455SSatish Balay 232b0a32e0cSBarry Smith ierr = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr); 233a2d1c673SSatish Balay n_idx = (int*)(n_array + bs2*newnmax); 234d07ff455SSatish Balay n_idy = (int*)(n_idx + newnmax); 2353eda8832SBarry Smith ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr); 236549d3d68SSatish Balay ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr); 237549d3d68SSatish Balay ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr); 238606d414cSSatish Balay if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 239d07ff455SSatish Balay stash->array = n_array; 240d07ff455SSatish Balay stash->idx = n_idx; 241d07ff455SSatish Balay stash->idy = n_idy; 242d07ff455SSatish Balay stash->nmax = newnmax; 243bc5ccf88SSatish Balay stash->reallocs++; 244bc5ccf88SSatish Balay PetscFunctionReturn(0); 245bc5ccf88SSatish Balay } 246bc5ccf88SSatish Balay /* 2478798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2484c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2494c1ff481SSatish Balay can be inserted with a single call to this function. 2504c1ff481SSatish Balay 2514c1ff481SSatish Balay Input Parameters: 2524c1ff481SSatish Balay stash - the stash 2534c1ff481SSatish Balay row - the global row correspoiding to the values 2544c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2554c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2564c1ff481SSatish Balay values - the values inserted 257bc5ccf88SSatish Balay */ 2584a2ae208SSatish Balay #undef __FUNCT__ 2594a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 2600ae3cd3bSBarry Smith int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values) 261bc5ccf88SSatish Balay { 262a2d1c673SSatish Balay int ierr,i; 263bc5ccf88SSatish Balay 264bc5ccf88SSatish Balay PetscFunctionBegin; 2654c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2664c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2678798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2689417f4adSLois Curfman McInnes } 2694c1ff481SSatish Balay for (i=0; i<n; i++) { 2709417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 271a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 2720ae3cd3bSBarry Smith stash->array[stash->n] = values[i]; 273a2d1c673SSatish Balay stash->n++; 2749417f4adSLois Curfman McInnes } 275a2d1c673SSatish Balay PetscFunctionReturn(0); 276a2d1c673SSatish Balay } 2774c1ff481SSatish Balay /* 2788798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2794c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2804c1ff481SSatish Balay can be inserted with a single call to this function. 281a2d1c673SSatish Balay 2824c1ff481SSatish Balay Input Parameters: 2834c1ff481SSatish Balay stash - the stash 2844c1ff481SSatish Balay row - the global row correspoiding to the values 2854c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2864c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2874c1ff481SSatish Balay values - the values inserted 2884c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2894c1ff481SSatish Balay this happens because the input is columnoriented. 2904c1ff481SSatish Balay */ 2914a2ae208SSatish Balay #undef __FUNCT__ 2924a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 2930ae3cd3bSBarry Smith int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int stepval) 294a2d1c673SSatish Balay { 2954c1ff481SSatish Balay int ierr,i; 296a2d1c673SSatish Balay 2974c1ff481SSatish Balay PetscFunctionBegin; 2984c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2994c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 3008798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3014c1ff481SSatish Balay } 3024c1ff481SSatish Balay for (i=0; i<n; i++) { 3034c1ff481SSatish Balay stash->idx[stash->n] = row; 3044c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3050ae3cd3bSBarry Smith stash->array[stash->n] = values[i*stepval]; 3064c1ff481SSatish Balay stash->n++; 3074c1ff481SSatish Balay } 3084c1ff481SSatish Balay PetscFunctionReturn(0); 3094c1ff481SSatish Balay } 3104c1ff481SSatish Balay 3114c1ff481SSatish Balay /* 3128798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3134c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3144c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3154c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3164c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3174c1ff481SSatish Balay 3184c1ff481SSatish Balay Input Parameters: 3194c1ff481SSatish Balay stash - the stash 3204c1ff481SSatish Balay row - the global block-row correspoiding to the values 3214c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3224c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3234c1ff481SSatish Balay values. Each block is of size bs*bs. 3244c1ff481SSatish Balay values - the values inserted 3254c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3264c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3274c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3284c1ff481SSatish Balay */ 3294a2ae208SSatish Balay #undef __FUNCT__ 3304a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 3310ae3cd3bSBarry Smith int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx) 3324c1ff481SSatish Balay { 3334c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3340ae3cd3bSBarry Smith MatScalar *vals,*array; 335a2d1c673SSatish Balay 336a2d1c673SSatish Balay PetscFunctionBegin; 337a2d1c673SSatish Balay bs2 = bs*bs; 3384c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3398798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 340a2d1c673SSatish Balay } 3414c1ff481SSatish Balay for (i=0; i<n; i++) { 342a2d1c673SSatish Balay stash->idx[stash->n] = row; 343a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 344a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 345a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 346a2d1c673SSatish Balay funtion call */ 347a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 348a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 349a2d1c673SSatish Balay for (j=0; j<bs; j++) { 3500ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k*bs] = vals[k];} 351a2d1c673SSatish Balay array += 1; 352a2d1c673SSatish Balay vals += cmax*bs; 353a2d1c673SSatish Balay } 3544c1ff481SSatish Balay stash->n++; 3554c1ff481SSatish Balay } 3564c1ff481SSatish Balay PetscFunctionReturn(0); 3574c1ff481SSatish Balay } 3584c1ff481SSatish Balay 3594c1ff481SSatish Balay /* 3608798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3614c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3624c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3634c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3644c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3654c1ff481SSatish Balay 3664c1ff481SSatish Balay Input Parameters: 3674c1ff481SSatish Balay stash - the stash 3684c1ff481SSatish Balay row - the global block-row correspoiding to the values 3694c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3704c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3714c1ff481SSatish Balay values. Each block is of size bs*bs. 3724c1ff481SSatish Balay values - the values inserted 3734c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3744c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3754c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3764c1ff481SSatish Balay */ 3774a2ae208SSatish Balay #undef __FUNCT__ 3784a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 3790ae3cd3bSBarry Smith int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx) 3804c1ff481SSatish Balay { 3814c1ff481SSatish Balay int ierr,i,j,k,bs2,bs=stash->bs; 3820ae3cd3bSBarry Smith MatScalar *vals,*array; 3834c1ff481SSatish Balay 3844c1ff481SSatish Balay PetscFunctionBegin; 3854c1ff481SSatish Balay bs2 = bs*bs; 3864c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3878798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3884c1ff481SSatish Balay } 3894c1ff481SSatish Balay for (i=0; i<n; i++) { 3904c1ff481SSatish Balay stash->idx[stash->n] = row; 3914c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3924c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 3934c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 3944c1ff481SSatish Balay funtion call */ 395a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 396a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 397a2d1c673SSatish Balay for (j=0; j<bs; j++) { 3980ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k] = vals[k];} 399a2d1c673SSatish Balay array += bs; 400a2d1c673SSatish Balay vals += rmax*bs; 401a2d1c673SSatish Balay } 402a2d1c673SSatish Balay stash->n++; 4039417f4adSLois Curfman McInnes } 4043a40ed3dSBarry Smith PetscFunctionReturn(0); 4059417f4adSLois Curfman McInnes } 4064c1ff481SSatish Balay /* 4078798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4084c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4094c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4104c1ff481SSatish Balay processors. 411bc5ccf88SSatish Balay 4124c1ff481SSatish Balay Input Parameters: 4134c1ff481SSatish Balay stash - the stash 4144c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4154c1ff481SSatish Balay for each node. 4164c1ff481SSatish Balay 4174c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4184c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4194c1ff481SSatish Balay the proper global indices. 4204c1ff481SSatish Balay */ 4214a2ae208SSatish Balay #undef __FUNCT__ 4224a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 4238798bf22SSatish Balay int MatStashScatterBegin_Private(MatStash *stash,int *owners) 424bc5ccf88SSatish Balay { 425a2d1c673SSatish Balay int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 426*ccae9161SBarry Smith int size=stash->size,*nprocs,nsends,nreceives; 427c1dc657dSBarry Smith int nmax,count,ierr,*sindices,*rindices,i,j,idx; 4283eda8832SBarry Smith MatScalar *rvalues,*svalues; 429bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 430bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 431bc5ccf88SSatish Balay 432bc5ccf88SSatish Balay PetscFunctionBegin; 433bc5ccf88SSatish Balay 4344c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 435bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 43682502324SSatish Balay ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 437549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 43882502324SSatish Balay ierr = PetscMalloc((stash->n+1)*sizeof(int),&owner);CHKERRQ(ierr); 439a2d1c673SSatish Balay 440bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 441bc5ccf88SSatish Balay idx = stash->idx[i]; 442bc5ccf88SSatish Balay for (j=0; j<size; j++) { 4434c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 444c1dc657dSBarry Smith nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break; 445bc5ccf88SSatish Balay } 446bc5ccf88SSatish Balay } 447bc5ccf88SSatish Balay } 448c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 449bc5ccf88SSatish Balay 450bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 451c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr); 452c1dc657dSBarry Smith 453bc5ccf88SSatish Balay /* post receives: 454bc5ccf88SSatish Balay since we don't know how long each individual message is we 455bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 456bc5ccf88SSatish Balay this is a lot of wasted space. 457bc5ccf88SSatish Balay */ 458b0a32e0cSBarry Smith ierr = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);CHKERRQ(ierr); 459a2d1c673SSatish Balay rindices = (int*)(rvalues + bs2*nreceives*nmax); 460b0a32e0cSBarry Smith ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 461bc5ccf88SSatish Balay for (i=0,count=0; i<nreceives; i++) { 4623eda8832SBarry Smith ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm, 463bc5ccf88SSatish Balay recv_waits+count++);CHKERRQ(ierr); 4640ae3cd3bSBarry Smith ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr); 465bc5ccf88SSatish Balay } 466bc5ccf88SSatish Balay 467bc5ccf88SSatish Balay /* do sends: 468bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 469bc5ccf88SSatish Balay the ith processor 470bc5ccf88SSatish Balay */ 47182502324SSatish Balay ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);CHKERRQ(ierr); 472a2d1c673SSatish Balay sindices = (int*)(svalues + bs2*stash->n); 473b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 47482502324SSatish Balay ierr = PetscMalloc(2*size*sizeof(int),&startv);CHKERRQ(ierr); 475bc5ccf88SSatish Balay starti = startv + size; 476a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 477bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 478bc5ccf88SSatish Balay for (i=1; i<size; i++) { 479c1dc657dSBarry Smith startv[i] = startv[i-1] + nprocs[2*i-2]; 480c1dc657dSBarry Smith starti[i] = starti[i-1] + nprocs[2*i-2]*2; 481bc5ccf88SSatish Balay } 482bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 483bc5ccf88SSatish Balay j = owner[i]; 484a2d1c673SSatish Balay if (bs2 == 1) { 485bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 486a2d1c673SSatish Balay } else { 4874c1ff481SSatish Balay int k; 4883eda8832SBarry Smith MatScalar *buf1,*buf2; 4894c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 4904c1ff481SSatish Balay buf2 = stash->array+bs2*i; 4914c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 492a2d1c673SSatish Balay } 493bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 494c1dc657dSBarry Smith sindices[starti[j]+nprocs[2*j]] = stash->idy[i]; 495bc5ccf88SSatish Balay startv[j]++; 496bc5ccf88SSatish Balay starti[j]++; 497bc5ccf88SSatish Balay } 498bc5ccf88SSatish Balay startv[0] = 0; 499c1dc657dSBarry Smith for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];} 500bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 501c1dc657dSBarry Smith if (nprocs[2*i+1]) { 502c1dc657dSBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm, 503bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 504c1dc657dSBarry Smith ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPI_INT,i,tag2,comm, 505bc5ccf88SSatish Balay send_waits+count++);CHKERRQ(ierr); 506bc5ccf88SSatish Balay } 507bc5ccf88SSatish Balay } 508606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 509606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 510a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 511a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 512a2d1c673SSatish Balay stash->nprocs = nprocs; 513a2d1c673SSatish Balay 514bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 515bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 516bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 517bc5ccf88SSatish Balay stash->rmax = nmax; 518bc5ccf88SSatish Balay PetscFunctionReturn(0); 519bc5ccf88SSatish Balay } 520bc5ccf88SSatish Balay 521a2d1c673SSatish Balay /* 5228798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5238798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5244c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5254c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5264c1ff481SSatish Balay 5274c1ff481SSatish Balay Input Parameters: 5284c1ff481SSatish Balay stash - the stash 5294c1ff481SSatish Balay 5304c1ff481SSatish Balay Output Parameters: 5314c1ff481SSatish Balay nvals - the number of entries in the current message. 5324c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5334c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5344c1ff481SSatish Balay vals - the values 5354c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5364c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5374c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 538a2d1c673SSatish Balay */ 5394a2ae208SSatish Balay #undef __FUNCT__ 5404a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 5413eda8832SBarry Smith int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg) 542bc5ccf88SSatish Balay { 543*ccae9161SBarry Smith int i,ierr,*flg_v,i1,i2,*rindices,bs2; 544a2d1c673SSatish Balay MPI_Status recv_status; 545b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 546bc5ccf88SSatish Balay 547bc5ccf88SSatish Balay PetscFunctionBegin; 548bc5ccf88SSatish Balay 549a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 550a2d1c673SSatish Balay /* Return if no more messages to process */ 551a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 552a2d1c673SSatish Balay 553a2d1c673SSatish Balay flg_v = stash->nprocs; 5544c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 555a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 556a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 557a2d1c673SSatish Balay while (!match_found) { 558a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 559a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 560a2d1c673SSatish Balay if (i % 2) { 561a2d1c673SSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 562c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 563a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 564a2d1c673SSatish Balay } else { 5653eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 566c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 567a2d1c673SSatish Balay *nvals = *nvals/bs2; 568bc5ccf88SSatish Balay } 569a2d1c673SSatish Balay 570a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 571c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 572c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 573a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 574a2d1c673SSatish Balay rindices = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs); 575a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 576a2d1c673SSatish Balay *cols = *rows + *nvals; 577a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 578a2d1c673SSatish Balay *flg = 1; 579a2d1c673SSatish Balay stash->nprocessed ++; 58035d8aa7fSBarry Smith match_found = PETSC_TRUE; 581bc5ccf88SSatish Balay } 582bc5ccf88SSatish Balay } 583bc5ccf88SSatish Balay PetscFunctionReturn(0); 584bc5ccf88SSatish Balay } 585