1*be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 49417f4adSLois Curfman McInnes 53eda8832SBarry Smith /* 60ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 70ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 83eda8832SBarry Smith */ 9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 104c1ff481SSatish Balay 119417f4adSLois Curfman McInnes /* 128798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 134c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 144c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 159417f4adSLois Curfman McInnes 164c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 174c1ff481SSatish Balay 184c1ff481SSatish Balay Input Parameters: 194c1ff481SSatish Balay comm - communicator, required for scatters. 204c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 214c1ff481SSatish Balay 224c1ff481SSatish Balay Output Parameters: 234c1ff481SSatish Balay stash - the newly created stash 249417f4adSLois Curfman McInnes */ 254a2ae208SSatish Balay #undef __FUNCT__ 264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 27c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 289417f4adSLois Curfman McInnes { 29dfbe8321SBarry Smith PetscErrorCode ierr; 30c1ac3661SBarry Smith PetscInt max,*opt,nopt; 31f1af5d2fSBarry Smith PetscTruth flg; 32bc5ccf88SSatish Balay 333a40ed3dSBarry Smith PetscFunctionBegin; 34bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 35752ec6e0SSatish Balay stash->comm = comm; 36752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 37a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 38a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 39a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 40bc5ccf88SSatish Balay 41434d7ff9SSatish Balay nopt = stash->size; 42d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 43b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 44434d7ff9SSatish Balay if (flg) { 45434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 46434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 47434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 48f4ab19daSSatish Balay else max = 0; /* Use default */ 49434d7ff9SSatish Balay stash->umax = max; 50434d7ff9SSatish Balay } else { 51434d7ff9SSatish Balay stash->umax = 0; 52434d7ff9SSatish Balay } 53606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 544c1ff481SSatish Balay if (bs <= 0) bs = 1; 55a2d1c673SSatish Balay 564c1ff481SSatish Balay stash->bs = bs; 579417f4adSLois Curfman McInnes stash->nmax = 0; 58434d7ff9SSatish Balay stash->oldnmax = 0; 599417f4adSLois Curfman McInnes stash->n = 0; 604c1ff481SSatish Balay stash->reallocs = -1; 619417f4adSLois Curfman McInnes stash->idx = 0; 629417f4adSLois Curfman McInnes stash->idy = 0; 63bc5ccf88SSatish Balay stash->array = 0; 649417f4adSLois Curfman McInnes 65bc5ccf88SSatish Balay stash->send_waits = 0; 66bc5ccf88SSatish Balay stash->recv_waits = 0; 67a2d1c673SSatish Balay stash->send_status = 0; 68bc5ccf88SSatish Balay stash->nsends = 0; 69bc5ccf88SSatish Balay stash->nrecvs = 0; 70bc5ccf88SSatish Balay stash->svalues = 0; 71bc5ccf88SSatish Balay stash->rvalues = 0; 72bc5ccf88SSatish Balay stash->rmax = 0; 73a2d1c673SSatish Balay stash->nprocs = 0; 74a2d1c673SSatish Balay stash->nprocessed = 0; 753a40ed3dSBarry Smith PetscFunctionReturn(0); 769417f4adSLois Curfman McInnes } 779417f4adSLois Curfman McInnes 784c1ff481SSatish Balay /* 798798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 804c1ff481SSatish Balay */ 814a2ae208SSatish Balay #undef __FUNCT__ 824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 849417f4adSLois Curfman McInnes { 85dfbe8321SBarry Smith PetscErrorCode ierr; 86a2d1c673SSatish Balay 87bc5ccf88SSatish Balay PetscFunctionBegin; 88606d414cSSatish Balay if (stash->array) { 89606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 90606d414cSSatish Balay stash->array = 0; 91606d414cSSatish Balay } 92bc5ccf88SSatish Balay PetscFunctionReturn(0); 93bc5ccf88SSatish Balay } 94bc5ccf88SSatish Balay 954c1ff481SSatish Balay /* 968798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 974c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 984c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 994c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1004c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1014c1ff481SSatish Balay so that the same value can be used the next time through. 1024c1ff481SSatish Balay */ 1034a2ae208SSatish Balay #undef __FUNCT__ 1044a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 105dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 106bc5ccf88SSatish Balay { 1076849ba73SBarry Smith PetscErrorCode ierr; 1086849ba73SBarry Smith int nsends=stash->nsends,bs2,oldnmax; 109a2d1c673SSatish Balay MPI_Status *send_status; 110a2d1c673SSatish Balay 1113a40ed3dSBarry Smith PetscFunctionBegin; 112a2d1c673SSatish Balay /* wait on sends */ 113a2d1c673SSatish Balay if (nsends) { 11482502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 115a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 116606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 117a2d1c673SSatish Balay } 118a2d1c673SSatish Balay 119c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 120434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 121434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 122b9b97703SBarry Smith if (stash->n) { 12394b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1248a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 125434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 126b9b97703SBarry Smith } 127434d7ff9SSatish Balay 128d07ff455SSatish Balay stash->nmax = 0; 129d07ff455SSatish Balay stash->n = 0; 1304c1ff481SSatish Balay stash->reallocs = -1; 131bc5ccf88SSatish Balay stash->rmax = 0; 132a2d1c673SSatish Balay stash->nprocessed = 0; 133bc5ccf88SSatish Balay 134bc5ccf88SSatish Balay if (stash->array) { 135606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 136bc5ccf88SSatish Balay stash->array = 0; 137bc5ccf88SSatish Balay stash->idx = 0; 138bc5ccf88SSatish Balay stash->idy = 0; 139bc5ccf88SSatish Balay } 140606d414cSSatish Balay if (stash->send_waits) { 141606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 142606d414cSSatish Balay stash->send_waits = 0; 143606d414cSSatish Balay } 144606d414cSSatish Balay if (stash->recv_waits) { 145606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 146606d414cSSatish Balay stash->recv_waits = 0; 147606d414cSSatish Balay } 148606d414cSSatish Balay if (stash->svalues) { 149606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 150606d414cSSatish Balay stash->svalues = 0; 151606d414cSSatish Balay } 152606d414cSSatish Balay if (stash->rvalues) { 153606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 154606d414cSSatish Balay stash->rvalues = 0; 155606d414cSSatish Balay } 156606d414cSSatish Balay if (stash->nprocs) { 157b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 158606d414cSSatish Balay stash->nprocs = 0; 159606d414cSSatish Balay } 160bc5ccf88SSatish Balay 1613a40ed3dSBarry Smith PetscFunctionReturn(0); 1629417f4adSLois Curfman McInnes } 1639417f4adSLois Curfman McInnes 1644c1ff481SSatish Balay /* 1658798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1664c1ff481SSatish Balay 1674c1ff481SSatish Balay Input Parameters: 1684c1ff481SSatish Balay stash - the stash 16994b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1704c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1714c1ff481SSatish Balay 1724c1ff481SSatish Balay */ 1734a2ae208SSatish Balay #undef __FUNCT__ 1744a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 175c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 17697530c3fSBarry Smith { 177c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 17894b769a5SSatish Balay 1793a40ed3dSBarry Smith PetscFunctionBegin; 1801ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1811ecfd215SBarry Smith if (reallocs) { 182434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 183434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1841ecfd215SBarry Smith } 185bc5ccf88SSatish Balay PetscFunctionReturn(0); 186bc5ccf88SSatish Balay } 1874c1ff481SSatish Balay 1884c1ff481SSatish Balay 1894c1ff481SSatish Balay /* 1908798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1914c1ff481SSatish Balay 1924c1ff481SSatish Balay Input Parameters: 1934c1ff481SSatish Balay stash - the stash 1944c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1954c1ff481SSatish Balay this value is used while allocating memory. 1964c1ff481SSatish Balay */ 1974a2ae208SSatish Balay #undef __FUNCT__ 1984a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 199c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 200bc5ccf88SSatish Balay { 201bc5ccf88SSatish Balay PetscFunctionBegin; 202434d7ff9SSatish Balay stash->umax = max; 2033a40ed3dSBarry Smith PetscFunctionReturn(0); 20497530c3fSBarry Smith } 20597530c3fSBarry Smith 2068798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2074c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2084c1ff481SSatish Balay being inserted into the stash. 2094c1ff481SSatish Balay 2104c1ff481SSatish Balay Input Parameters: 2114c1ff481SSatish Balay stash - the stash 2124c1ff481SSatish Balay incr - the minimum increase requested 2134c1ff481SSatish Balay 2144c1ff481SSatish Balay Notes: 2154c1ff481SSatish Balay This routine doubles the currently used memory. 2164c1ff481SSatish Balay */ 2174a2ae208SSatish Balay #undef __FUNCT__ 2184a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 219c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2209417f4adSLois Curfman McInnes { 2216849ba73SBarry Smith PetscErrorCode ierr; 222c1ac3661SBarry Smith PetscInt *n_idx,*n_idy,newnmax,bs2; 2233eda8832SBarry Smith MatScalar *n_array; 2249417f4adSLois Curfman McInnes 2253a40ed3dSBarry Smith PetscFunctionBegin; 2269417f4adSLois Curfman McInnes /* allocate a larger stash */ 22794b769a5SSatish Balay bs2 = stash->bs*stash->bs; 228c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 229434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 230434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 231c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 232434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 233434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 234434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2354c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 236d07ff455SSatish Balay 237c1ac3661SBarry Smith ierr = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr); 238c1ac3661SBarry Smith n_idx = (PetscInt*)(n_array + bs2*newnmax); 239c1ac3661SBarry Smith n_idy = (PetscInt*)(n_idx + newnmax); 2403eda8832SBarry Smith ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr); 241c1ac3661SBarry Smith ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 242c1ac3661SBarry Smith ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 243606d414cSSatish Balay if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 244d07ff455SSatish Balay stash->array = n_array; 245d07ff455SSatish Balay stash->idx = n_idx; 246d07ff455SSatish Balay stash->idy = n_idy; 247d07ff455SSatish Balay stash->nmax = newnmax; 248bc5ccf88SSatish Balay stash->reallocs++; 249bc5ccf88SSatish Balay PetscFunctionReturn(0); 250bc5ccf88SSatish Balay } 251bc5ccf88SSatish Balay /* 2528798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2534c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2544c1ff481SSatish Balay can be inserted with a single call to this function. 2554c1ff481SSatish Balay 2564c1ff481SSatish Balay Input Parameters: 2574c1ff481SSatish Balay stash - the stash 2584c1ff481SSatish Balay row - the global row correspoiding to the values 2594c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2604c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2614c1ff481SSatish Balay values - the values inserted 262bc5ccf88SSatish Balay */ 2634a2ae208SSatish Balay #undef __FUNCT__ 2644a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 265c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[]) 266bc5ccf88SSatish Balay { 267dfbe8321SBarry Smith PetscErrorCode ierr; 268c1ac3661SBarry Smith PetscInt i; 269bc5ccf88SSatish Balay 270bc5ccf88SSatish Balay PetscFunctionBegin; 2714c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2724c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2738798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2749417f4adSLois Curfman McInnes } 2754c1ff481SSatish Balay for (i=0; i<n; i++) { 2769417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 277a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 2780ae3cd3bSBarry Smith stash->array[stash->n] = values[i]; 279a2d1c673SSatish Balay stash->n++; 2809417f4adSLois Curfman McInnes } 281a2d1c673SSatish Balay PetscFunctionReturn(0); 282a2d1c673SSatish Balay } 2834c1ff481SSatish Balay /* 2848798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2854c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2864c1ff481SSatish Balay can be inserted with a single call to this function. 287a2d1c673SSatish Balay 2884c1ff481SSatish Balay Input Parameters: 2894c1ff481SSatish Balay stash - the stash 2904c1ff481SSatish Balay row - the global row correspoiding to the values 2914c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2924c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2934c1ff481SSatish Balay values - the values inserted 2944c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2954c1ff481SSatish Balay this happens because the input is columnoriented. 2964c1ff481SSatish Balay */ 2974a2ae208SSatish Balay #undef __FUNCT__ 2984a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 299c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval) 300a2d1c673SSatish Balay { 301dfbe8321SBarry Smith PetscErrorCode ierr; 302c1ac3661SBarry Smith PetscInt i; 303a2d1c673SSatish Balay 3044c1ff481SSatish Balay PetscFunctionBegin; 3054c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 3064c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 3078798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3084c1ff481SSatish Balay } 3094c1ff481SSatish Balay for (i=0; i<n; i++) { 3104c1ff481SSatish Balay stash->idx[stash->n] = row; 3114c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3120ae3cd3bSBarry Smith stash->array[stash->n] = values[i*stepval]; 3134c1ff481SSatish Balay stash->n++; 3144c1ff481SSatish Balay } 3154c1ff481SSatish Balay PetscFunctionReturn(0); 3164c1ff481SSatish Balay } 3174c1ff481SSatish Balay 3184c1ff481SSatish Balay /* 3198798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3204c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3214c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3224c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3234c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3244c1ff481SSatish Balay 3254c1ff481SSatish Balay Input Parameters: 3264c1ff481SSatish Balay stash - the stash 3274c1ff481SSatish Balay row - the global block-row correspoiding to the values 3284c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3294c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3304c1ff481SSatish Balay values. Each block is of size bs*bs. 3314c1ff481SSatish Balay values - the values inserted 3324c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3334c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3344c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3354c1ff481SSatish Balay */ 3364a2ae208SSatish Balay #undef __FUNCT__ 3374a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 338c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3394c1ff481SSatish Balay { 340dfbe8321SBarry Smith PetscErrorCode ierr; 341c1ac3661SBarry Smith PetscInt i,j,k,bs2,bs=stash->bs; 342f15d580aSBarry Smith const MatScalar *vals; 343f15d580aSBarry Smith MatScalar *array; 344a2d1c673SSatish Balay 345a2d1c673SSatish Balay PetscFunctionBegin; 346a2d1c673SSatish Balay bs2 = bs*bs; 3474c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3488798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 349a2d1c673SSatish Balay } 3504c1ff481SSatish Balay for (i=0; i<n; i++) { 351a2d1c673SSatish Balay stash->idx[stash->n] = row; 352a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 353a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 354a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 355a2d1c673SSatish Balay funtion call */ 356a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 357a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 358a2d1c673SSatish Balay for (j=0; j<bs; j++) { 3590ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k*bs] = vals[k];} 360a2d1c673SSatish Balay array += 1; 361a2d1c673SSatish Balay vals += cmax*bs; 362a2d1c673SSatish Balay } 3634c1ff481SSatish Balay stash->n++; 3644c1ff481SSatish Balay } 3654c1ff481SSatish Balay PetscFunctionReturn(0); 3664c1ff481SSatish Balay } 3674c1ff481SSatish Balay 3684c1ff481SSatish Balay /* 3698798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3704c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3714c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3724c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3734c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3744c1ff481SSatish Balay 3754c1ff481SSatish Balay Input Parameters: 3764c1ff481SSatish Balay stash - the stash 3774c1ff481SSatish Balay row - the global block-row correspoiding to the values 3784c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3794c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3804c1ff481SSatish Balay values. Each block is of size bs*bs. 3814c1ff481SSatish Balay values - the values inserted 3824c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3834c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3844c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3854c1ff481SSatish Balay */ 3864a2ae208SSatish Balay #undef __FUNCT__ 3874a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 388c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3894c1ff481SSatish Balay { 390dfbe8321SBarry Smith PetscErrorCode ierr; 391c1ac3661SBarry Smith PetscInt i,j,k,bs2,bs=stash->bs; 392f15d580aSBarry Smith const MatScalar *vals; 393f15d580aSBarry Smith MatScalar *array; 3944c1ff481SSatish Balay 3954c1ff481SSatish Balay PetscFunctionBegin; 3964c1ff481SSatish Balay bs2 = bs*bs; 3974c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3988798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3994c1ff481SSatish Balay } 4004c1ff481SSatish Balay for (i=0; i<n; i++) { 4014c1ff481SSatish Balay stash->idx[stash->n] = row; 4024c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 4034c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 4044c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 4054c1ff481SSatish Balay funtion call */ 406a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 407a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 408a2d1c673SSatish Balay for (j=0; j<bs; j++) { 4090ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k] = vals[k];} 410a2d1c673SSatish Balay array += bs; 411a2d1c673SSatish Balay vals += rmax*bs; 412a2d1c673SSatish Balay } 413a2d1c673SSatish Balay stash->n++; 4149417f4adSLois Curfman McInnes } 4153a40ed3dSBarry Smith PetscFunctionReturn(0); 4169417f4adSLois Curfman McInnes } 4174c1ff481SSatish Balay /* 4188798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4194c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4204c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4214c1ff481SSatish Balay processors. 422bc5ccf88SSatish Balay 4234c1ff481SSatish Balay Input Parameters: 4244c1ff481SSatish Balay stash - the stash 4254c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4264c1ff481SSatish Balay for each node. 4274c1ff481SSatish Balay 4284c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4294c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4304c1ff481SSatish Balay the proper global indices. 4314c1ff481SSatish Balay */ 4324a2ae208SSatish Balay #undef __FUNCT__ 4334a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 434c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners) 435bc5ccf88SSatish Balay { 436c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 437c1ac3661SBarry Smith PetscInt size=stash->size,*nprocs,nsends,nreceives; 4386849ba73SBarry Smith PetscErrorCode ierr; 4397357eb19SBarry Smith PetscInt nmax,count,*sindices,*rindices,i,j,idx,lastidx; 4403eda8832SBarry Smith MatScalar *rvalues,*svalues; 441bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 442bc5ccf88SSatish Balay MPI_Request *send_waits,*recv_waits; 443bc5ccf88SSatish Balay 444bc5ccf88SSatish Balay PetscFunctionBegin; 445bc5ccf88SSatish Balay 4464c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 447bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 448c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 449c1ac3661SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 450c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 451a2d1c673SSatish Balay 4527357eb19SBarry Smith j = 0; 4537357eb19SBarry Smith lastidx = -1; 454bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 4557357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4567357eb19SBarry Smith if (lastidx > (idx = stash->idx[i])) j = 0; 4577357eb19SBarry Smith lastidx = idx; 4587357eb19SBarry Smith for (; j<size; j++) { 4594c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 460c1dc657dSBarry Smith nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break; 461bc5ccf88SSatish Balay } 462bc5ccf88SSatish Balay } 463bc5ccf88SSatish Balay } 464c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 465bc5ccf88SSatish Balay 466bc5ccf88SSatish Balay /* inform other processors of number of messages and max length*/ 467c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr); 468c1dc657dSBarry Smith 469bc5ccf88SSatish Balay /* post receives: 470bc5ccf88SSatish Balay since we don't know how long each individual message is we 471bc5ccf88SSatish Balay allocate the largest needed buffer for each receive. Potentially 472bc5ccf88SSatish Balay this is a lot of wasted space. 473bc5ccf88SSatish Balay */ 474c1ac3661SBarry Smith ierr = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&rvalues);CHKERRQ(ierr); 475c1ac3661SBarry Smith rindices = (PetscInt*)(rvalues + bs2*nreceives*nmax); 476b0a32e0cSBarry Smith ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 477bc5ccf88SSatish Balay for (i=0,count=0; i<nreceives; i++) { 478d7d82daaSBarry Smith ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr); 479c1ac3661SBarry Smith ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr); 480bc5ccf88SSatish Balay } 481bc5ccf88SSatish Balay 482bc5ccf88SSatish Balay /* do sends: 483bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 484bc5ccf88SSatish Balay the ith processor 485bc5ccf88SSatish Balay */ 486c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 487c1ac3661SBarry Smith sindices = (PetscInt*)(svalues + bs2*stash->n); 488b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 489c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 490bc5ccf88SSatish Balay starti = startv + size; 491a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 492bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 493bc5ccf88SSatish Balay for (i=1; i<size; i++) { 494c1dc657dSBarry Smith startv[i] = startv[i-1] + nprocs[2*i-2]; 495c1dc657dSBarry Smith starti[i] = starti[i-1] + nprocs[2*i-2]*2; 496bc5ccf88SSatish Balay } 497bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 498bc5ccf88SSatish Balay j = owner[i]; 499a2d1c673SSatish Balay if (bs2 == 1) { 500bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 501a2d1c673SSatish Balay } else { 502c1ac3661SBarry Smith PetscInt k; 5033eda8832SBarry Smith MatScalar *buf1,*buf2; 5044c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 5054c1ff481SSatish Balay buf2 = stash->array+bs2*i; 5064c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 507a2d1c673SSatish Balay } 508bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 509c1dc657dSBarry Smith sindices[starti[j]+nprocs[2*j]] = stash->idy[i]; 510bc5ccf88SSatish Balay startv[j]++; 511bc5ccf88SSatish Balay starti[j]++; 512bc5ccf88SSatish Balay } 513bc5ccf88SSatish Balay startv[0] = 0; 514c1dc657dSBarry Smith for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];} 515bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 516c1dc657dSBarry Smith if (nprocs[2*i+1]) { 517d7d82daaSBarry Smith ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 518d7d82daaSBarry Smith ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 519bc5ccf88SSatish Balay } 520bc5ccf88SSatish Balay } 521606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 522606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 523a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 524a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 525a2d1c673SSatish Balay stash->nprocs = nprocs; 526a2d1c673SSatish Balay 527bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 528bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 529bc5ccf88SSatish Balay stash->send_waits = send_waits; stash->recv_waits = recv_waits; 530bc5ccf88SSatish Balay stash->rmax = nmax; 531bc5ccf88SSatish Balay PetscFunctionReturn(0); 532bc5ccf88SSatish Balay } 533bc5ccf88SSatish Balay 534a2d1c673SSatish Balay /* 5358798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5368798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5374c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5384c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5394c1ff481SSatish Balay 5404c1ff481SSatish Balay Input Parameters: 5414c1ff481SSatish Balay stash - the stash 5424c1ff481SSatish Balay 5434c1ff481SSatish Balay Output Parameters: 5444c1ff481SSatish Balay nvals - the number of entries in the current message. 5454c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5464c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5474c1ff481SSatish Balay vals - the values 5484c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5494c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5504c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 551a2d1c673SSatish Balay */ 5524a2ae208SSatish Balay #undef __FUNCT__ 5534a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 554c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg) 555bc5ccf88SSatish Balay { 5566849ba73SBarry Smith PetscErrorCode ierr; 557c1ac3661SBarry Smith PetscMPIInt i; 558c1ac3661SBarry Smith PetscInt *flg_v,i1,i2,*rindices,bs2; 559a2d1c673SSatish Balay MPI_Status recv_status; 560b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 561bc5ccf88SSatish Balay 562bc5ccf88SSatish Balay PetscFunctionBegin; 563bc5ccf88SSatish Balay 564a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 565a2d1c673SSatish Balay /* Return if no more messages to process */ 566a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 567a2d1c673SSatish Balay 568a2d1c673SSatish Balay flg_v = stash->nprocs; 5694c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 570a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 571a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 572a2d1c673SSatish Balay while (!match_found) { 573a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 574a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 575a2d1c673SSatish Balay if (i % 2) { 576d7d82daaSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 577c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 578a2d1c673SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 579a2d1c673SSatish Balay } else { 5803eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 581c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 582a2d1c673SSatish Balay *nvals = *nvals/bs2; 583bc5ccf88SSatish Balay } 584a2d1c673SSatish Balay 585a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 586c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 587c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 588a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 589c1ac3661SBarry Smith rindices = (PetscInt*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs); 590a2d1c673SSatish Balay *rows = rindices + 2*i2*stash->rmax; 591a2d1c673SSatish Balay *cols = *rows + *nvals; 592a2d1c673SSatish Balay *vals = stash->rvalues + i1*bs2*stash->rmax; 593a2d1c673SSatish Balay *flg = 1; 594a2d1c673SSatish Balay stash->nprocessed ++; 59535d8aa7fSBarry Smith match_found = PETSC_TRUE; 596bc5ccf88SSatish Balay } 597bc5ccf88SSatish Balay } 598bc5ccf88SSatish Balay PetscFunctionReturn(0); 599bc5ccf88SSatish Balay } 600