1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 49417f4adSLois Curfman McInnes 53eda8832SBarry Smith /* 60ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 70ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 83eda8832SBarry Smith */ 9bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 104c1ff481SSatish Balay 119417f4adSLois Curfman McInnes /* 128798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 134c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 144c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 159417f4adSLois Curfman McInnes 164c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 174c1ff481SSatish Balay 184c1ff481SSatish Balay Input Parameters: 194c1ff481SSatish Balay comm - communicator, required for scatters. 204c1ff481SSatish Balay bs - stash block size. used when stashing blocks of values 214c1ff481SSatish Balay 224c1ff481SSatish Balay Output Parameters: 234c1ff481SSatish Balay stash - the newly created stash 249417f4adSLois Curfman McInnes */ 254a2ae208SSatish Balay #undef __FUNCT__ 264a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 27c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 289417f4adSLois Curfman McInnes { 29dfbe8321SBarry Smith PetscErrorCode ierr; 30c1ac3661SBarry Smith PetscInt max,*opt,nopt; 31f1af5d2fSBarry Smith PetscTruth flg; 32bc5ccf88SSatish Balay 333a40ed3dSBarry Smith PetscFunctionBegin; 34bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 35752ec6e0SSatish Balay stash->comm = comm; 36752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 37a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 38a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 39a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 40bc5ccf88SSatish Balay 41434d7ff9SSatish Balay nopt = stash->size; 42d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 43b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 44434d7ff9SSatish Balay if (flg) { 45434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 46434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 47434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 48f4ab19daSSatish Balay else max = 0; /* Use default */ 49434d7ff9SSatish Balay stash->umax = max; 50434d7ff9SSatish Balay } else { 51434d7ff9SSatish Balay stash->umax = 0; 52434d7ff9SSatish Balay } 53606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 544c1ff481SSatish Balay if (bs <= 0) bs = 1; 55a2d1c673SSatish Balay 564c1ff481SSatish Balay stash->bs = bs; 579417f4adSLois Curfman McInnes stash->nmax = 0; 58434d7ff9SSatish Balay stash->oldnmax = 0; 599417f4adSLois Curfman McInnes stash->n = 0; 604c1ff481SSatish Balay stash->reallocs = -1; 619417f4adSLois Curfman McInnes stash->idx = 0; 629417f4adSLois Curfman McInnes stash->idy = 0; 63bc5ccf88SSatish Balay stash->array = 0; 649417f4adSLois Curfman McInnes 65bc5ccf88SSatish Balay stash->send_waits = 0; 66bc5ccf88SSatish Balay stash->recv_waits = 0; 67a2d1c673SSatish Balay stash->send_status = 0; 68bc5ccf88SSatish Balay stash->nsends = 0; 69bc5ccf88SSatish Balay stash->nrecvs = 0; 70bc5ccf88SSatish Balay stash->svalues = 0; 71bc5ccf88SSatish Balay stash->rvalues = 0; 72563fb871SSatish Balay stash->rindices = 0; 73a2d1c673SSatish Balay stash->nprocs = 0; 74a2d1c673SSatish Balay stash->nprocessed = 0; 753a40ed3dSBarry Smith PetscFunctionReturn(0); 769417f4adSLois Curfman McInnes } 779417f4adSLois Curfman McInnes 784c1ff481SSatish Balay /* 798798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 804c1ff481SSatish Balay */ 814a2ae208SSatish Balay #undef __FUNCT__ 824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 849417f4adSLois Curfman McInnes { 85dfbe8321SBarry Smith PetscErrorCode ierr; 86a2d1c673SSatish Balay 87bc5ccf88SSatish Balay PetscFunctionBegin; 88606d414cSSatish Balay if (stash->array) { 89606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 90606d414cSSatish Balay stash->array = 0; 91606d414cSSatish Balay } 92bc5ccf88SSatish Balay PetscFunctionReturn(0); 93bc5ccf88SSatish Balay } 94bc5ccf88SSatish Balay 954c1ff481SSatish Balay /* 968798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 974c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 984c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. This 994c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1004c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1014c1ff481SSatish Balay so that the same value can be used the next time through. 1024c1ff481SSatish Balay */ 1034a2ae208SSatish Balay #undef __FUNCT__ 1044a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 105dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 106bc5ccf88SSatish Balay { 1076849ba73SBarry Smith PetscErrorCode ierr; 1086849ba73SBarry Smith int nsends=stash->nsends,bs2,oldnmax; 109a2d1c673SSatish Balay MPI_Status *send_status; 110a2d1c673SSatish Balay 1113a40ed3dSBarry Smith PetscFunctionBegin; 112a2d1c673SSatish Balay /* wait on sends */ 113a2d1c673SSatish Balay if (nsends) { 11482502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 115a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 116606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 117a2d1c673SSatish Balay } 118a2d1c673SSatish Balay 119c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 120434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 121434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 122b9b97703SBarry Smith if (stash->n) { 12394b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1248a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 125434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 126b9b97703SBarry Smith } 127434d7ff9SSatish Balay 128d07ff455SSatish Balay stash->nmax = 0; 129d07ff455SSatish Balay stash->n = 0; 1304c1ff481SSatish Balay stash->reallocs = -1; 131a2d1c673SSatish Balay stash->nprocessed = 0; 132bc5ccf88SSatish Balay 133bc5ccf88SSatish Balay if (stash->array) { 134606d414cSSatish Balay ierr = PetscFree(stash->array);CHKERRQ(ierr); 135bc5ccf88SSatish Balay stash->array = 0; 136bc5ccf88SSatish Balay stash->idx = 0; 137bc5ccf88SSatish Balay stash->idy = 0; 138bc5ccf88SSatish Balay } 139606d414cSSatish Balay if (stash->send_waits) { 140606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 141606d414cSSatish Balay stash->send_waits = 0; 142606d414cSSatish Balay } 143606d414cSSatish Balay if (stash->recv_waits) { 144606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 145606d414cSSatish Balay stash->recv_waits = 0; 146606d414cSSatish Balay } 147606d414cSSatish Balay if (stash->svalues) { 148606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 149606d414cSSatish Balay stash->svalues = 0; 150606d414cSSatish Balay } 151606d414cSSatish Balay if (stash->rvalues) { 152606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 153606d414cSSatish Balay stash->rvalues = 0; 154606d414cSSatish Balay } 155563fb871SSatish Balay if (stash->rindices) { 156563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 157563fb871SSatish Balay stash->rindices = 0; 158563fb871SSatish Balay } 159606d414cSSatish Balay if (stash->nprocs) { 160b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 161606d414cSSatish Balay stash->nprocs = 0; 162606d414cSSatish Balay } 163bc5ccf88SSatish Balay 1643a40ed3dSBarry Smith PetscFunctionReturn(0); 1659417f4adSLois Curfman McInnes } 1669417f4adSLois Curfman McInnes 1674c1ff481SSatish Balay /* 1688798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1694c1ff481SSatish Balay 1704c1ff481SSatish Balay Input Parameters: 1714c1ff481SSatish Balay stash - the stash 17294b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 1734c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1744c1ff481SSatish Balay 1754c1ff481SSatish Balay */ 1764a2ae208SSatish Balay #undef __FUNCT__ 1774a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 178c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 17997530c3fSBarry Smith { 180c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 18194b769a5SSatish Balay 1823a40ed3dSBarry Smith PetscFunctionBegin; 1831ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1841ecfd215SBarry Smith if (reallocs) { 185434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 186434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1871ecfd215SBarry Smith } 188bc5ccf88SSatish Balay PetscFunctionReturn(0); 189bc5ccf88SSatish Balay } 1904c1ff481SSatish Balay 1914c1ff481SSatish Balay 1924c1ff481SSatish Balay /* 1938798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1944c1ff481SSatish Balay 1954c1ff481SSatish Balay Input Parameters: 1964c1ff481SSatish Balay stash - the stash 1974c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1984c1ff481SSatish Balay this value is used while allocating memory. 1994c1ff481SSatish Balay */ 2004a2ae208SSatish Balay #undef __FUNCT__ 2014a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 202c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 203bc5ccf88SSatish Balay { 204bc5ccf88SSatish Balay PetscFunctionBegin; 205434d7ff9SSatish Balay stash->umax = max; 2063a40ed3dSBarry Smith PetscFunctionReturn(0); 20797530c3fSBarry Smith } 20897530c3fSBarry Smith 2098798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 2104c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 2114c1ff481SSatish Balay being inserted into the stash. 2124c1ff481SSatish Balay 2134c1ff481SSatish Balay Input Parameters: 2144c1ff481SSatish Balay stash - the stash 2154c1ff481SSatish Balay incr - the minimum increase requested 2164c1ff481SSatish Balay 2174c1ff481SSatish Balay Notes: 2184c1ff481SSatish Balay This routine doubles the currently used memory. 2194c1ff481SSatish Balay */ 2204a2ae208SSatish Balay #undef __FUNCT__ 2214a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 222c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2239417f4adSLois Curfman McInnes { 2246849ba73SBarry Smith PetscErrorCode ierr; 225c1ac3661SBarry Smith PetscInt *n_idx,*n_idy,newnmax,bs2; 2263eda8832SBarry Smith MatScalar *n_array; 2279417f4adSLois Curfman McInnes 2283a40ed3dSBarry Smith PetscFunctionBegin; 2299417f4adSLois Curfman McInnes /* allocate a larger stash */ 23094b769a5SSatish Balay bs2 = stash->bs*stash->bs; 231c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 232434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 233434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 234c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 235434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 236434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 237434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2384c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 239d07ff455SSatish Balay 240c1ac3661SBarry Smith ierr = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr); 241c1ac3661SBarry Smith n_idx = (PetscInt*)(n_array + bs2*newnmax); 242c1ac3661SBarry Smith n_idy = (PetscInt*)(n_idx + newnmax); 2433eda8832SBarry Smith ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr); 244c1ac3661SBarry Smith ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 245c1ac3661SBarry Smith ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 246606d414cSSatish Balay if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 247d07ff455SSatish Balay stash->array = n_array; 248d07ff455SSatish Balay stash->idx = n_idx; 249d07ff455SSatish Balay stash->idy = n_idy; 250d07ff455SSatish Balay stash->nmax = newnmax; 251bc5ccf88SSatish Balay stash->reallocs++; 252bc5ccf88SSatish Balay PetscFunctionReturn(0); 253bc5ccf88SSatish Balay } 254bc5ccf88SSatish Balay /* 2558798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2564c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2574c1ff481SSatish Balay can be inserted with a single call to this function. 2584c1ff481SSatish Balay 2594c1ff481SSatish Balay Input Parameters: 2604c1ff481SSatish Balay stash - the stash 2614c1ff481SSatish Balay row - the global row correspoiding to the values 2624c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2634c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2644c1ff481SSatish Balay values - the values inserted 265bc5ccf88SSatish Balay */ 2664a2ae208SSatish Balay #undef __FUNCT__ 2674a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 268c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[]) 269bc5ccf88SSatish Balay { 270dfbe8321SBarry Smith PetscErrorCode ierr; 271c1ac3661SBarry Smith PetscInt i; 272bc5ccf88SSatish Balay 273bc5ccf88SSatish Balay PetscFunctionBegin; 2744c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 2754c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 2768798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2779417f4adSLois Curfman McInnes } 2784c1ff481SSatish Balay for (i=0; i<n; i++) { 2799417f4adSLois Curfman McInnes stash->idx[stash->n] = row; 280a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 2810ae3cd3bSBarry Smith stash->array[stash->n] = values[i]; 282a2d1c673SSatish Balay stash->n++; 2839417f4adSLois Curfman McInnes } 284a2d1c673SSatish Balay PetscFunctionReturn(0); 285a2d1c673SSatish Balay } 2864c1ff481SSatish Balay /* 2878798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2884c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2894c1ff481SSatish Balay can be inserted with a single call to this function. 290a2d1c673SSatish Balay 2914c1ff481SSatish Balay Input Parameters: 2924c1ff481SSatish Balay stash - the stash 2934c1ff481SSatish Balay row - the global row correspoiding to the values 2944c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2954c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2964c1ff481SSatish Balay values - the values inserted 2974c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2984c1ff481SSatish Balay this happens because the input is columnoriented. 2994c1ff481SSatish Balay */ 3004a2ae208SSatish Balay #undef __FUNCT__ 3014a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 302c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval) 303a2d1c673SSatish Balay { 304dfbe8321SBarry Smith PetscErrorCode ierr; 305c1ac3661SBarry Smith PetscInt i; 306a2d1c673SSatish Balay 3074c1ff481SSatish Balay PetscFunctionBegin; 3084c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 3094c1ff481SSatish Balay if ((stash->n + n) > stash->nmax) { 3108798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 3114c1ff481SSatish Balay } 3124c1ff481SSatish Balay for (i=0; i<n; i++) { 3134c1ff481SSatish Balay stash->idx[stash->n] = row; 3144c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 3150ae3cd3bSBarry Smith stash->array[stash->n] = values[i*stepval]; 3164c1ff481SSatish Balay stash->n++; 3174c1ff481SSatish Balay } 3184c1ff481SSatish Balay PetscFunctionReturn(0); 3194c1ff481SSatish Balay } 3204c1ff481SSatish Balay 3214c1ff481SSatish Balay /* 3228798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3234c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3244c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3254c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3264c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3274c1ff481SSatish Balay 3284c1ff481SSatish Balay Input Parameters: 3294c1ff481SSatish Balay stash - the stash 3304c1ff481SSatish Balay row - the global block-row correspoiding to the values 3314c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3324c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3334c1ff481SSatish Balay values. Each block is of size bs*bs. 3344c1ff481SSatish Balay values - the values inserted 3354c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3364c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3374c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3384c1ff481SSatish Balay */ 3394a2ae208SSatish Balay #undef __FUNCT__ 3404a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 341c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3424c1ff481SSatish Balay { 343dfbe8321SBarry Smith PetscErrorCode ierr; 344c1ac3661SBarry Smith PetscInt i,j,k,bs2,bs=stash->bs; 345f15d580aSBarry Smith const MatScalar *vals; 346f15d580aSBarry Smith MatScalar *array; 347a2d1c673SSatish Balay 348a2d1c673SSatish Balay PetscFunctionBegin; 349a2d1c673SSatish Balay bs2 = bs*bs; 3504c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 3518798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 352a2d1c673SSatish Balay } 3534c1ff481SSatish Balay for (i=0; i<n; i++) { 354a2d1c673SSatish Balay stash->idx[stash->n] = row; 355a2d1c673SSatish Balay stash->idy[stash->n] = idxn[i]; 356a2d1c673SSatish Balay /* Now copy over the block of values. Store the values column oriented. 357a2d1c673SSatish Balay This enables inserting multiple blocks belonging to a row with a single 358a2d1c673SSatish Balay funtion call */ 359a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 360a2d1c673SSatish Balay vals = values + idx*bs2*n + bs*i; 361a2d1c673SSatish Balay for (j=0; j<bs; j++) { 3620ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k*bs] = vals[k];} 363a2d1c673SSatish Balay array += 1; 364a2d1c673SSatish Balay vals += cmax*bs; 365a2d1c673SSatish Balay } 3664c1ff481SSatish Balay stash->n++; 3674c1ff481SSatish Balay } 3684c1ff481SSatish Balay PetscFunctionReturn(0); 3694c1ff481SSatish Balay } 3704c1ff481SSatish Balay 3714c1ff481SSatish Balay /* 3728798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3734c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3744c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3754c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3764c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3774c1ff481SSatish Balay 3784c1ff481SSatish Balay Input Parameters: 3794c1ff481SSatish Balay stash - the stash 3804c1ff481SSatish Balay row - the global block-row correspoiding to the values 3814c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3824c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3834c1ff481SSatish Balay values. Each block is of size bs*bs. 3844c1ff481SSatish Balay values - the values inserted 3854c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3864c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3874c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3884c1ff481SSatish Balay */ 3894a2ae208SSatish Balay #undef __FUNCT__ 3904a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 391c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3924c1ff481SSatish Balay { 393dfbe8321SBarry Smith PetscErrorCode ierr; 394c1ac3661SBarry Smith PetscInt i,j,k,bs2,bs=stash->bs; 395f15d580aSBarry Smith const MatScalar *vals; 396f15d580aSBarry Smith MatScalar *array; 3974c1ff481SSatish Balay 3984c1ff481SSatish Balay PetscFunctionBegin; 3994c1ff481SSatish Balay bs2 = bs*bs; 4004c1ff481SSatish Balay if ((stash->n+n) > stash->nmax) { 4018798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4024c1ff481SSatish Balay } 4034c1ff481SSatish Balay for (i=0; i<n; i++) { 4044c1ff481SSatish Balay stash->idx[stash->n] = row; 4054c1ff481SSatish Balay stash->idy[stash->n] = idxn[i]; 4064c1ff481SSatish Balay /* Now copy over the block of values. Store the values column oriented. 4074c1ff481SSatish Balay This enables inserting multiple blocks belonging to a row with a single 4084c1ff481SSatish Balay funtion call */ 409a2d1c673SSatish Balay array = stash->array + bs2*stash->n; 410a2d1c673SSatish Balay vals = values + idx*bs + bs2*rmax*i; 411a2d1c673SSatish Balay for (j=0; j<bs; j++) { 4120ae3cd3bSBarry Smith for (k=0; k<bs; k++) {array[k] = vals[k];} 413a2d1c673SSatish Balay array += bs; 414a2d1c673SSatish Balay vals += rmax*bs; 415a2d1c673SSatish Balay } 416a2d1c673SSatish Balay stash->n++; 4179417f4adSLois Curfman McInnes } 4183a40ed3dSBarry Smith PetscFunctionReturn(0); 4199417f4adSLois Curfman McInnes } 4204c1ff481SSatish Balay /* 4218798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4224c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4234c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4244c1ff481SSatish Balay processors. 425bc5ccf88SSatish Balay 4264c1ff481SSatish Balay Input Parameters: 4274c1ff481SSatish Balay stash - the stash 4284c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4294c1ff481SSatish Balay for each node. 4304c1ff481SSatish Balay 4314c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4324c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4334c1ff481SSatish Balay the proper global indices. 4344c1ff481SSatish Balay */ 4354a2ae208SSatish Balay #undef __FUNCT__ 4364a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 437c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners) 438bc5ccf88SSatish Balay { 439c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 440fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4416849ba73SBarry Smith PetscErrorCode ierr; 4421b27d27dSSatish Balay PetscInt count,*sindices,**rindices,i,j,idx,lastidx; 443563fb871SSatish Balay MatScalar **rvalues,*svalues; 444bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 445563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 446fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 447bc5ccf88SSatish Balay 448bc5ccf88SSatish Balay PetscFunctionBegin; 449bc5ccf88SSatish Balay 4504c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 451bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 452fe09c992SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 453fe09c992SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); 454c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 455a2d1c673SSatish Balay 456563fb871SSatish Balay nlengths = nprocs+size; 4577357eb19SBarry Smith j = 0; 4587357eb19SBarry Smith lastidx = -1; 459bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 4607357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4617357eb19SBarry Smith if (lastidx > (idx = stash->idx[i])) j = 0; 4627357eb19SBarry Smith lastidx = idx; 4637357eb19SBarry Smith for (; j<size; j++) { 4644c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 465563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 466bc5ccf88SSatish Balay } 467bc5ccf88SSatish Balay } 468bc5ccf88SSatish Balay } 469563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 470563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 471563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 472563fb871SSatish Balay } 473bc5ccf88SSatish Balay 474563fb871SSatish Balay { int *onodes,*olengths; 475563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 476563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 477563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 478563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 479563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 480563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 481563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 482563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 483563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 484563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 485563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 486bc5ccf88SSatish Balay } 487bc5ccf88SSatish Balay 488bc5ccf88SSatish Balay /* do sends: 489bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 490bc5ccf88SSatish Balay the ith processor 491bc5ccf88SSatish Balay */ 492c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 493c1ac3661SBarry Smith sindices = (PetscInt*)(svalues + bs2*stash->n); 494b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 495c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 496bc5ccf88SSatish Balay starti = startv + size; 497a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 498bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 499bc5ccf88SSatish Balay for (i=1; i<size; i++) { 500563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 501563fb871SSatish Balay starti[i] = starti[i-1] + nlengths[i-1]*2; 502bc5ccf88SSatish Balay } 503bc5ccf88SSatish Balay for (i=0; i<stash->n; i++) { 504bc5ccf88SSatish Balay j = owner[i]; 505a2d1c673SSatish Balay if (bs2 == 1) { 506bc5ccf88SSatish Balay svalues[startv[j]] = stash->array[i]; 507a2d1c673SSatish Balay } else { 508c1ac3661SBarry Smith PetscInt k; 5093eda8832SBarry Smith MatScalar *buf1,*buf2; 5104c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 5114c1ff481SSatish Balay buf2 = stash->array+bs2*i; 5124c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 513a2d1c673SSatish Balay } 514bc5ccf88SSatish Balay sindices[starti[j]] = stash->idx[i]; 515563fb871SSatish Balay sindices[starti[j]+nlengths[j]] = stash->idy[i]; 516bc5ccf88SSatish Balay startv[j]++; 517bc5ccf88SSatish Balay starti[j]++; 518bc5ccf88SSatish Balay } 519bc5ccf88SSatish Balay startv[0] = 0; 520563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 521e5d0e772SSatish Balay 522bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 523563fb871SSatish Balay if (nprocs[i]) { 524563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 525563fb871SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 526bc5ccf88SSatish Balay } 527b85c94c3SSatish Balay } 5286cf91177SBarry Smith #if defined(PETSC_USE_INFO) 529*ae15b995SBarry Smith ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr); 530e5d0e772SSatish Balay for (i=0; i<size; i++) { 531e5d0e772SSatish Balay if (nprocs[i]) { 532*ae15b995SBarry Smith ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 533e5d0e772SSatish Balay } 534e5d0e772SSatish Balay } 535e5d0e772SSatish Balay #endif 536606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 537606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 538a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 539a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 540a2d1c673SSatish Balay stash->nprocs = nprocs; 541a2d1c673SSatish Balay 542563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 543563fb871SSatish Balay ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 544563fb871SSatish Balay 545563fb871SSatish Balay for (i=0; i<nreceives; i++) { 546563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 547563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 548563fb871SSatish Balay } 549563fb871SSatish Balay stash->recv_waits = recv_waits; 550563fb871SSatish Balay ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 551563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 552563fb871SSatish Balay 553bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 554563fb871SSatish Balay stash->rindices = rindices; stash->send_waits = send_waits; 555bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 556bc5ccf88SSatish Balay PetscFunctionReturn(0); 557bc5ccf88SSatish Balay } 558bc5ccf88SSatish Balay 559a2d1c673SSatish Balay /* 5608798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5618798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5624c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5634c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5644c1ff481SSatish Balay 5654c1ff481SSatish Balay Input Parameters: 5664c1ff481SSatish Balay stash - the stash 5674c1ff481SSatish Balay 5684c1ff481SSatish Balay Output Parameters: 5694c1ff481SSatish Balay nvals - the number of entries in the current message. 5704c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5714c1ff481SSatish Balay cols - an array of columnindices (or blocked indices) corresponding to the values 5724c1ff481SSatish Balay vals - the values 5734c1ff481SSatish Balay flg - 0 indicates no more message left, and the current call has no values associated. 5744c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 5754c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 576a2d1c673SSatish Balay */ 5774a2ae208SSatish Balay #undef __FUNCT__ 5784a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 579c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg) 580bc5ccf88SSatish Balay { 5816849ba73SBarry Smith PetscErrorCode ierr; 582fe09c992SBarry Smith PetscMPIInt i,*flg_v,i1,i2; 583fe09c992SBarry Smith PetscInt bs2; 584a2d1c673SSatish Balay MPI_Status recv_status; 585b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 586bc5ccf88SSatish Balay 587bc5ccf88SSatish Balay PetscFunctionBegin; 588bc5ccf88SSatish Balay 589a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 590a2d1c673SSatish Balay /* Return if no more messages to process */ 591a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 592a2d1c673SSatish Balay 593a2d1c673SSatish Balay flg_v = stash->nprocs; 5944c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 595a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 596a2d1c673SSatish Balay the calling function. Until then keep receiving messages */ 597a2d1c673SSatish Balay while (!match_found) { 598a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 599a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 600a2d1c673SSatish Balay if (i % 2) { 6013eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 602c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 603a2d1c673SSatish Balay *nvals = *nvals/bs2; 604563fb871SSatish Balay } else { 605563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 606563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 607563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 608bc5ccf88SSatish Balay } 609a2d1c673SSatish Balay 610a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 611c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 612c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 613a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 614563fb871SSatish Balay *rows = stash->rindices[i2]; 615a2d1c673SSatish Balay *cols = *rows + *nvals; 616563fb871SSatish Balay *vals = stash->rvalues[i1]; 617a2d1c673SSatish Balay *flg = 1; 618a2d1c673SSatish Balay stash->nprocessed ++; 61935d8aa7fSBarry Smith match_found = PETSC_TRUE; 620bc5ccf88SSatish Balay } 621bc5ccf88SSatish Balay } 622bc5ccf88SSatish Balay PetscFunctionReturn(0); 623bc5ccf88SSatish Balay } 624