1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*4787f768SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.147 1999/02/12 00:04:25 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 117b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 133b2fbd54SBarry Smith 14537820f0SBarry Smith /* 15537820f0SBarry Smith Local utility routine that creates a mapping from the global column 1657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1857b952d6SSatish Balay length of colmap equals the global matrix length. 1957b952d6SSatish Balay */ 205615d1e5SSatish Balay #undef __FUNC__ 215615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2357b952d6SSatish Balay { 2457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2557b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 266dee3f7eSBarry Smith int nbs = B->nbs,i,bs=B->bs; 276dee3f7eSBarry Smith #if defined (USE_CTABLE) 286dee3f7eSBarry Smith int ierr; 296dee3f7eSBarry Smith #endif 3057b952d6SSatish Balay 31d64ed03dSBarry Smith PetscFunctionBegin; 3248e59246SSatish Balay #if defined (USE_CTABLE) 3348e59246SSatish Balay ierr = TableCreate( &baij->colmap, baij->nbs/5 ); CHKERRQ(ierr); 3448e59246SSatish Balay for ( i=0; i<nbs; i++ ){ 3548e59246SSatish Balay ierr = TableAdd( baij->colmap, baij->garray[i] + 1, i*bs+1 );CHKERRQ(ierr); 3648e59246SSatish Balay } 3748e59246SSatish Balay #else 38758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3957b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4057b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4248e59246SSatish Balay #endif 433a40ed3dSBarry Smith PetscFunctionReturn(0); 4457b952d6SSatish Balay } 4557b952d6SSatish Balay 4680c1aa95SSatish Balay #define CHUNKSIZE 10 4780c1aa95SSatish Balay 48f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 4980c1aa95SSatish Balay { \ 5080c1aa95SSatish Balay \ 5180c1aa95SSatish Balay brow = row/bs; \ 5280c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 53ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 5480c1aa95SSatish Balay bcol = col/bs; \ 5580c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 56ab26458aSBarry Smith low = 0; high = nrow; \ 57ab26458aSBarry Smith while (high-low > 3) { \ 58ab26458aSBarry Smith t = (low+high)/2; \ 59ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 60ab26458aSBarry Smith else low = t; \ 61ab26458aSBarry Smith } \ 62ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 6380c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 6480c1aa95SSatish Balay if (rp[_i] == bcol) { \ 6580c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 66eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 67eada6651SSatish Balay else *bap = value; \ 68ac7a638eSSatish Balay goto a_noinsert; \ 6980c1aa95SSatish Balay } \ 7080c1aa95SSatish Balay } \ 7189280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 72a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 7380c1aa95SSatish Balay if (nrow >= rmax) { \ 7480c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 7580c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 7680c1aa95SSatish Balay Scalar *new_a; \ 7780c1aa95SSatish Balay \ 78a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7989280ab3SLois Curfman McInnes \ 8080c1aa95SSatish Balay /* malloc new storage space */ \ 8180c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 8280c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 8380c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 8480c1aa95SSatish Balay new_i = new_j + new_nz; \ 8580c1aa95SSatish Balay \ 8680c1aa95SSatish Balay /* copy over old data into new slots */ \ 8780c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 8880c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 8980c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 9080c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 9180c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 9280c1aa95SSatish Balay len*sizeof(int)); \ 9380c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 9480c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 9580c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 9680c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 9780c1aa95SSatish Balay /* free up old matrix storage */ \ 9880c1aa95SSatish Balay PetscFree(a->a); \ 9980c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 10080c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 10180c1aa95SSatish Balay a->singlemalloc = 1; \ 10280c1aa95SSatish Balay \ 10380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 104ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 10580c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 10680c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 10780c1aa95SSatish Balay a->reallocs++; \ 10880c1aa95SSatish Balay a->nz++; \ 10980c1aa95SSatish Balay } \ 11080c1aa95SSatish Balay N = nrow++ - 1; \ 11180c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 11280c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 11380c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 11480c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 11580c1aa95SSatish Balay } \ 11680c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 11780c1aa95SSatish Balay rp[_i] = bcol; \ 11880c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 119ac7a638eSSatish Balay a_noinsert:; \ 12080c1aa95SSatish Balay ailen[brow] = nrow; \ 12180c1aa95SSatish Balay } 12257b952d6SSatish Balay 123ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 124ac7a638eSSatish Balay { \ 125ac7a638eSSatish Balay \ 126ac7a638eSSatish Balay brow = row/bs; \ 127ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 128ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 129ac7a638eSSatish Balay bcol = col/bs; \ 130ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 131ac7a638eSSatish Balay low = 0; high = nrow; \ 132ac7a638eSSatish Balay while (high-low > 3) { \ 133ac7a638eSSatish Balay t = (low+high)/2; \ 134ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 135ac7a638eSSatish Balay else low = t; \ 136ac7a638eSSatish Balay } \ 137ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 138ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 139ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 140ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 141ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 142ac7a638eSSatish Balay else *bap = value; \ 143ac7a638eSSatish Balay goto b_noinsert; \ 144ac7a638eSSatish Balay } \ 145ac7a638eSSatish Balay } \ 14689280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 147a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 148ac7a638eSSatish Balay if (nrow >= rmax) { \ 149ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15074c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 151ac7a638eSSatish Balay Scalar *new_a; \ 152ac7a638eSSatish Balay \ 153a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 15489280ab3SLois Curfman McInnes \ 155ac7a638eSSatish Balay /* malloc new storage space */ \ 15674c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 157ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 158ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 159ac7a638eSSatish Balay new_i = new_j + new_nz; \ 160ac7a638eSSatish Balay \ 161ac7a638eSSatish Balay /* copy over old data into new slots */ \ 162ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 16374c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 164ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 165ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 166ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 167ac7a638eSSatish Balay len*sizeof(int)); \ 168ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 169ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 170ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 171ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 172ac7a638eSSatish Balay /* free up old matrix storage */ \ 17374c639caSSatish Balay PetscFree(b->a); \ 17474c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 17574c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 17674c639caSSatish Balay b->singlemalloc = 1; \ 177ac7a638eSSatish Balay \ 178ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 179ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 18074c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 18174c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 18274c639caSSatish Balay b->reallocs++; \ 18374c639caSSatish Balay b->nz++; \ 184ac7a638eSSatish Balay } \ 185ac7a638eSSatish Balay N = nrow++ - 1; \ 186ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 187ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 188ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 189ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 190ac7a638eSSatish Balay } \ 191ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 192ac7a638eSSatish Balay rp[_i] = bcol; \ 193ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 194ac7a638eSSatish Balay b_noinsert:; \ 195ac7a638eSSatish Balay bilen[brow] = nrow; \ 196ac7a638eSSatish Balay } 197ac7a638eSSatish Balay 1985615d1e5SSatish Balay #undef __FUNC__ 1995615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 200ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 20157b952d6SSatish Balay { 20257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 20357b952d6SSatish Balay Scalar value; 2044fa0d573SSatish Balay int ierr,i,j,row,col; 2054fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2064fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2074fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 20857b952d6SSatish Balay 209eada6651SSatish Balay /* Some Variables required in the macro */ 21080c1aa95SSatish Balay Mat A = baij->A; 21180c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 212ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 213ac7a638eSSatish Balay Scalar *aa=a->a; 214ac7a638eSSatish Balay 215ac7a638eSSatish Balay Mat B = baij->B; 216ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 217ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 218ac7a638eSSatish Balay Scalar *ba=b->a; 219ac7a638eSSatish Balay 220ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 221ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 222ac7a638eSSatish Balay Scalar *ap,*bap; 22380c1aa95SSatish Balay 224d64ed03dSBarry Smith PetscFunctionBegin; 22557b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2265ef9f2a5SBarry Smith if (im[i] < 0) continue; 2273a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 228a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 229639f9d9dSBarry Smith #endif 23057b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 23157b952d6SSatish Balay row = im[i] - rstart_orig; 23257b952d6SSatish Balay for ( j=0; j<n; j++ ) { 23357b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 23457b952d6SSatish Balay col = in[j] - cstart_orig; 23557b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 236f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 23780c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 23857b952d6SSatish Balay } 2395ef9f2a5SBarry Smith else if (in[j] < 0) continue; 2403a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 241a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 242639f9d9dSBarry Smith #endif 24357b952d6SSatish Balay else { 24457b952d6SSatish Balay if (mat->was_assembled) { 245905e6a2fSBarry Smith if (!baij->colmap) { 246905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 247905e6a2fSBarry Smith } 24848e59246SSatish Balay #if defined (USE_CTABLE) 24948e59246SSatish Balay col = TableFind( baij->colmap, in[j]/bs + 1 ) - 1 + in[j]%bs; 25048e59246SSatish Balay #else 251905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 25248e59246SSatish Balay #endif 25357b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 25457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 25557b952d6SSatish Balay col = in[j]; 2569bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2579bf004c3SSatish Balay B = baij->B; 2589bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2599bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2609bf004c3SSatish Balay ba=b->a; 26157b952d6SSatish Balay } 262d64ed03dSBarry Smith } else col = in[j]; 26357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 264ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 265ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 26657b952d6SSatish Balay } 26757b952d6SSatish Balay } 268d64ed03dSBarry Smith } else { 26990f02eecSBarry Smith if (roworiented && !baij->donotstash) { 27057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 271d64ed03dSBarry Smith } else { 27290f02eecSBarry Smith if (!baij->donotstash) { 27357b952d6SSatish Balay row = im[i]; 27457b952d6SSatish Balay for ( j=0; j<n; j++ ) { 27557b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 27657b952d6SSatish Balay } 27757b952d6SSatish Balay } 27857b952d6SSatish Balay } 27957b952d6SSatish Balay } 28090f02eecSBarry Smith } 2813a40ed3dSBarry Smith PetscFunctionReturn(0); 28257b952d6SSatish Balay } 28357b952d6SSatish Balay 284ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 285ab26458aSBarry Smith #undef __FUNC__ 286ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 287ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 288ab26458aSBarry Smith { 289ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 29030793edcSSatish Balay Scalar *value,*barray=baij->barray; 291abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 292ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 293ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 294ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 295ab26458aSBarry Smith 29630793edcSSatish Balay if(!barray) { 29747513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 29830793edcSSatish Balay } 29930793edcSSatish Balay 300ab26458aSBarry Smith if (roworiented) { 301ab26458aSBarry Smith stepval = (n-1)*bs; 302ab26458aSBarry Smith } else { 303ab26458aSBarry Smith stepval = (m-1)*bs; 304ab26458aSBarry Smith } 305ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 3065ef9f2a5SBarry Smith if (im[i] < 0) continue; 3073a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3085ef9f2a5SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 309ab26458aSBarry Smith #endif 310ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 311ab26458aSBarry Smith row = im[i] - rstart; 312ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 31315b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 31415b57d14SSatish Balay if ((roworiented) && (n == 1)) { 31515b57d14SSatish Balay barray = v + i*bs2; 31615b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 31715b57d14SSatish Balay barray = v + j*bs2; 31815b57d14SSatish Balay } else { /* Here a copy is required */ 319ab26458aSBarry Smith if (roworiented) { 320ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 321ab26458aSBarry Smith } else { 322ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 323abef11f7SSatish Balay } 32447513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 32547513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 32630793edcSSatish Balay *barray++ = *value++; 32747513183SBarry Smith } 32847513183SBarry Smith } 32930793edcSSatish Balay barray -=bs2; 33015b57d14SSatish Balay } 331abef11f7SSatish Balay 332abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 333abef11f7SSatish Balay col = in[j] - cstart; 33430793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 335ab26458aSBarry Smith } 3365ef9f2a5SBarry Smith else if (in[j] < 0) continue; 3373a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3385ef9f2a5SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 339ab26458aSBarry Smith #endif 340ab26458aSBarry Smith else { 341ab26458aSBarry Smith if (mat->was_assembled) { 342ab26458aSBarry Smith if (!baij->colmap) { 343ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 344ab26458aSBarry Smith } 345a5eb4965SSatish Balay 3463a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 34748e59246SSatish Balay #if defined (USE_CTABLE) 34848e59246SSatish Balay if( (TableFind( baij->colmap, in[j] + 1 ) - 1) % bs) 34948e59246SSatish Balay {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 35048e59246SSatish Balay #else 351a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 352a5eb4965SSatish Balay #endif 35348e59246SSatish Balay #endif 35448e59246SSatish Balay #if defined (USE_CTABLE) 35548e59246SSatish Balay col = (TableFind( baij->colmap, in[j] + 1 ) - 1)/bs; 35648e59246SSatish Balay #else 357a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 35848e59246SSatish Balay #endif 359ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 360ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 361ab26458aSBarry Smith col = in[j]; 362ab26458aSBarry Smith } 363ab26458aSBarry Smith } 364ab26458aSBarry Smith else col = in[j]; 36530793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 366ab26458aSBarry Smith } 367ab26458aSBarry Smith } 368d64ed03dSBarry Smith } else { 369ab26458aSBarry Smith if (!baij->donotstash) { 370ab26458aSBarry Smith if (roworiented ) { 371abef11f7SSatish Balay row = im[i]*bs; 372abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 373abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 374abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 375abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 376abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 377abef11f7SSatish Balay } 378ab26458aSBarry Smith } 379ab26458aSBarry Smith } 380d64ed03dSBarry Smith } else { 381ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 382abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 383abef11f7SSatish Balay col = in[j]*bs; 384abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 385abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 386abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 387ab26458aSBarry Smith } 388ab26458aSBarry Smith } 389ab26458aSBarry Smith } 390abef11f7SSatish Balay } 391abef11f7SSatish Balay } 392ab26458aSBarry Smith } 393ab26458aSBarry Smith } 3943a40ed3dSBarry Smith PetscFunctionReturn(0); 395ab26458aSBarry Smith } 3960bdbc534SSatish Balay #define HASH_KEY 0.6180339887 397c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 398c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 399c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 4005615d1e5SSatish Balay #undef __FUNC__ 4010bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 4020bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4030bdbc534SSatish Balay { 4040bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4050bdbc534SSatish Balay int ierr,i,j,row,col; 4060bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 407c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 408c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 409c2760754SSatish Balay double tmp; 410b9e4cc15SSatish Balay Scalar ** HD = baij->hd,value; 4114a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4124a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4134a15367fSSatish Balay #endif 4140bdbc534SSatish Balay 4150bdbc534SSatish Balay PetscFunctionBegin; 4160bdbc534SSatish Balay 4170bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4180bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4190bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4200bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4210bdbc534SSatish Balay #endif 4220bdbc534SSatish Balay row = im[i]; 423c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4240bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4250bdbc534SSatish Balay col = in[j]; 4260bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4270bdbc534SSatish Balay /* Look up into the Hash Table */ 428c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 429c2760754SSatish Balay h1 = HASH(size,key,tmp); 4300bdbc534SSatish Balay 431c2760754SSatish Balay 432c2760754SSatish Balay idx = h1; 433187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 434187ce0cbSSatish Balay insert_ct++; 435187ce0cbSSatish Balay total_ct++; 436187ce0cbSSatish Balay if (HT[idx] != key) { 437187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 438187ce0cbSSatish Balay if (idx == size) { 439187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 440187ce0cbSSatish Balay if (idx == h1) { 441187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 442187ce0cbSSatish Balay } 443187ce0cbSSatish Balay } 444187ce0cbSSatish Balay } 445187ce0cbSSatish Balay #else 446c2760754SSatish Balay if (HT[idx] != key) { 447c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 448c2760754SSatish Balay if (idx == size) { 449c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 450c2760754SSatish Balay if (idx == h1) { 451c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 452c2760754SSatish Balay } 453c2760754SSatish Balay } 454c2760754SSatish Balay } 455187ce0cbSSatish Balay #endif 456c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 457c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 458c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4590bdbc534SSatish Balay } 4600bdbc534SSatish Balay } else { 4610bdbc534SSatish Balay if (roworiented && !baij->donotstash) { 4620bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 4630bdbc534SSatish Balay } else { 4640bdbc534SSatish Balay if (!baij->donotstash) { 4650bdbc534SSatish Balay row = im[i]; 4660bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4670bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 4680bdbc534SSatish Balay } 4690bdbc534SSatish Balay } 4700bdbc534SSatish Balay } 4710bdbc534SSatish Balay } 4720bdbc534SSatish Balay } 473187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 474187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 475187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 476187ce0cbSSatish Balay #endif 4770bdbc534SSatish Balay PetscFunctionReturn(0); 4780bdbc534SSatish Balay } 4790bdbc534SSatish Balay 4800bdbc534SSatish Balay #undef __FUNC__ 4810bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4820bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4830bdbc534SSatish Balay { 4840bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4850bdbc534SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 4860bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 487b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 488c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 489c2760754SSatish Balay double tmp; 490187ce0cbSSatish Balay Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 4914a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4924a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4934a15367fSSatish Balay #endif 4940bdbc534SSatish Balay 495d0a41580SSatish Balay PetscFunctionBegin; 496d0a41580SSatish Balay 4970bdbc534SSatish Balay if (roworiented) { 4980bdbc534SSatish Balay stepval = (n-1)*bs; 4990bdbc534SSatish Balay } else { 5000bdbc534SSatish Balay stepval = (m-1)*bs; 5010bdbc534SSatish Balay } 5020bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 5030bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 5040bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 5050bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 5060bdbc534SSatish Balay #endif 5070bdbc534SSatish Balay row = im[i]; 508187ce0cbSSatish Balay v_t = v + i*bs2; 509c2760754SSatish Balay if (row >= rstart && row < rend) { 5100bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5110bdbc534SSatish Balay col = in[j]; 5120bdbc534SSatish Balay 5130bdbc534SSatish Balay /* Look up into the Hash Table */ 514c2760754SSatish Balay key = row*Nbs+col+1; 515c2760754SSatish Balay h1 = HASH(size,key,tmp); 5160bdbc534SSatish Balay 517c2760754SSatish Balay idx = h1; 518187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 519187ce0cbSSatish Balay total_ct++; 520187ce0cbSSatish Balay insert_ct++; 521187ce0cbSSatish Balay if (HT[idx] != key) { 522187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 523187ce0cbSSatish Balay if (idx == size) { 524187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 525187ce0cbSSatish Balay if (idx == h1) { 526187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 527187ce0cbSSatish Balay } 528187ce0cbSSatish Balay } 529187ce0cbSSatish Balay } 530187ce0cbSSatish Balay #else 531c2760754SSatish Balay if (HT[idx] != key) { 532c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 533c2760754SSatish Balay if (idx == size) { 534c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 535c2760754SSatish Balay if (idx == h1) { 536c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 537c2760754SSatish Balay } 538c2760754SSatish Balay } 539c2760754SSatish Balay } 540187ce0cbSSatish Balay #endif 541c2760754SSatish Balay baij_a = HD[idx]; 5420bdbc534SSatish Balay if (roworiented) { 543c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 544187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 545187ce0cbSSatish Balay value = v_t; 546187ce0cbSSatish Balay v_t += bs; 547fef45726SSatish Balay if (addv == ADD_VALUES) { 548c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 549c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 550fef45726SSatish Balay baij_a[jj] += *value++; 551b4cc0f5aSSatish Balay } 552b4cc0f5aSSatish Balay } 553fef45726SSatish Balay } else { 554c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 555c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 556fef45726SSatish Balay baij_a[jj] = *value++; 557fef45726SSatish Balay } 558fef45726SSatish Balay } 559fef45726SSatish Balay } 5600bdbc534SSatish Balay } else { 5610bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 562fef45726SSatish Balay if (addv == ADD_VALUES) { 563b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 5640bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 565fef45726SSatish Balay baij_a[jj] += *value++; 566fef45726SSatish Balay } 567fef45726SSatish Balay } 568fef45726SSatish Balay } else { 569fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 570fef45726SSatish Balay for ( jj=0; jj<bs; jj++ ) { 571fef45726SSatish Balay baij_a[jj] = *value++; 572fef45726SSatish Balay } 573b4cc0f5aSSatish Balay } 5740bdbc534SSatish Balay } 5750bdbc534SSatish Balay } 5760bdbc534SSatish Balay } 5770bdbc534SSatish Balay } else { 5780bdbc534SSatish Balay if (!baij->donotstash) { 5790bdbc534SSatish Balay if (roworiented ) { 5800bdbc534SSatish Balay row = im[i]*bs; 5810bdbc534SSatish Balay value = v + i*(stepval+bs)*bs; 5820bdbc534SSatish Balay for ( j=0; j<bs; j++,row++ ) { 5830bdbc534SSatish Balay for ( k=0; k<n; k++ ) { 5840bdbc534SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 5850bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5860bdbc534SSatish Balay } 5870bdbc534SSatish Balay } 5880bdbc534SSatish Balay } 5890bdbc534SSatish Balay } else { 5900bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5910bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 5920bdbc534SSatish Balay col = in[j]*bs; 5930bdbc534SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 5940bdbc534SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 5950bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5960bdbc534SSatish Balay } 5970bdbc534SSatish Balay } 5980bdbc534SSatish Balay } 5990bdbc534SSatish Balay } 6000bdbc534SSatish Balay } 6010bdbc534SSatish Balay } 6020bdbc534SSatish Balay } 603187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 604187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 605187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 606187ce0cbSSatish Balay #endif 6070bdbc534SSatish Balay PetscFunctionReturn(0); 6080bdbc534SSatish Balay } 609133cdb44SSatish Balay 6100bdbc534SSatish Balay #undef __FUNC__ 6115615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 612ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 613d6de1c52SSatish Balay { 614d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 615d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 61648e59246SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 617d6de1c52SSatish Balay 618133cdb44SSatish Balay PetscFunctionBegin; 619d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 620a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 621a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 622d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 623d6de1c52SSatish Balay row = idxm[i] - bsrstart; 624d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 625a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 626a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 627d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 628d6de1c52SSatish Balay col = idxn[j] - bscstart; 62998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 630d64ed03dSBarry Smith } else { 631905e6a2fSBarry Smith if (!baij->colmap) { 632905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 633905e6a2fSBarry Smith } 63448e59246SSatish Balay #if defined (USE_CTABLE) 63548e59246SSatish Balay data = TableFind( baij->colmap, idxn[j]/bs + 1 ) - 1; 63648e59246SSatish Balay #else 63748e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 63848e59246SSatish Balay #endif 63948e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 640d9d09a02SSatish Balay else { 64148e59246SSatish Balay col = data + idxn[j]%bs; 64298dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 643d6de1c52SSatish Balay } 644d6de1c52SSatish Balay } 645d6de1c52SSatish Balay } 646d64ed03dSBarry Smith } else { 647a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 648d6de1c52SSatish Balay } 649d6de1c52SSatish Balay } 6503a40ed3dSBarry Smith PetscFunctionReturn(0); 651d6de1c52SSatish Balay } 652d6de1c52SSatish Balay 6535615d1e5SSatish Balay #undef __FUNC__ 6545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 655ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 656d6de1c52SSatish Balay { 657d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 658d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 659acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 660d6de1c52SSatish Balay double sum = 0.0; 661d6de1c52SSatish Balay Scalar *v; 662d6de1c52SSatish Balay 663d64ed03dSBarry Smith PetscFunctionBegin; 664d6de1c52SSatish Balay if (baij->size == 1) { 665d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 666d6de1c52SSatish Balay } else { 667d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 668d6de1c52SSatish Balay v = amat->a; 669d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 6703a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 671e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 672d6de1c52SSatish Balay #else 673d6de1c52SSatish Balay sum += (*v)*(*v); v++; 674d6de1c52SSatish Balay #endif 675d6de1c52SSatish Balay } 676d6de1c52SSatish Balay v = bmat->a; 677d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 6783a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 679e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 680d6de1c52SSatish Balay #else 681d6de1c52SSatish Balay sum += (*v)*(*v); v++; 682d6de1c52SSatish Balay #endif 683d6de1c52SSatish Balay } 684ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 685d6de1c52SSatish Balay *norm = sqrt(*norm); 686d64ed03dSBarry Smith } else { 687e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 688d6de1c52SSatish Balay } 689d64ed03dSBarry Smith } 6903a40ed3dSBarry Smith PetscFunctionReturn(0); 691d6de1c52SSatish Balay } 69257b952d6SSatish Balay 6935615d1e5SSatish Balay #undef __FUNC__ 6945615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 695ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 69657b952d6SSatish Balay { 69757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 69857b952d6SSatish Balay MPI_Comm comm = mat->comm; 69957b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 70057b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 70157b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 70257b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 70357b952d6SSatish Balay InsertMode addv; 70457b952d6SSatish Balay Scalar *rvalues,*svalues; 70557b952d6SSatish Balay 706d64ed03dSBarry Smith PetscFunctionBegin; 707570da906SBarry Smith if (baij->donotstash) { 708570da906SBarry Smith baij->svalues = 0; baij->rvalues = 0; 709570da906SBarry Smith baij->nsends = 0; baij->nrecvs = 0; 710570da906SBarry Smith baij->send_waits = 0; baij->recv_waits = 0; 711570da906SBarry Smith baij->rmax = 0; 712570da906SBarry Smith PetscFunctionReturn(0); 713570da906SBarry Smith } 714570da906SBarry Smith 71557b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 716ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 71757b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 718a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 71957b952d6SSatish Balay } 720e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 72157b952d6SSatish Balay 72257b952d6SSatish Balay /* first count number of contributors to each processor */ 72357b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 72457b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 72557b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 72657b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 72757b952d6SSatish Balay idx = baij->stash.idx[i]; 72857b952d6SSatish Balay for ( j=0; j<size; j++ ) { 72957b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 73057b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 73157b952d6SSatish Balay } 73257b952d6SSatish Balay } 73357b952d6SSatish Balay } 73457b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 73557b952d6SSatish Balay 73657b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 73757b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 738ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 73957b952d6SSatish Balay nreceives = work[rank]; 740ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 74157b952d6SSatish Balay nmax = work[rank]; 74257b952d6SSatish Balay PetscFree(work); 74357b952d6SSatish Balay 74457b952d6SSatish Balay /* post receives: 74557b952d6SSatish Balay 1) each message will consist of ordered pairs 74657b952d6SSatish Balay (global index,value) we store the global index as a double 74757b952d6SSatish Balay to simplify the message passing. 74857b952d6SSatish Balay 2) since we don't know how long each individual message is we 74957b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 75057b952d6SSatish Balay this is a lot of wasted space. 75157b952d6SSatish Balay 75257b952d6SSatish Balay 75357b952d6SSatish Balay This could be done better. 75457b952d6SSatish Balay */ 755f8abc2e8SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 756f8abc2e8SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 75757b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 758ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 75957b952d6SSatish Balay } 76057b952d6SSatish Balay 76157b952d6SSatish Balay /* do sends: 76257b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 76357b952d6SSatish Balay the ith processor 76457b952d6SSatish Balay */ 76557b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 766d64ed03dSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 76757b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 76857b952d6SSatish Balay starts[0] = 0; 76957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 77057b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 77157b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 77257b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 77357b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 77457b952d6SSatish Balay } 77557b952d6SSatish Balay PetscFree(owner); 77657b952d6SSatish Balay starts[0] = 0; 77757b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 77857b952d6SSatish Balay count = 0; 77957b952d6SSatish Balay for ( i=0; i<size; i++ ) { 78057b952d6SSatish Balay if (procs[i]) { 781ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 78257b952d6SSatish Balay } 78357b952d6SSatish Balay } 78457b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 78557b952d6SSatish Balay 78657b952d6SSatish Balay /* Free cache space */ 78710a665d1SBarry Smith PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 78857b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 78957b952d6SSatish Balay 79057b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 79157b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 79257b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 79357b952d6SSatish Balay baij->rmax = nmax; 79457b952d6SSatish Balay 7953a40ed3dSBarry Smith PetscFunctionReturn(0); 79657b952d6SSatish Balay } 797bd7f49f5SSatish Balay 798fef45726SSatish Balay /* 799fef45726SSatish Balay Creates the hash table, and sets the table 800fef45726SSatish Balay This table is created only once. 801fef45726SSatish Balay If new entried need to be added to the matrix 802fef45726SSatish Balay then the hash table has to be destroyed and 803fef45726SSatish Balay recreated. 804fef45726SSatish Balay */ 805fef45726SSatish Balay #undef __FUNC__ 806fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 807d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 808596b8d2eSBarry Smith { 809596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 810596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 811596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 8120bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8134a15367fSSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart; 814187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 815fef45726SSatish Balay int *HT,key; 8160bdbc534SSatish Balay Scalar **HD; 817c2760754SSatish Balay double tmp; 8184a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 8194a15367fSSatish Balay int ct=0,max=0; 8204a15367fSSatish Balay #endif 821fef45726SSatish Balay 822d64ed03dSBarry Smith PetscFunctionBegin; 8230bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8240bdbc534SSatish Balay size = baij->ht_size; 825fef45726SSatish Balay 8260bdbc534SSatish Balay if (baij->ht) { 8270bdbc534SSatish Balay PetscFunctionReturn(0); 828596b8d2eSBarry Smith } 8290bdbc534SSatish Balay 830fef45726SSatish Balay /* Allocate Memory for Hash Table */ 831b9e4cc15SSatish Balay baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 832b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 833b9e4cc15SSatish Balay HD = baij->hd; 834a07cd24cSSatish Balay HT = baij->ht; 835b9e4cc15SSatish Balay 836b9e4cc15SSatish Balay 837c2760754SSatish Balay PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 8380bdbc534SSatish Balay 839596b8d2eSBarry Smith 840596b8d2eSBarry Smith /* Loop Over A */ 8410bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 842596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 8430bdbc534SSatish Balay row = i+rstart; 8440bdbc534SSatish Balay col = aj[j]+cstart; 845596b8d2eSBarry Smith 846187ce0cbSSatish Balay key = row*Nbs + col + 1; 847c2760754SSatish Balay h1 = HASH(size,key,tmp); 8480bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 8490bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8500bdbc534SSatish Balay HT[(h1+k)%size] = key; 8510bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 852596b8d2eSBarry Smith break; 853187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 854187ce0cbSSatish Balay } else { 855187ce0cbSSatish Balay ct++; 856187ce0cbSSatish Balay #endif 857596b8d2eSBarry Smith } 858187ce0cbSSatish Balay } 859187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 860187ce0cbSSatish Balay if (k> max) max = k; 861187ce0cbSSatish Balay #endif 862596b8d2eSBarry Smith } 863596b8d2eSBarry Smith } 864596b8d2eSBarry Smith /* Loop Over B */ 8650bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 866596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 8670bdbc534SSatish Balay row = i+rstart; 8680bdbc534SSatish Balay col = garray[bj[j]]; 869187ce0cbSSatish Balay key = row*Nbs + col + 1; 870c2760754SSatish Balay h1 = HASH(size,key,tmp); 8710bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 8720bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8730bdbc534SSatish Balay HT[(h1+k)%size] = key; 8740bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 875596b8d2eSBarry Smith break; 876187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 877187ce0cbSSatish Balay } else { 878187ce0cbSSatish Balay ct++; 879187ce0cbSSatish Balay #endif 880596b8d2eSBarry Smith } 881187ce0cbSSatish Balay } 882187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 883187ce0cbSSatish Balay if (k> max) max = k; 884187ce0cbSSatish Balay #endif 885596b8d2eSBarry Smith } 886596b8d2eSBarry Smith } 887596b8d2eSBarry Smith 888596b8d2eSBarry Smith /* Print Summary */ 889187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 890c2760754SSatish Balay for ( i=0,j=0; i<size; i++) 891596b8d2eSBarry Smith if (HT[i]) {j++;} 892187ce0cbSSatish Balay PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 893187ce0cbSSatish Balay (j== 0)? 0.0:((double)(ct+j))/j,max); 894187ce0cbSSatish Balay #endif 8953a40ed3dSBarry Smith PetscFunctionReturn(0); 896596b8d2eSBarry Smith } 89757b952d6SSatish Balay 8985615d1e5SSatish Balay #undef __FUNC__ 8995615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 900ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 90157b952d6SSatish Balay { 90257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 90357b952d6SSatish Balay MPI_Status *send_status,recv_status; 90457b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 905b7029e64SSatish Balay int bs=baij->bs,row,col,other_disassembled; 90657b952d6SSatish Balay Scalar *values,val; 907e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 90857b952d6SSatish Balay 909d64ed03dSBarry Smith PetscFunctionBegin; 91057b952d6SSatish Balay /* wait on receives */ 91157b952d6SSatish Balay while (count) { 912ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 91357b952d6SSatish Balay /* unpack receives into our local space */ 91457b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 915ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 91657b952d6SSatish Balay n = n/3; 91757b952d6SSatish Balay for ( i=0; i<n; i++ ) { 91857b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 91957b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 92057b952d6SSatish Balay val = values[3*i+2]; 92157b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 92257b952d6SSatish Balay col -= baij->cstart*bs; 9236fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 924d64ed03dSBarry Smith } else { 92557b952d6SSatish Balay if (mat->was_assembled) { 926905e6a2fSBarry Smith if (!baij->colmap) { 927905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 928905e6a2fSBarry Smith } 92948e59246SSatish Balay #if defined (USE_CTABLE) 93048e59246SSatish Balay col = TableFind( baij->colmap, col/bs + 1 ) - 1 + col%bs; 93148e59246SSatish Balay #else 932a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 93348e59246SSatish Balay #endif 93457b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 93557b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 93657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 93757b952d6SSatish Balay } 93857b952d6SSatish Balay } 9396fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 94057b952d6SSatish Balay } 94157b952d6SSatish Balay } 94257b952d6SSatish Balay count--; 94357b952d6SSatish Balay } 944570da906SBarry Smith if (baij->recv_waits) PetscFree(baij->recv_waits); 945570da906SBarry Smith if (baij->rvalues) PetscFree(baij->rvalues); 94657b952d6SSatish Balay 94757b952d6SSatish Balay /* wait on sends */ 94857b952d6SSatish Balay if (baij->nsends) { 949d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 950ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 95157b952d6SSatish Balay PetscFree(send_status); 95257b952d6SSatish Balay } 953570da906SBarry Smith if (baij->send_waits) PetscFree(baij->send_waits); 954570da906SBarry Smith if (baij->svalues) PetscFree(baij->svalues); 95557b952d6SSatish Balay 95657b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 95757b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 95857b952d6SSatish Balay 95957b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 96057b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 9616e713f22SBarry Smith /* 9626e713f22SBarry Smith if nonzero structure of submatrix B cannot change then we know that 9636e713f22SBarry Smith no processor disassembled thus we can skip this stuff 9646e713f22SBarry Smith */ 9656e713f22SBarry Smith if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 966ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 96757b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 96857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 96957b952d6SSatish Balay } 9706e713f22SBarry Smith } 97157b952d6SSatish Balay 9726d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 97357b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 97457b952d6SSatish Balay } 97557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 97657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 97757b952d6SSatish Balay 978187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 979187ce0cbSSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 980187ce0cbSSatish Balay PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 981187ce0cbSSatish Balay ((double)baij->ht_total_ct)/baij->ht_insert_ct); 982187ce0cbSSatish Balay baij->ht_total_ct = 0; 983187ce0cbSSatish Balay baij->ht_insert_ct = 0; 984187ce0cbSSatish Balay } 985187ce0cbSSatish Balay #endif 986133cdb44SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 987133cdb44SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 988f830108cSBarry Smith mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 989f830108cSBarry Smith mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 990bd7f49f5SSatish Balay } 991187ce0cbSSatish Balay 99257b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 9933a40ed3dSBarry Smith PetscFunctionReturn(0); 99457b952d6SSatish Balay } 99557b952d6SSatish Balay 9965615d1e5SSatish Balay #undef __FUNC__ 9975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 99857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 99957b952d6SSatish Balay { 100057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 100157b952d6SSatish Balay int ierr; 100257b952d6SSatish Balay 1003d64ed03dSBarry Smith PetscFunctionBegin; 100457b952d6SSatish Balay if (baij->size == 1) { 100557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1006a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 10073a40ed3dSBarry Smith PetscFunctionReturn(0); 100857b952d6SSatish Balay } 100957b952d6SSatish Balay 10105615d1e5SSatish Balay #undef __FUNC__ 10117b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 10127b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 101357b952d6SSatish Balay { 101457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 101577ed5343SBarry Smith int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 101657b952d6SSatish Balay FILE *fd; 101757b952d6SSatish Balay ViewerType vtype; 101857b952d6SSatish Balay 1019d64ed03dSBarry Smith PetscFunctionBegin; 102057b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 10213f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 102257b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 1023639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 10244e220ebcSLois Curfman McInnes MatInfo info; 102557b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 102657b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 10274e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 102857b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 102957b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 10304e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10314e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 10324e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 10334e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 10344e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 10354e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 103657b952d6SSatish Balay fflush(fd); 103757b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 103857b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 10393a40ed3dSBarry Smith PetscFunctionReturn(0); 1040d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1041bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 10423a40ed3dSBarry Smith PetscFunctionReturn(0); 104357b952d6SSatish Balay } 104457b952d6SSatish Balay } 104557b952d6SSatish Balay 10463f1db9ecSBarry Smith if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 104757b952d6SSatish Balay Draw draw; 104857b952d6SSatish Balay PetscTruth isnull; 104977ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 10503a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 105157b952d6SSatish Balay } 105257b952d6SSatish Balay 105357b952d6SSatish Balay if (size == 1) { 105457b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1055d64ed03dSBarry Smith } else { 105657b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 105757b952d6SSatish Balay Mat A; 105857b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 105940011551SBarry Smith int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 106057b952d6SSatish Balay int mbs = baij->mbs; 106157b952d6SSatish Balay Scalar *a; 106257b952d6SSatish Balay 106357b952d6SSatish Balay if (!rank) { 106455843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1065d64ed03dSBarry Smith } else { 106655843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 106757b952d6SSatish Balay } 106857b952d6SSatish Balay PLogObjectParent(mat,A); 106957b952d6SSatish Balay 107057b952d6SSatish Balay /* copy over the A part */ 107157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 107257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 107357b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 107457b952d6SSatish Balay 107557b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 107657b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 107757b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 107857b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 107957b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 108057b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1081cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1082cee3aa6bSSatish Balay col++; a += bs; 108357b952d6SSatish Balay } 108457b952d6SSatish Balay } 108557b952d6SSatish Balay } 108657b952d6SSatish Balay /* copy over the B part */ 108757b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 108857b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 108957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 109057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 109157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 109257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 109357b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 109457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1095cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1096cee3aa6bSSatish Balay col++; a += bs; 109757b952d6SSatish Balay } 109857b952d6SSatish Balay } 109957b952d6SSatish Balay } 110057b952d6SSatish Balay PetscFree(rvals); 11016d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11026d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 110355843e3eSBarry Smith /* 110455843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 110555843e3eSBarry Smith synchronized across all processors that share the Draw object 110655843e3eSBarry Smith */ 11073f1db9ecSBarry Smith if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 110857b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 110957b952d6SSatish Balay } 111057b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 111157b952d6SSatish Balay } 11123a40ed3dSBarry Smith PetscFunctionReturn(0); 111357b952d6SSatish Balay } 111457b952d6SSatish Balay 111557b952d6SSatish Balay 111657b952d6SSatish Balay 11175615d1e5SSatish Balay #undef __FUNC__ 11185615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 1119e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 112057b952d6SSatish Balay { 112157b952d6SSatish Balay int ierr; 112257b952d6SSatish Balay ViewerType vtype; 112357b952d6SSatish Balay 1124d64ed03dSBarry Smith PetscFunctionBegin; 112557b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 11263f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 11277b2a1423SBarry Smith PetscTypeCompare(vtype,SOCKET_VIEWER)) { 11287b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 11293f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 11303a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 11315cd90555SBarry Smith } else { 11325cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 113357b952d6SSatish Balay } 11343a40ed3dSBarry Smith PetscFunctionReturn(0); 113557b952d6SSatish Balay } 113657b952d6SSatish Balay 11375615d1e5SSatish Balay #undef __FUNC__ 11385615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1139e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 114079bdfe76SSatish Balay { 114179bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 114279bdfe76SSatish Balay int ierr; 114379bdfe76SSatish Balay 1144d64ed03dSBarry Smith PetscFunctionBegin; 114598dd23e9SBarry Smith if (--mat->refct > 0) PetscFunctionReturn(0); 114698dd23e9SBarry Smith 114798dd23e9SBarry Smith if (mat->mapping) { 114898dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 114998dd23e9SBarry Smith } 115098dd23e9SBarry Smith if (mat->bmapping) { 115198dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 115298dd23e9SBarry Smith } 115361b13de0SBarry Smith if (mat->rmap) { 115461b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 115561b13de0SBarry Smith } 115661b13de0SBarry Smith if (mat->cmap) { 115761b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 115861b13de0SBarry Smith } 11593a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 1160e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 116179bdfe76SSatish Balay #endif 116279bdfe76SSatish Balay 116383e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 116479bdfe76SSatish Balay PetscFree(baij->rowners); 116579bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 116679bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 116748e59246SSatish Balay #if defined (USE_CTABLE) 116848e59246SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 116948e59246SSatish Balay #else 117079bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 117148e59246SSatish Balay #endif 117279bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 117379bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 117479bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 117579bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 117630793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 1177b9e4cc15SSatish Balay if (baij->hd) PetscFree(baij->hd); 117879bdfe76SSatish Balay PetscFree(baij); 117979bdfe76SSatish Balay PLogObjectDestroy(mat); 118079bdfe76SSatish Balay PetscHeaderDestroy(mat); 11813a40ed3dSBarry Smith PetscFunctionReturn(0); 118279bdfe76SSatish Balay } 118379bdfe76SSatish Balay 11845615d1e5SSatish Balay #undef __FUNC__ 11855615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1186ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1187cee3aa6bSSatish Balay { 1188cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 118947b4a8eaSLois Curfman McInnes int ierr, nt; 1190cee3aa6bSSatish Balay 1191d64ed03dSBarry Smith PetscFunctionBegin; 1192e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 119347b4a8eaSLois Curfman McInnes if (nt != a->n) { 1194a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 119547b4a8eaSLois Curfman McInnes } 1196e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 119747b4a8eaSLois Curfman McInnes if (nt != a->m) { 1198a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 119947b4a8eaSLois Curfman McInnes } 120043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1201f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 120243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1203f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 120443a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12053a40ed3dSBarry Smith PetscFunctionReturn(0); 1206cee3aa6bSSatish Balay } 1207cee3aa6bSSatish Balay 12085615d1e5SSatish Balay #undef __FUNC__ 12095615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1210ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1211cee3aa6bSSatish Balay { 1212cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1213cee3aa6bSSatish Balay int ierr; 1214d64ed03dSBarry Smith 1215d64ed03dSBarry Smith PetscFunctionBegin; 121643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1217f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 121843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1219f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 12203a40ed3dSBarry Smith PetscFunctionReturn(0); 1221cee3aa6bSSatish Balay } 1222cee3aa6bSSatish Balay 12235615d1e5SSatish Balay #undef __FUNC__ 12245615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1225ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1226cee3aa6bSSatish Balay { 1227cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1228cee3aa6bSSatish Balay int ierr; 1229cee3aa6bSSatish Balay 1230d64ed03dSBarry Smith PetscFunctionBegin; 1231cee3aa6bSSatish Balay /* do nondiagonal part */ 1232f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1233cee3aa6bSSatish Balay /* send it on its way */ 1234537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1235cee3aa6bSSatish Balay /* do local part */ 1236f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1237cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1238cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1239cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1240639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12413a40ed3dSBarry Smith PetscFunctionReturn(0); 1242cee3aa6bSSatish Balay } 1243cee3aa6bSSatish Balay 12445615d1e5SSatish Balay #undef __FUNC__ 12455615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1246ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1247cee3aa6bSSatish Balay { 1248cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1249cee3aa6bSSatish Balay int ierr; 1250cee3aa6bSSatish Balay 1251d64ed03dSBarry Smith PetscFunctionBegin; 1252cee3aa6bSSatish Balay /* do nondiagonal part */ 1253f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1254cee3aa6bSSatish Balay /* send it on its way */ 1255537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1256cee3aa6bSSatish Balay /* do local part */ 1257f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1258cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1259cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1260cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1261537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 12623a40ed3dSBarry Smith PetscFunctionReturn(0); 1263cee3aa6bSSatish Balay } 1264cee3aa6bSSatish Balay 1265cee3aa6bSSatish Balay /* 1266cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1267cee3aa6bSSatish Balay diagonal block 1268cee3aa6bSSatish Balay */ 12695615d1e5SSatish Balay #undef __FUNC__ 12705615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1271ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1272cee3aa6bSSatish Balay { 1273cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 12743a40ed3dSBarry Smith int ierr; 1275d64ed03dSBarry Smith 1276d64ed03dSBarry Smith PetscFunctionBegin; 1277a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 12783a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 12793a40ed3dSBarry Smith PetscFunctionReturn(0); 1280cee3aa6bSSatish Balay } 1281cee3aa6bSSatish Balay 12825615d1e5SSatish Balay #undef __FUNC__ 12835615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1284ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1285cee3aa6bSSatish Balay { 1286cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1287cee3aa6bSSatish Balay int ierr; 1288d64ed03dSBarry Smith 1289d64ed03dSBarry Smith PetscFunctionBegin; 1290cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1291cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 12923a40ed3dSBarry Smith PetscFunctionReturn(0); 1293cee3aa6bSSatish Balay } 1294026e39d0SSatish Balay 12955615d1e5SSatish Balay #undef __FUNC__ 12965615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1297ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 129857b952d6SSatish Balay { 129957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1300d64ed03dSBarry Smith 1301d64ed03dSBarry Smith PetscFunctionBegin; 1302bd7f49f5SSatish Balay if (m) *m = mat->M; 1303bd7f49f5SSatish Balay if (n) *n = mat->N; 13043a40ed3dSBarry Smith PetscFunctionReturn(0); 130557b952d6SSatish Balay } 130657b952d6SSatish Balay 13075615d1e5SSatish Balay #undef __FUNC__ 13085615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1309ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 131057b952d6SSatish Balay { 131157b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1312d64ed03dSBarry Smith 1313d64ed03dSBarry Smith PetscFunctionBegin; 1314f830108cSBarry Smith *m = mat->m; *n = mat->n; 13153a40ed3dSBarry Smith PetscFunctionReturn(0); 131657b952d6SSatish Balay } 131757b952d6SSatish Balay 13185615d1e5SSatish Balay #undef __FUNC__ 13195615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1320ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 132157b952d6SSatish Balay { 132257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1323d64ed03dSBarry Smith 1324d64ed03dSBarry Smith PetscFunctionBegin; 132557b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 13263a40ed3dSBarry Smith PetscFunctionReturn(0); 132757b952d6SSatish Balay } 132857b952d6SSatish Balay 1329acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1330acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1331acdf5bf4SSatish Balay 13325615d1e5SSatish Balay #undef __FUNC__ 13335615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1334acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1335acdf5bf4SSatish Balay { 1336acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1337acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1338acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1339d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1340d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1341acdf5bf4SSatish Balay 1342d64ed03dSBarry Smith PetscFunctionBegin; 1343a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1344acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1345acdf5bf4SSatish Balay 1346acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1347acdf5bf4SSatish Balay /* 1348acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1349acdf5bf4SSatish Balay */ 1350acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1351bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1352bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1353acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1354acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1355acdf5bf4SSatish Balay } 1356acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1357acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1358acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1359acdf5bf4SSatish Balay } 1360acdf5bf4SSatish Balay 1361a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1362d9d09a02SSatish Balay lrow = row - brstart; 1363acdf5bf4SSatish Balay 1364acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1365acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1366acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1367f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1368f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1369acdf5bf4SSatish Balay nztot = nzA + nzB; 1370acdf5bf4SSatish Balay 1371acdf5bf4SSatish Balay cmap = mat->garray; 1372acdf5bf4SSatish Balay if (v || idx) { 1373acdf5bf4SSatish Balay if (nztot) { 1374acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1375acdf5bf4SSatish Balay int imark = -1; 1376acdf5bf4SSatish Balay if (v) { 1377acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1378acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1379d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1380acdf5bf4SSatish Balay else break; 1381acdf5bf4SSatish Balay } 1382acdf5bf4SSatish Balay imark = i; 1383acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1384acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1385acdf5bf4SSatish Balay } 1386acdf5bf4SSatish Balay if (idx) { 1387acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1388acdf5bf4SSatish Balay if (imark > -1) { 1389acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1390bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1391acdf5bf4SSatish Balay } 1392acdf5bf4SSatish Balay } else { 1393acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1394d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1395d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1396acdf5bf4SSatish Balay else break; 1397acdf5bf4SSatish Balay } 1398acdf5bf4SSatish Balay imark = i; 1399acdf5bf4SSatish Balay } 1400d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1401d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1402acdf5bf4SSatish Balay } 1403d64ed03dSBarry Smith } else { 1404d212a18eSSatish Balay if (idx) *idx = 0; 1405d212a18eSSatish Balay if (v) *v = 0; 1406d212a18eSSatish Balay } 1407acdf5bf4SSatish Balay } 1408acdf5bf4SSatish Balay *nz = nztot; 1409f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1410f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 14113a40ed3dSBarry Smith PetscFunctionReturn(0); 1412acdf5bf4SSatish Balay } 1413acdf5bf4SSatish Balay 14145615d1e5SSatish Balay #undef __FUNC__ 14155615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1416acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1417acdf5bf4SSatish Balay { 1418acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1419d64ed03dSBarry Smith 1420d64ed03dSBarry Smith PetscFunctionBegin; 1421acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1422a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1423acdf5bf4SSatish Balay } 1424acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14253a40ed3dSBarry Smith PetscFunctionReturn(0); 1426acdf5bf4SSatish Balay } 1427acdf5bf4SSatish Balay 14285615d1e5SSatish Balay #undef __FUNC__ 14295615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1430ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14315a838052SSatish Balay { 14325a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1433d64ed03dSBarry Smith 1434d64ed03dSBarry Smith PetscFunctionBegin; 14355a838052SSatish Balay *bs = baij->bs; 14363a40ed3dSBarry Smith PetscFunctionReturn(0); 14375a838052SSatish Balay } 14385a838052SSatish Balay 14395615d1e5SSatish Balay #undef __FUNC__ 14405615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1441ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 144258667388SSatish Balay { 144358667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 144458667388SSatish Balay int ierr; 1445d64ed03dSBarry Smith 1446d64ed03dSBarry Smith PetscFunctionBegin; 144758667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 144858667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 14493a40ed3dSBarry Smith PetscFunctionReturn(0); 145058667388SSatish Balay } 14510ac07820SSatish Balay 14525615d1e5SSatish Balay #undef __FUNC__ 14535615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1454ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14550ac07820SSatish Balay { 14564e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 14574e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 14587d57db60SLois Curfman McInnes int ierr; 14597d57db60SLois Curfman McInnes double isend[5], irecv[5]; 14600ac07820SSatish Balay 1461d64ed03dSBarry Smith PetscFunctionBegin; 14624e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 14634e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 14640e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1465de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14664e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 14670e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1468de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14690ac07820SSatish Balay if (flag == MAT_LOCAL) { 14704e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14714e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14724e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14734e220ebcSLois Curfman McInnes info->memory = isend[3]; 14744e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14750ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1476f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 14774e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14784e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14794e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14804e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14814e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14820ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1483f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 14844e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14854e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14864e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14874e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14884e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14890ac07820SSatish Balay } 14905a5d4f66SBarry Smith info->rows_global = (double)a->M; 14915a5d4f66SBarry Smith info->columns_global = (double)a->N; 14925a5d4f66SBarry Smith info->rows_local = (double)a->m; 14935a5d4f66SBarry Smith info->columns_local = (double)a->N; 14944e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14954e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14964e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14973a40ed3dSBarry Smith PetscFunctionReturn(0); 14980ac07820SSatish Balay } 14990ac07820SSatish Balay 15005615d1e5SSatish Balay #undef __FUNC__ 15015615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1502ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 150358667388SSatish Balay { 150458667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 150558667388SSatish Balay 1506d64ed03dSBarry Smith PetscFunctionBegin; 150758667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 150858667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 15096da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1510c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 1511*4787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1512*4787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 1513b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1514b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1515b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1516aeafbbfcSLois Curfman McInnes a->roworiented = 1; 151758667388SSatish Balay MatSetOption(a->A,op); 151858667388SSatish Balay MatSetOption(a->B,op); 1519b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 15206da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 152158667388SSatish Balay op == MAT_SYMMETRIC || 152258667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1523b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 1524b51ba29fSSatish Balay op == MAT_USE_HASH_TABLE) 152558667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 152658667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 152758667388SSatish Balay a->roworiented = 0; 152858667388SSatish Balay MatSetOption(a->A,op); 152958667388SSatish Balay MatSetOption(a->B,op); 15302b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 153190f02eecSBarry Smith a->donotstash = 1; 1532d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1533d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1534133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 1535133cdb44SSatish Balay a->ht_flag = 1; 1536d64ed03dSBarry Smith } else { 1537d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1538d64ed03dSBarry Smith } 15393a40ed3dSBarry Smith PetscFunctionReturn(0); 154058667388SSatish Balay } 154158667388SSatish Balay 15425615d1e5SSatish Balay #undef __FUNC__ 15435615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1544ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15450ac07820SSatish Balay { 15460ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 15470ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15480ac07820SSatish Balay Mat B; 154940011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 15500ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15510ac07820SSatish Balay Scalar *a; 15520ac07820SSatish Balay 1553d64ed03dSBarry Smith PetscFunctionBegin; 1554a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 15550ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 15560ac07820SSatish Balay CHKERRQ(ierr); 15570ac07820SSatish Balay 15580ac07820SSatish Balay /* copy over the A part */ 15590ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 15600ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15610ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 15620ac07820SSatish Balay 15630ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 15640ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15650ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 15660ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 15670ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15680ac07820SSatish Balay for (k=0; k<bs; k++ ) { 15690ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15700ac07820SSatish Balay col++; a += bs; 15710ac07820SSatish Balay } 15720ac07820SSatish Balay } 15730ac07820SSatish Balay } 15740ac07820SSatish Balay /* copy over the B part */ 15750ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 15760ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15770ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 15780ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15790ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 15800ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 15810ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 15820ac07820SSatish Balay for (k=0; k<bs; k++ ) { 15830ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15840ac07820SSatish Balay col++; a += bs; 15850ac07820SSatish Balay } 15860ac07820SSatish Balay } 15870ac07820SSatish Balay } 15880ac07820SSatish Balay PetscFree(rvals); 15890ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15900ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15910ac07820SSatish Balay 15920ac07820SSatish Balay if (matout != PETSC_NULL) { 15930ac07820SSatish Balay *matout = B; 15940ac07820SSatish Balay } else { 1595f830108cSBarry Smith PetscOps *Abops; 1596cc2dc46cSBarry Smith MatOps Aops; 1597f830108cSBarry Smith 15980ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 15990ac07820SSatish Balay PetscFree(baij->rowners); 16000ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 16010ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1602b1fc9764SSatish Balay #if defined (USE_CTABLE) 1603b1fc9764SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 1604b1fc9764SSatish Balay #else 16050ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 1606b1fc9764SSatish Balay #endif 16070ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 16080ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 16090ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 16100ac07820SSatish Balay PetscFree(baij); 1611f830108cSBarry Smith 1612f830108cSBarry Smith /* 1613f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1614f830108cSBarry Smith A pointers for the bops and ops but copy everything 1615f830108cSBarry Smith else from C. 1616f830108cSBarry Smith */ 1617f830108cSBarry Smith Abops = A->bops; 1618f830108cSBarry Smith Aops = A->ops; 1619f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1620f830108cSBarry Smith A->bops = Abops; 1621f830108cSBarry Smith A->ops = Aops; 1622f830108cSBarry Smith 16230ac07820SSatish Balay PetscHeaderDestroy(B); 16240ac07820SSatish Balay } 16253a40ed3dSBarry Smith PetscFunctionReturn(0); 16260ac07820SSatish Balay } 16270e95ebc0SSatish Balay 16285615d1e5SSatish Balay #undef __FUNC__ 16295615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 16300e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 16310e95ebc0SSatish Balay { 16320e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 16330e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 16340e95ebc0SSatish Balay int ierr,s1,s2,s3; 16350e95ebc0SSatish Balay 1636d64ed03dSBarry Smith PetscFunctionBegin; 16370e95ebc0SSatish Balay if (ll) { 16380e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 16390e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1640a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 16410e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 16420e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 16430e95ebc0SSatish Balay } 1644a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 16453a40ed3dSBarry Smith PetscFunctionReturn(0); 16460e95ebc0SSatish Balay } 16470e95ebc0SSatish Balay 16485615d1e5SSatish Balay #undef __FUNC__ 16495615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1650ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 16510ac07820SSatish Balay { 16520ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 16530ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1654a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 16550ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16560ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1657a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16580ac07820SSatish Balay MPI_Comm comm = A->comm; 16590ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16600ac07820SSatish Balay MPI_Status recv_status,*send_status; 16610ac07820SSatish Balay IS istmp; 16620ac07820SSatish Balay 1663d64ed03dSBarry Smith PetscFunctionBegin; 16640ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 16650ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 16660ac07820SSatish Balay 16670ac07820SSatish Balay /* first count number of contributors to each processor */ 16680ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 16690ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 16700ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 16710ac07820SSatish Balay for ( i=0; i<N; i++ ) { 16720ac07820SSatish Balay idx = rows[i]; 16730ac07820SSatish Balay found = 0; 16740ac07820SSatish Balay for ( j=0; j<size; j++ ) { 16750ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 16760ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 16770ac07820SSatish Balay } 16780ac07820SSatish Balay } 1679a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 16800ac07820SSatish Balay } 16810ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 16820ac07820SSatish Balay 16830ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 16840ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1685ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 16860ac07820SSatish Balay nrecvs = work[rank]; 1687ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 16880ac07820SSatish Balay nmax = work[rank]; 16890ac07820SSatish Balay PetscFree(work); 16900ac07820SSatish Balay 16910ac07820SSatish Balay /* post receives: */ 1692d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1693d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 16940ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1695ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 16960ac07820SSatish Balay } 16970ac07820SSatish Balay 16980ac07820SSatish Balay /* do sends: 16990ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17000ac07820SSatish Balay the ith processor 17010ac07820SSatish Balay */ 17020ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1703ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 17040ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 17050ac07820SSatish Balay starts[0] = 0; 17060ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 17070ac07820SSatish Balay for ( i=0; i<N; i++ ) { 17080ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17090ac07820SSatish Balay } 17100ac07820SSatish Balay ISRestoreIndices(is,&rows); 17110ac07820SSatish Balay 17120ac07820SSatish Balay starts[0] = 0; 17130ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 17140ac07820SSatish Balay count = 0; 17150ac07820SSatish Balay for ( i=0; i<size; i++ ) { 17160ac07820SSatish Balay if (procs[i]) { 1717ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17180ac07820SSatish Balay } 17190ac07820SSatish Balay } 17200ac07820SSatish Balay PetscFree(starts); 17210ac07820SSatish Balay 17220ac07820SSatish Balay base = owners[rank]*bs; 17230ac07820SSatish Balay 17240ac07820SSatish Balay /* wait on receives */ 17250ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 17260ac07820SSatish Balay source = lens + nrecvs; 17270ac07820SSatish Balay count = nrecvs; slen = 0; 17280ac07820SSatish Balay while (count) { 1729ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17300ac07820SSatish Balay /* unpack receives into our local space */ 1731ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17320ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17330ac07820SSatish Balay lens[imdex] = n; 17340ac07820SSatish Balay slen += n; 17350ac07820SSatish Balay count--; 17360ac07820SSatish Balay } 17370ac07820SSatish Balay PetscFree(recv_waits); 17380ac07820SSatish Balay 17390ac07820SSatish Balay /* move the data into the send scatter */ 17400ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 17410ac07820SSatish Balay count = 0; 17420ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 17430ac07820SSatish Balay values = rvalues + i*nmax; 17440ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 17450ac07820SSatish Balay lrows[count++] = values[j] - base; 17460ac07820SSatish Balay } 17470ac07820SSatish Balay } 17480ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 17490ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 17500ac07820SSatish Balay 17510ac07820SSatish Balay /* actually zap the local rows */ 1752029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 17530ac07820SSatish Balay PLogObjectParent(A,istmp); 1754a07cd24cSSatish Balay 175572dacd9aSBarry Smith /* 175672dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 175772dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 175872dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 175972dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 176072dacd9aSBarry Smith 176172dacd9aSBarry Smith Contributed by: Mathew Knepley 176272dacd9aSBarry Smith */ 17639c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 17640ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 17659c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 17669c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 17679c957beeSSatish Balay } else if (diag) { 17689c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1769a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1770a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1771a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1772a07cd24cSSatish Balay } 1773a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1774a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17759c957beeSSatish Balay } else { 17769c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1777a07cd24cSSatish Balay } 17789c957beeSSatish Balay 17799c957beeSSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 1780a07cd24cSSatish Balay PetscFree(lrows); 1781a07cd24cSSatish Balay 17820ac07820SSatish Balay /* wait on sends */ 17830ac07820SSatish Balay if (nsends) { 1784d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1785ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 17860ac07820SSatish Balay PetscFree(send_status); 17870ac07820SSatish Balay } 17880ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 17890ac07820SSatish Balay 17903a40ed3dSBarry Smith PetscFunctionReturn(0); 17910ac07820SSatish Balay } 179272dacd9aSBarry Smith 1793ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 17945615d1e5SSatish Balay #undef __FUNC__ 17955615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1796ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1797ba4ca20aSSatish Balay { 1798ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 179925fdafccSSatish Balay MPI_Comm comm = A->comm; 1800133cdb44SSatish Balay static int called = 0; 18013a40ed3dSBarry Smith int ierr; 1802ba4ca20aSSatish Balay 1803d64ed03dSBarry Smith PetscFunctionBegin; 18043a40ed3dSBarry Smith if (!a->rank) { 18053a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 180625fdafccSSatish Balay } 180725fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1808133cdb44SSatish Balay (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1809133cdb44SSatish Balay (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 18103a40ed3dSBarry Smith PetscFunctionReturn(0); 1811ba4ca20aSSatish Balay } 18120ac07820SSatish Balay 18135615d1e5SSatish Balay #undef __FUNC__ 18145615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1815ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1816bb5a7306SBarry Smith { 1817bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1818bb5a7306SBarry Smith int ierr; 1819d64ed03dSBarry Smith 1820d64ed03dSBarry Smith PetscFunctionBegin; 1821bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 18223a40ed3dSBarry Smith PetscFunctionReturn(0); 1823bb5a7306SBarry Smith } 1824bb5a7306SBarry Smith 18252e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18260ac07820SSatish Balay 182779bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1828cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1829cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1830cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1831cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1832cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1833cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 1834cc2dc46cSBarry Smith MatMultTrans_MPIBAIJ, 1835cc2dc46cSBarry Smith MatMultTransAdd_MPIBAIJ, 1836cc2dc46cSBarry Smith 0, 1837cc2dc46cSBarry Smith 0, 1838cc2dc46cSBarry Smith 0, 1839cc2dc46cSBarry Smith 0, 1840cc2dc46cSBarry Smith 0, 1841cc2dc46cSBarry Smith 0, 1842cc2dc46cSBarry Smith 0, 1843cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1844cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 1845cc2dc46cSBarry Smith 0, 1846cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1847cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1848cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1849cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1850cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1851cc2dc46cSBarry Smith 0, 1852cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1853cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1854cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1855cc2dc46cSBarry Smith 0, 1856cc2dc46cSBarry Smith 0, 1857cc2dc46cSBarry Smith 0, 1858cc2dc46cSBarry Smith 0, 1859cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1860cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1861cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1862cc2dc46cSBarry Smith 0, 1863cc2dc46cSBarry Smith 0, 1864cc2dc46cSBarry Smith 0, 1865cc2dc46cSBarry Smith 0, 18662e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1867cc2dc46cSBarry Smith 0, 1868cc2dc46cSBarry Smith 0, 1869cc2dc46cSBarry Smith 0, 1870cc2dc46cSBarry Smith 0, 1871cc2dc46cSBarry Smith 0, 1872cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1873cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1874cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1875cc2dc46cSBarry Smith 0, 1876cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1877cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1878cc2dc46cSBarry Smith 0, 1879cc2dc46cSBarry Smith 0, 1880cc2dc46cSBarry Smith 0, 1881cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1882cc2dc46cSBarry Smith 0, 1883cc2dc46cSBarry Smith 0, 1884cc2dc46cSBarry Smith 0, 1885cc2dc46cSBarry Smith 0, 1886cc2dc46cSBarry Smith 0, 1887cc2dc46cSBarry Smith 0, 1888cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1889cc2dc46cSBarry Smith 0, 1890cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1891cc2dc46cSBarry Smith 0, 1892cc2dc46cSBarry Smith 0, 1893cc2dc46cSBarry Smith 0, 1894cc2dc46cSBarry Smith MatGetMaps_Petsc}; 189579bdfe76SSatish Balay 18965ef9f2a5SBarry Smith 1897e18c124aSSatish Balay EXTERN_C_BEGIN 18985ef9f2a5SBarry Smith #undef __FUNC__ 18995ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 19005ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19015ef9f2a5SBarry Smith { 19025ef9f2a5SBarry Smith PetscFunctionBegin; 19035ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19045ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19055ef9f2a5SBarry Smith PetscFunctionReturn(0); 19065ef9f2a5SBarry Smith } 1907e18c124aSSatish Balay EXTERN_C_END 190879bdfe76SSatish Balay 19095615d1e5SSatish Balay #undef __FUNC__ 19105615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 191179bdfe76SSatish Balay /*@C 191279bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 191379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 191479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 191579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 191679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 191779bdfe76SSatish Balay 1918db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1919db81eaa0SLois Curfman McInnes 192079bdfe76SSatish Balay Input Parameters: 1921db81eaa0SLois Curfman McInnes + comm - MPI communicator 192279bdfe76SSatish Balay . bs - size of blockk 192379bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 192492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 192592e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 192692e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 192792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 192892e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1929be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1930be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 193179bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 193279bdfe76SSatish Balay submatrix (same for all local rows) 193392e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 193492e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 1935db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 193692e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 193779bdfe76SSatish Balay submatrix (same for all local rows). 1938db81eaa0SLois Curfman McInnes - o_nzz - array containing the number of nonzeros in the various block rows of the 193992e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 194092e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 194179bdfe76SSatish Balay 194279bdfe76SSatish Balay Output Parameter: 194379bdfe76SSatish Balay . A - the matrix 194479bdfe76SSatish Balay 1945db81eaa0SLois Curfman McInnes Options Database Keys: 1946db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 1947db81eaa0SLois Curfman McInnes block calculations (much slower) 1948db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 1949494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1950494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 19513ffaccefSLois Curfman McInnes 1952b259b22eSLois Curfman McInnes Notes: 195379bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 195479bdfe76SSatish Balay (possibly both). 195579bdfe76SSatish Balay 1956be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1957be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 1958be79a94dSBarry Smith 195979bdfe76SSatish Balay Storage Information: 196079bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 196179bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 196279bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 196379bdfe76SSatish Balay local matrix (a rectangular submatrix). 196479bdfe76SSatish Balay 196579bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 196679bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 196779bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 196879bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 196979bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 197079bdfe76SSatish Balay 197179bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 197279bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 197379bdfe76SSatish Balay 1974db81eaa0SLois Curfman McInnes .vb 1975db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 1976db81eaa0SLois Curfman McInnes ------------------- 1977db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 1978db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 1979db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 1980db81eaa0SLois Curfman McInnes ------------------- 1981db81eaa0SLois Curfman McInnes .ve 198279bdfe76SSatish Balay 198379bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 198479bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 198579bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 198657b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 198779bdfe76SSatish Balay 1988d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1989d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 199079bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 199192e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 199292e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 19936da5968aSLois Curfman McInnes matrices. 199479bdfe76SSatish Balay 1995027ccd11SLois Curfman McInnes Level: intermediate 1996027ccd11SLois Curfman McInnes 199792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 199879bdfe76SSatish Balay 1999db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 200079bdfe76SSatish Balay @*/ 200179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 200279bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 200379bdfe76SSatish Balay { 200479bdfe76SSatish Balay Mat B; 200579bdfe76SSatish Balay Mat_MPIBAIJ *b; 2006133cdb44SSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 2007a2ab621fSBarry Smith int flag1 = 0,flag2 = 0; 200879bdfe76SSatish Balay 2009d64ed03dSBarry Smith PetscFunctionBegin; 2010a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 20113914022bSBarry Smith 20123914022bSBarry Smith MPI_Comm_size(comm,&size); 2013494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 2014494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 2015494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 20163914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 20173914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 20183914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 20193a40ed3dSBarry Smith PetscFunctionReturn(0); 20203914022bSBarry Smith } 20213914022bSBarry Smith 202279bdfe76SSatish Balay *A = 0; 20233f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 202479bdfe76SSatish Balay PLogObjectCreate(B); 202579bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 202679bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 2027cc2dc46cSBarry Smith PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 20284c50302cSBarry Smith 2029e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 2030e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 203190f02eecSBarry Smith B->mapping = 0; 203279bdfe76SSatish Balay B->factor = 0; 203379bdfe76SSatish Balay B->assembled = PETSC_FALSE; 203479bdfe76SSatish Balay 2035e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 203679bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 203779bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 203879bdfe76SSatish Balay 2039d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2040a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2041d64ed03dSBarry Smith } 2042a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2043a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2044a8c6a408SBarry Smith } 2045a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2046a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2047a8c6a408SBarry Smith } 2048cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2049cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 205079bdfe76SSatish Balay 205179bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 205279bdfe76SSatish Balay work[0] = m; work[1] = n; 205379bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 2054ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 205579bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 205679bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 205779bdfe76SSatish Balay } 205879bdfe76SSatish Balay if (m == PETSC_DECIDE) { 205979bdfe76SSatish Balay Mbs = M/bs; 2060a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 206179bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 206279bdfe76SSatish Balay m = mbs*bs; 206379bdfe76SSatish Balay } 206479bdfe76SSatish Balay if (n == PETSC_DECIDE) { 206579bdfe76SSatish Balay Nbs = N/bs; 2066a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 206779bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 206879bdfe76SSatish Balay n = nbs*bs; 206979bdfe76SSatish Balay } 2070a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 2071a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2072a8c6a408SBarry Smith } 207379bdfe76SSatish Balay 207479bdfe76SSatish Balay b->m = m; B->m = m; 207579bdfe76SSatish Balay b->n = n; B->n = n; 207679bdfe76SSatish Balay b->N = N; B->N = N; 207779bdfe76SSatish Balay b->M = M; B->M = M; 207879bdfe76SSatish Balay b->bs = bs; 207979bdfe76SSatish Balay b->bs2 = bs*bs; 208079bdfe76SSatish Balay b->mbs = mbs; 208179bdfe76SSatish Balay b->nbs = nbs; 208279bdfe76SSatish Balay b->Mbs = Mbs; 208379bdfe76SSatish Balay b->Nbs = Nbs; 208479bdfe76SSatish Balay 2085c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2086c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 2087488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2088488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2089c7fcc2eaSBarry Smith 209079bdfe76SSatish Balay /* build local table of row and column ownerships */ 209179bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2092f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 20930ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2094ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 209579bdfe76SSatish Balay b->rowners[0] = 0; 209679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 209779bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 209879bdfe76SSatish Balay } 209979bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 210079bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 21014fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 21024fa0d573SSatish Balay b->rend_bs = b->rend * bs; 21034fa0d573SSatish Balay 2104ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 210579bdfe76SSatish Balay b->cowners[0] = 0; 210679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 210779bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 210879bdfe76SSatish Balay } 210979bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 211079bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 21114fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 21124fa0d573SSatish Balay b->cend_bs = b->cend * bs; 211379bdfe76SSatish Balay 2114a07cd24cSSatish Balay 211579bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2116029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 211779bdfe76SSatish Balay PLogObjectParent(B,b->A); 211879bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2119029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 212079bdfe76SSatish Balay PLogObjectParent(B,b->B); 212179bdfe76SSatish Balay 212279bdfe76SSatish Balay /* build cache for off array entries formed */ 212379bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 212490f02eecSBarry Smith b->donotstash = 0; 212579bdfe76SSatish Balay b->colmap = 0; 212679bdfe76SSatish Balay b->garray = 0; 212779bdfe76SSatish Balay b->roworiented = 1; 212879bdfe76SSatish Balay 212930793edcSSatish Balay /* stuff used in block assembly */ 213030793edcSSatish Balay b->barray = 0; 213130793edcSSatish Balay 213279bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 213379bdfe76SSatish Balay b->lvec = 0; 213479bdfe76SSatish Balay b->Mvctx = 0; 213579bdfe76SSatish Balay 213679bdfe76SSatish Balay /* stuff for MatGetRow() */ 213779bdfe76SSatish Balay b->rowindices = 0; 213879bdfe76SSatish Balay b->rowvalues = 0; 213979bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 214079bdfe76SSatish Balay 2141a07cd24cSSatish Balay /* hash table stuff */ 2142a07cd24cSSatish Balay b->ht = 0; 2143187ce0cbSSatish Balay b->hd = 0; 21440bdbc534SSatish Balay b->ht_size = 0; 2145133cdb44SSatish Balay b->ht_flag = 0; 214625fdafccSSatish Balay b->ht_fact = 0; 2147187ce0cbSSatish Balay b->ht_total_ct = 0; 2148187ce0cbSSatish Balay b->ht_insert_ct = 0; 2149a07cd24cSSatish Balay 215079bdfe76SSatish Balay *A = B; 2151133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2152133cdb44SSatish Balay if (flg) { 2153133cdb44SSatish Balay double fact = 1.39; 2154133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2155133cdb44SSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2156133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2157133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2158133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2159133cdb44SSatish Balay } 21605ef9f2a5SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 21615ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 21625ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 21633a40ed3dSBarry Smith PetscFunctionReturn(0); 216479bdfe76SSatish Balay } 2165026e39d0SSatish Balay 21665615d1e5SSatish Balay #undef __FUNC__ 21672e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ" 21682e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 21690ac07820SSatish Balay { 21700ac07820SSatish Balay Mat mat; 21710ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 21720ac07820SSatish Balay int ierr, len=0, flg; 21730ac07820SSatish Balay 2174d64ed03dSBarry Smith PetscFunctionBegin; 21750ac07820SSatish Balay *newmat = 0; 21763f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 21770ac07820SSatish Balay PLogObjectCreate(mat); 21780ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2179cc2dc46cSBarry Smith PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2180e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2181e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 21820ac07820SSatish Balay mat->factor = matin->factor; 21830ac07820SSatish Balay mat->assembled = PETSC_TRUE; 21840ac07820SSatish Balay 21850ac07820SSatish Balay a->m = mat->m = oldmat->m; 21860ac07820SSatish Balay a->n = mat->n = oldmat->n; 21870ac07820SSatish Balay a->M = mat->M = oldmat->M; 21880ac07820SSatish Balay a->N = mat->N = oldmat->N; 21890ac07820SSatish Balay 21900ac07820SSatish Balay a->bs = oldmat->bs; 21910ac07820SSatish Balay a->bs2 = oldmat->bs2; 21920ac07820SSatish Balay a->mbs = oldmat->mbs; 21930ac07820SSatish Balay a->nbs = oldmat->nbs; 21940ac07820SSatish Balay a->Mbs = oldmat->Mbs; 21950ac07820SSatish Balay a->Nbs = oldmat->Nbs; 21960ac07820SSatish Balay 21970ac07820SSatish Balay a->rstart = oldmat->rstart; 21980ac07820SSatish Balay a->rend = oldmat->rend; 21990ac07820SSatish Balay a->cstart = oldmat->cstart; 22000ac07820SSatish Balay a->cend = oldmat->cend; 22010ac07820SSatish Balay a->size = oldmat->size; 22020ac07820SSatish Balay a->rank = oldmat->rank; 2203e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 22040ac07820SSatish Balay a->rowvalues = 0; 22050ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 220630793edcSSatish Balay a->barray = 0; 22070ac07820SSatish Balay 2208133cdb44SSatish Balay /* hash table stuff */ 2209133cdb44SSatish Balay a->ht = 0; 2210133cdb44SSatish Balay a->hd = 0; 2211133cdb44SSatish Balay a->ht_size = 0; 2212133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 221325fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2214133cdb44SSatish Balay a->ht_total_ct = 0; 2215133cdb44SSatish Balay a->ht_insert_ct = 0; 2216133cdb44SSatish Balay 2217133cdb44SSatish Balay 22180ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2219f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 22200ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 22210ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 22220ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 22230ac07820SSatish Balay if (oldmat->colmap) { 222448e59246SSatish Balay #if defined (USE_CTABLE) 222548e59246SSatish Balay ierr = TableCreateCopy( &a->colmap,oldmat->colmap ); CHKERRQ(ierr); 222648e59246SSatish Balay #else 22270ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 22280ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 22290ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 223048e59246SSatish Balay #endif 22310ac07820SSatish Balay } else a->colmap = 0; 22320ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 22330ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 22340ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 22350ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 22360ac07820SSatish Balay } else a->garray = 0; 22370ac07820SSatish Balay 22380ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 22390ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 22400ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2241e18c124aSSatish Balay 22420ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 22432e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 22440ac07820SSatish Balay PLogObjectParent(mat,a->A); 22452e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 22460ac07820SSatish Balay PLogObjectParent(mat,a->B); 22470ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2248e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 22490ac07820SSatish Balay if (flg) { 22500ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 22510ac07820SSatish Balay } 22520ac07820SSatish Balay *newmat = mat; 22533a40ed3dSBarry Smith PetscFunctionReturn(0); 22540ac07820SSatish Balay } 225557b952d6SSatish Balay 225657b952d6SSatish Balay #include "sys.h" 225757b952d6SSatish Balay 22585615d1e5SSatish Balay #undef __FUNC__ 22595615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 226057b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 226157b952d6SSatish Balay { 226257b952d6SSatish Balay Mat A; 226357b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 226457b952d6SSatish Balay Scalar *vals,*buf; 226557b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 226657b952d6SSatish Balay MPI_Status status; 2267cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 226857b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 226940011551SBarry Smith int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 227057b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 227157b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 227257b952d6SSatish Balay 2273d64ed03dSBarry Smith PetscFunctionBegin; 227457b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 227557b952d6SSatish Balay 227657b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 227757b952d6SSatish Balay if (!rank) { 227857b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2279e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2280a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2281d64ed03dSBarry Smith if (header[3] < 0) { 2282a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2283d64ed03dSBarry Smith } 22846c5fab8fSBarry Smith } 2285d64ed03dSBarry Smith 2286ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 228757b952d6SSatish Balay M = header[1]; N = header[2]; 228857b952d6SSatish Balay 2289a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 229057b952d6SSatish Balay 229157b952d6SSatish Balay /* 229257b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 229357b952d6SSatish Balay divisible by the blocksize 229457b952d6SSatish Balay */ 229557b952d6SSatish Balay Mbs = M/bs; 229657b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 229757b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 229857b952d6SSatish Balay else Mbs++; 229957b952d6SSatish Balay if (extra_rows &&!rank) { 2300b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 230157b952d6SSatish Balay } 2302537820f0SBarry Smith 230357b952d6SSatish Balay /* determine ownership of all rows */ 230457b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 230557b952d6SSatish Balay m = mbs * bs; 2306cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2307cee3aa6bSSatish Balay browners = rowners + size + 1; 2308ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 230957b952d6SSatish Balay rowners[0] = 0; 2310cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2311cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 231257b952d6SSatish Balay rstart = rowners[rank]; 231357b952d6SSatish Balay rend = rowners[rank+1]; 231457b952d6SSatish Balay 231557b952d6SSatish Balay /* distribute row lengths to all processors */ 231657b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 231757b952d6SSatish Balay if (!rank) { 231857b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2319e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 232057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 232157b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2322cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2323ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 232457b952d6SSatish Balay PetscFree(sndcounts); 2325d64ed03dSBarry Smith } else { 2326ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 232757b952d6SSatish Balay } 232857b952d6SSatish Balay 232957b952d6SSatish Balay if (!rank) { 233057b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 233157b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 233257b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 233357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 233457b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 233557b952d6SSatish Balay procsnz[i] += rowlengths[j]; 233657b952d6SSatish Balay } 233757b952d6SSatish Balay } 233857b952d6SSatish Balay PetscFree(rowlengths); 233957b952d6SSatish Balay 234057b952d6SSatish Balay /* determine max buffer needed and allocate it */ 234157b952d6SSatish Balay maxnz = 0; 234257b952d6SSatish Balay for ( i=0; i<size; i++ ) { 234357b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 234457b952d6SSatish Balay } 234557b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 234657b952d6SSatish Balay 234757b952d6SSatish Balay /* read in my part of the matrix column indices */ 234857b952d6SSatish Balay nz = procsnz[0]; 234957b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 235057b952d6SSatish Balay mycols = ibuf; 2351cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2352e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2353cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2354cee3aa6bSSatish Balay 235557b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 235657b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 235757b952d6SSatish Balay nz = procsnz[i]; 2358e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2359ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 236057b952d6SSatish Balay } 236157b952d6SSatish Balay /* read in the stuff for the last proc */ 236257b952d6SSatish Balay if ( size != 1 ) { 236357b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2364e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 236557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2366ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 236757b952d6SSatish Balay } 236857b952d6SSatish Balay PetscFree(cols); 2369d64ed03dSBarry Smith } else { 237057b952d6SSatish Balay /* determine buffer space needed for message */ 237157b952d6SSatish Balay nz = 0; 237257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 237357b952d6SSatish Balay nz += locrowlens[i]; 237457b952d6SSatish Balay } 237557b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 237657b952d6SSatish Balay mycols = ibuf; 237757b952d6SSatish Balay /* receive message of column indices*/ 2378ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2379ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2380a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 238157b952d6SSatish Balay } 238257b952d6SSatish Balay 238357b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2384cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2385cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 238657b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2387cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 238857b952d6SSatish Balay masked1 = mask + Mbs; 238957b952d6SSatish Balay masked2 = masked1 + Mbs; 239057b952d6SSatish Balay rowcount = 0; nzcount = 0; 239157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 239257b952d6SSatish Balay dcount = 0; 239357b952d6SSatish Balay odcount = 0; 239457b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 239557b952d6SSatish Balay kmax = locrowlens[rowcount]; 239657b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 239757b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 239857b952d6SSatish Balay if (!mask[tmp]) { 239957b952d6SSatish Balay mask[tmp] = 1; 240057b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 240157b952d6SSatish Balay else masked1[dcount++] = tmp; 240257b952d6SSatish Balay } 240357b952d6SSatish Balay } 240457b952d6SSatish Balay rowcount++; 240557b952d6SSatish Balay } 2406cee3aa6bSSatish Balay 240757b952d6SSatish Balay dlens[i] = dcount; 240857b952d6SSatish Balay odlens[i] = odcount; 2409cee3aa6bSSatish Balay 241057b952d6SSatish Balay /* zero out the mask elements we set */ 241157b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 241257b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 241357b952d6SSatish Balay } 2414cee3aa6bSSatish Balay 241557b952d6SSatish Balay /* create our matrix */ 2416537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2417537820f0SBarry Smith CHKERRQ(ierr); 241857b952d6SSatish Balay A = *newmat; 24196d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 242057b952d6SSatish Balay 242157b952d6SSatish Balay if (!rank) { 242257b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 242357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 242457b952d6SSatish Balay nz = procsnz[0]; 242557b952d6SSatish Balay vals = buf; 2426cee3aa6bSSatish Balay mycols = ibuf; 2427cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2428e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2429cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2430537820f0SBarry Smith 243157b952d6SSatish Balay /* insert into matrix */ 243257b952d6SSatish Balay jj = rstart*bs; 243357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 243457b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 243557b952d6SSatish Balay mycols += locrowlens[i]; 243657b952d6SSatish Balay vals += locrowlens[i]; 243757b952d6SSatish Balay jj++; 243857b952d6SSatish Balay } 243957b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 244057b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 244157b952d6SSatish Balay nz = procsnz[i]; 244257b952d6SSatish Balay vals = buf; 2443e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2444ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 244557b952d6SSatish Balay } 244657b952d6SSatish Balay /* the last proc */ 244757b952d6SSatish Balay if ( size != 1 ){ 244857b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2449cee3aa6bSSatish Balay vals = buf; 2450e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 245157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2452ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 245357b952d6SSatish Balay } 245457b952d6SSatish Balay PetscFree(procsnz); 2455d64ed03dSBarry Smith } else { 245657b952d6SSatish Balay /* receive numeric values */ 245757b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 245857b952d6SSatish Balay 245957b952d6SSatish Balay /* receive message of values*/ 246057b952d6SSatish Balay vals = buf; 2461cee3aa6bSSatish Balay mycols = ibuf; 2462ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2463ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2464a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 246557b952d6SSatish Balay 246657b952d6SSatish Balay /* insert into matrix */ 246757b952d6SSatish Balay jj = rstart*bs; 2468cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 246957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 247057b952d6SSatish Balay mycols += locrowlens[i]; 247157b952d6SSatish Balay vals += locrowlens[i]; 247257b952d6SSatish Balay jj++; 247357b952d6SSatish Balay } 247457b952d6SSatish Balay } 247557b952d6SSatish Balay PetscFree(locrowlens); 247657b952d6SSatish Balay PetscFree(buf); 247757b952d6SSatish Balay PetscFree(ibuf); 247857b952d6SSatish Balay PetscFree(rowners); 247957b952d6SSatish Balay PetscFree(dlens); 2480cee3aa6bSSatish Balay PetscFree(mask); 24816d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 24826d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 24833a40ed3dSBarry Smith PetscFunctionReturn(0); 248457b952d6SSatish Balay } 248557b952d6SSatish Balay 248657b952d6SSatish Balay 2487133cdb44SSatish Balay 2488133cdb44SSatish Balay #undef __FUNC__ 2489133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2490133cdb44SSatish Balay /*@ 2491133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2492133cdb44SSatish Balay 2493133cdb44SSatish Balay Input Parameters: 2494133cdb44SSatish Balay . mat - the matrix 2495133cdb44SSatish Balay . fact - factor 2496133cdb44SSatish Balay 2497fee21e36SBarry Smith Collective on Mat 2498fee21e36SBarry Smith 2499133cdb44SSatish Balay Notes: 2500133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2501133cdb44SSatish Balay 2502133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2503133cdb44SSatish Balay 2504133cdb44SSatish Balay .seealso: MatSetOption() 2505133cdb44SSatish Balay @*/ 2506133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2507133cdb44SSatish Balay { 250825fdafccSSatish Balay Mat_MPIBAIJ *baij; 2509133cdb44SSatish Balay 2510133cdb44SSatish Balay PetscFunctionBegin; 2511133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 251225fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2513133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2514133cdb44SSatish Balay } 2515133cdb44SSatish Balay baij = (Mat_MPIBAIJ*) mat->data; 2516133cdb44SSatish Balay baij->ht_fact = fact; 2517133cdb44SSatish Balay PetscFunctionReturn(0); 2518133cdb44SSatish Balay } 2519