1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*48e59246SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.140 1999/01/08 16:55:04 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 133b2fbd54SBarry Smith 14537820f0SBarry Smith /* 15537820f0SBarry Smith Local utility routine that creates a mapping from the global column 1657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1857b952d6SSatish Balay length of colmap equals the global matrix length. 1957b952d6SSatish Balay */ 205615d1e5SSatish Balay #undef __FUNC__ 215615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2357b952d6SSatish Balay { 2457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2557b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 26*48e59246SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 2757b952d6SSatish Balay 28d64ed03dSBarry Smith PetscFunctionBegin; 29*48e59246SSatish Balay #if defined (USE_CTABLE) 30*48e59246SSatish Balay ierr = TableCreate( &baij->colmap, baij->nbs/5 ); CHKERRQ(ierr); 31*48e59246SSatish Balay for ( i=0; i<nbs; i++ ){ 32*48e59246SSatish Balay ierr = TableAdd( baij->colmap, baij->garray[i] + 1, i*bs+1 );CHKERRQ(ierr); 33*48e59246SSatish Balay } 34*48e59246SSatish Balay #else 35758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3657b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3757b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 38928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 39*48e59246SSatish Balay #endif 403a40ed3dSBarry Smith PetscFunctionReturn(0); 4157b952d6SSatish Balay } 4257b952d6SSatish Balay 4380c1aa95SSatish Balay #define CHUNKSIZE 10 4480c1aa95SSatish Balay 45f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 4680c1aa95SSatish Balay { \ 4780c1aa95SSatish Balay \ 4880c1aa95SSatish Balay brow = row/bs; \ 4980c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 50ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 5180c1aa95SSatish Balay bcol = col/bs; \ 5280c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 53ab26458aSBarry Smith low = 0; high = nrow; \ 54ab26458aSBarry Smith while (high-low > 3) { \ 55ab26458aSBarry Smith t = (low+high)/2; \ 56ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 57ab26458aSBarry Smith else low = t; \ 58ab26458aSBarry Smith } \ 59ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 6080c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 6180c1aa95SSatish Balay if (rp[_i] == bcol) { \ 6280c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 63eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 64eada6651SSatish Balay else *bap = value; \ 65ac7a638eSSatish Balay goto a_noinsert; \ 6680c1aa95SSatish Balay } \ 6780c1aa95SSatish Balay } \ 6889280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 69a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 7080c1aa95SSatish Balay if (nrow >= rmax) { \ 7180c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 7280c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 7380c1aa95SSatish Balay Scalar *new_a; \ 7480c1aa95SSatish Balay \ 75a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7689280ab3SLois Curfman McInnes \ 7780c1aa95SSatish Balay /* malloc new storage space */ \ 7880c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 7980c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 8080c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 8180c1aa95SSatish Balay new_i = new_j + new_nz; \ 8280c1aa95SSatish Balay \ 8380c1aa95SSatish Balay /* copy over old data into new slots */ \ 8480c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 8580c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 8680c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 8780c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 8880c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 8980c1aa95SSatish Balay len*sizeof(int)); \ 9080c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 9180c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 9280c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 9380c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 9480c1aa95SSatish Balay /* free up old matrix storage */ \ 9580c1aa95SSatish Balay PetscFree(a->a); \ 9680c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 9780c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 9880c1aa95SSatish Balay a->singlemalloc = 1; \ 9980c1aa95SSatish Balay \ 10080c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 101ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 10280c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 10380c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 10480c1aa95SSatish Balay a->reallocs++; \ 10580c1aa95SSatish Balay a->nz++; \ 10680c1aa95SSatish Balay } \ 10780c1aa95SSatish Balay N = nrow++ - 1; \ 10880c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 10980c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 11080c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 11180c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 11280c1aa95SSatish Balay } \ 11380c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 11480c1aa95SSatish Balay rp[_i] = bcol; \ 11580c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 116ac7a638eSSatish Balay a_noinsert:; \ 11780c1aa95SSatish Balay ailen[brow] = nrow; \ 11880c1aa95SSatish Balay } 11957b952d6SSatish Balay 120ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 121ac7a638eSSatish Balay { \ 122ac7a638eSSatish Balay \ 123ac7a638eSSatish Balay brow = row/bs; \ 124ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 125ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 126ac7a638eSSatish Balay bcol = col/bs; \ 127ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 128ac7a638eSSatish Balay low = 0; high = nrow; \ 129ac7a638eSSatish Balay while (high-low > 3) { \ 130ac7a638eSSatish Balay t = (low+high)/2; \ 131ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 132ac7a638eSSatish Balay else low = t; \ 133ac7a638eSSatish Balay } \ 134ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 135ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 136ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 137ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 138ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 139ac7a638eSSatish Balay else *bap = value; \ 140ac7a638eSSatish Balay goto b_noinsert; \ 141ac7a638eSSatish Balay } \ 142ac7a638eSSatish Balay } \ 14389280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 144a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 145ac7a638eSSatish Balay if (nrow >= rmax) { \ 146ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14774c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 148ac7a638eSSatish Balay Scalar *new_a; \ 149ac7a638eSSatish Balay \ 150a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 15189280ab3SLois Curfman McInnes \ 152ac7a638eSSatish Balay /* malloc new storage space */ \ 15374c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 154ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 155ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 156ac7a638eSSatish Balay new_i = new_j + new_nz; \ 157ac7a638eSSatish Balay \ 158ac7a638eSSatish Balay /* copy over old data into new slots */ \ 159ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 16074c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 161ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 162ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 163ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 164ac7a638eSSatish Balay len*sizeof(int)); \ 165ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 166ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 167ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 168ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 169ac7a638eSSatish Balay /* free up old matrix storage */ \ 17074c639caSSatish Balay PetscFree(b->a); \ 17174c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 17274c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 17374c639caSSatish Balay b->singlemalloc = 1; \ 174ac7a638eSSatish Balay \ 175ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 17774c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 17874c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 17974c639caSSatish Balay b->reallocs++; \ 18074c639caSSatish Balay b->nz++; \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay N = nrow++ - 1; \ 183ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 184ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 185ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 186ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 187ac7a638eSSatish Balay } \ 188ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 189ac7a638eSSatish Balay rp[_i] = bcol; \ 190ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 191ac7a638eSSatish Balay b_noinsert:; \ 192ac7a638eSSatish Balay bilen[brow] = nrow; \ 193ac7a638eSSatish Balay } 194ac7a638eSSatish Balay 1955615d1e5SSatish Balay #undef __FUNC__ 1965615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 197ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 19857b952d6SSatish Balay { 19957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 20057b952d6SSatish Balay Scalar value; 2014fa0d573SSatish Balay int ierr,i,j,row,col; 2024fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2034fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2044fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 20557b952d6SSatish Balay 206eada6651SSatish Balay /* Some Variables required in the macro */ 20780c1aa95SSatish Balay Mat A = baij->A; 20880c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 209ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 210ac7a638eSSatish Balay Scalar *aa=a->a; 211ac7a638eSSatish Balay 212ac7a638eSSatish Balay Mat B = baij->B; 213ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 214ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 215ac7a638eSSatish Balay Scalar *ba=b->a; 216ac7a638eSSatish Balay 217ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 218ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 219ac7a638eSSatish Balay Scalar *ap,*bap; 22080c1aa95SSatish Balay 221d64ed03dSBarry Smith PetscFunctionBegin; 22257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 224a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 225a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 226639f9d9dSBarry Smith #endif 22757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 22857b952d6SSatish Balay row = im[i] - rstart_orig; 22957b952d6SSatish Balay for ( j=0; j<n; j++ ) { 23057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 23157b952d6SSatish Balay col = in[j] - cstart_orig; 23257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 23480c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 23557b952d6SSatish Balay } 2363a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 237a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 238a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 239639f9d9dSBarry Smith #endif 24057b952d6SSatish Balay else { 24157b952d6SSatish Balay if (mat->was_assembled) { 242905e6a2fSBarry Smith if (!baij->colmap) { 243905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 244905e6a2fSBarry Smith } 245*48e59246SSatish Balay #if defined (USE_CTABLE) 246*48e59246SSatish Balay col = TableFind( baij->colmap, in[j]/bs + 1 ) - 1 + in[j]%bs; 247*48e59246SSatish Balay #else 248905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 249*48e59246SSatish Balay #endif 25057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 25157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 25257b952d6SSatish Balay col = in[j]; 2539bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2549bf004c3SSatish Balay B = baij->B; 2559bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2569bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2579bf004c3SSatish Balay ba=b->a; 25857b952d6SSatish Balay } 259d64ed03dSBarry Smith } else col = in[j]; 26057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 262ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 26357b952d6SSatish Balay } 26457b952d6SSatish Balay } 265d64ed03dSBarry Smith } else { 26690f02eecSBarry Smith if (roworiented && !baij->donotstash) { 26757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 268d64ed03dSBarry Smith } else { 26990f02eecSBarry Smith if (!baij->donotstash) { 27057b952d6SSatish Balay row = im[i]; 27157b952d6SSatish Balay for ( j=0; j<n; j++ ) { 27257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 27357b952d6SSatish Balay } 27457b952d6SSatish Balay } 27557b952d6SSatish Balay } 27657b952d6SSatish Balay } 27790f02eecSBarry Smith } 2783a40ed3dSBarry Smith PetscFunctionReturn(0); 27957b952d6SSatish Balay } 28057b952d6SSatish Balay 281ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 282ab26458aSBarry Smith #undef __FUNC__ 283ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 284ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 285ab26458aSBarry Smith { 286ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 28730793edcSSatish Balay Scalar *value,*barray=baij->barray; 288abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 289ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 290ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 291ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 292ab26458aSBarry Smith 29330793edcSSatish Balay if(!barray) { 29447513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 29530793edcSSatish Balay } 29630793edcSSatish Balay 297ab26458aSBarry Smith if (roworiented) { 298ab26458aSBarry Smith stepval = (n-1)*bs; 299ab26458aSBarry Smith } else { 300ab26458aSBarry Smith stepval = (m-1)*bs; 301ab26458aSBarry Smith } 302ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 3033a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 304a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 305a8c6a408SBarry Smith if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 306ab26458aSBarry Smith #endif 307ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 308ab26458aSBarry Smith row = im[i] - rstart; 309ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 31015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 31115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 31215b57d14SSatish Balay barray = v + i*bs2; 31315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 31415b57d14SSatish Balay barray = v + j*bs2; 31515b57d14SSatish Balay } else { /* Here a copy is required */ 316ab26458aSBarry Smith if (roworiented) { 317ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 318ab26458aSBarry Smith } else { 319ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 320abef11f7SSatish Balay } 32147513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 32247513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 32330793edcSSatish Balay *barray++ = *value++; 32447513183SBarry Smith } 32547513183SBarry Smith } 32630793edcSSatish Balay barray -=bs2; 32715b57d14SSatish Balay } 328abef11f7SSatish Balay 329abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 330abef11f7SSatish Balay col = in[j] - cstart; 33130793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 332ab26458aSBarry Smith } 3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 334a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 335a8c6a408SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 336ab26458aSBarry Smith #endif 337ab26458aSBarry Smith else { 338ab26458aSBarry Smith if (mat->was_assembled) { 339ab26458aSBarry Smith if (!baij->colmap) { 340ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 341ab26458aSBarry Smith } 342a5eb4965SSatish Balay 3433a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 344*48e59246SSatish Balay #if defined (USE_CTABLE) 345*48e59246SSatish Balay if( (TableFind( baij->colmap, in[j] + 1 ) - 1) % bs) 346*48e59246SSatish Balay {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 347*48e59246SSatish Balay #else 348a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 349a5eb4965SSatish Balay #endif 350*48e59246SSatish Balay #endif 351*48e59246SSatish Balay #if defined (USE_CTABLE) 352*48e59246SSatish Balay col = (TableFind( baij->colmap, in[j] + 1 ) - 1)/bs; 353*48e59246SSatish Balay #else 354a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 355*48e59246SSatish Balay #endif 356ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 358ab26458aSBarry Smith col = in[j]; 359ab26458aSBarry Smith } 360ab26458aSBarry Smith } 361ab26458aSBarry Smith else col = in[j]; 36230793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363ab26458aSBarry Smith } 364ab26458aSBarry Smith } 365d64ed03dSBarry Smith } else { 366ab26458aSBarry Smith if (!baij->donotstash) { 367ab26458aSBarry Smith if (roworiented ) { 368abef11f7SSatish Balay row = im[i]*bs; 369abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 370abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 371abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 372abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 373abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 374abef11f7SSatish Balay } 375ab26458aSBarry Smith } 376ab26458aSBarry Smith } 377d64ed03dSBarry Smith } else { 378ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 379abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 380abef11f7SSatish Balay col = in[j]*bs; 381abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 382abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 383abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 384ab26458aSBarry Smith } 385ab26458aSBarry Smith } 386ab26458aSBarry Smith } 387abef11f7SSatish Balay } 388abef11f7SSatish Balay } 389ab26458aSBarry Smith } 390ab26458aSBarry Smith } 3913a40ed3dSBarry Smith PetscFunctionReturn(0); 392ab26458aSBarry Smith } 3930bdbc534SSatish Balay #define HASH_KEY 0.6180339887 394c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 395c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 396c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 3975615d1e5SSatish Balay #undef __FUNC__ 3980bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 3990bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4000bdbc534SSatish Balay { 4010bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4020bdbc534SSatish Balay int ierr,i,j,row,col; 4030bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 404c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 405c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 406c2760754SSatish Balay double tmp; 407b9e4cc15SSatish Balay Scalar ** HD = baij->hd,value; 4084a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4094a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4104a15367fSSatish Balay #endif 4110bdbc534SSatish Balay 4120bdbc534SSatish Balay PetscFunctionBegin; 4130bdbc534SSatish Balay 4140bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4150bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4160bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4170bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4180bdbc534SSatish Balay #endif 4190bdbc534SSatish Balay row = im[i]; 420c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4210bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4220bdbc534SSatish Balay col = in[j]; 4230bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4240bdbc534SSatish Balay /* Look up into the Hash Table */ 425c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 426c2760754SSatish Balay h1 = HASH(size,key,tmp); 4270bdbc534SSatish Balay 428c2760754SSatish Balay 429c2760754SSatish Balay idx = h1; 430187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 431187ce0cbSSatish Balay insert_ct++; 432187ce0cbSSatish Balay total_ct++; 433187ce0cbSSatish Balay if (HT[idx] != key) { 434187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 435187ce0cbSSatish Balay if (idx == size) { 436187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 437187ce0cbSSatish Balay if (idx == h1) { 438187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 439187ce0cbSSatish Balay } 440187ce0cbSSatish Balay } 441187ce0cbSSatish Balay } 442187ce0cbSSatish Balay #else 443c2760754SSatish Balay if (HT[idx] != key) { 444c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 445c2760754SSatish Balay if (idx == size) { 446c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 447c2760754SSatish Balay if (idx == h1) { 448c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 449c2760754SSatish Balay } 450c2760754SSatish Balay } 451c2760754SSatish Balay } 452187ce0cbSSatish Balay #endif 453c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 454c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 455c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4560bdbc534SSatish Balay } 4570bdbc534SSatish Balay } else { 4580bdbc534SSatish Balay if (roworiented && !baij->donotstash) { 4590bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 4600bdbc534SSatish Balay } else { 4610bdbc534SSatish Balay if (!baij->donotstash) { 4620bdbc534SSatish Balay row = im[i]; 4630bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4640bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 4650bdbc534SSatish Balay } 4660bdbc534SSatish Balay } 4670bdbc534SSatish Balay } 4680bdbc534SSatish Balay } 4690bdbc534SSatish Balay } 470187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 471187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 472187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 473187ce0cbSSatish Balay #endif 4740bdbc534SSatish Balay PetscFunctionReturn(0); 4750bdbc534SSatish Balay } 4760bdbc534SSatish Balay 4770bdbc534SSatish Balay #undef __FUNC__ 4780bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4790bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4800bdbc534SSatish Balay { 4810bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4820bdbc534SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 4830bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 484b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 485c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 486c2760754SSatish Balay double tmp; 487187ce0cbSSatish Balay Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 4884a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4894a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4904a15367fSSatish Balay #endif 4910bdbc534SSatish Balay 492d0a41580SSatish Balay PetscFunctionBegin; 493d0a41580SSatish Balay 4940bdbc534SSatish Balay if (roworiented) { 4950bdbc534SSatish Balay stepval = (n-1)*bs; 4960bdbc534SSatish Balay } else { 4970bdbc534SSatish Balay stepval = (m-1)*bs; 4980bdbc534SSatish Balay } 4990bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 5000bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 5010bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 5020bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 5030bdbc534SSatish Balay #endif 5040bdbc534SSatish Balay row = im[i]; 505187ce0cbSSatish Balay v_t = v + i*bs2; 506c2760754SSatish Balay if (row >= rstart && row < rend) { 5070bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5080bdbc534SSatish Balay col = in[j]; 5090bdbc534SSatish Balay 5100bdbc534SSatish Balay /* Look up into the Hash Table */ 511c2760754SSatish Balay key = row*Nbs+col+1; 512c2760754SSatish Balay h1 = HASH(size,key,tmp); 5130bdbc534SSatish Balay 514c2760754SSatish Balay idx = h1; 515187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 516187ce0cbSSatish Balay total_ct++; 517187ce0cbSSatish Balay insert_ct++; 518187ce0cbSSatish Balay if (HT[idx] != key) { 519187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 520187ce0cbSSatish Balay if (idx == size) { 521187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 522187ce0cbSSatish Balay if (idx == h1) { 523187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 524187ce0cbSSatish Balay } 525187ce0cbSSatish Balay } 526187ce0cbSSatish Balay } 527187ce0cbSSatish Balay #else 528c2760754SSatish Balay if (HT[idx] != key) { 529c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 530c2760754SSatish Balay if (idx == size) { 531c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 532c2760754SSatish Balay if (idx == h1) { 533c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 534c2760754SSatish Balay } 535c2760754SSatish Balay } 536c2760754SSatish Balay } 537187ce0cbSSatish Balay #endif 538c2760754SSatish Balay baij_a = HD[idx]; 5390bdbc534SSatish Balay if (roworiented) { 540c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 541187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 542187ce0cbSSatish Balay value = v_t; 543187ce0cbSSatish Balay v_t += bs; 544fef45726SSatish Balay if (addv == ADD_VALUES) { 545c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 546c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 547fef45726SSatish Balay baij_a[jj] += *value++; 548b4cc0f5aSSatish Balay } 549b4cc0f5aSSatish Balay } 550fef45726SSatish Balay } else { 551c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 552c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 553fef45726SSatish Balay baij_a[jj] = *value++; 554fef45726SSatish Balay } 555fef45726SSatish Balay } 556fef45726SSatish Balay } 5570bdbc534SSatish Balay } else { 5580bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 559fef45726SSatish Balay if (addv == ADD_VALUES) { 560b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 5610bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 562fef45726SSatish Balay baij_a[jj] += *value++; 563fef45726SSatish Balay } 564fef45726SSatish Balay } 565fef45726SSatish Balay } else { 566fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 567fef45726SSatish Balay for ( jj=0; jj<bs; jj++ ) { 568fef45726SSatish Balay baij_a[jj] = *value++; 569fef45726SSatish Balay } 570b4cc0f5aSSatish Balay } 5710bdbc534SSatish Balay } 5720bdbc534SSatish Balay } 5730bdbc534SSatish Balay } 5740bdbc534SSatish Balay } else { 5750bdbc534SSatish Balay if (!baij->donotstash) { 5760bdbc534SSatish Balay if (roworiented ) { 5770bdbc534SSatish Balay row = im[i]*bs; 5780bdbc534SSatish Balay value = v + i*(stepval+bs)*bs; 5790bdbc534SSatish Balay for ( j=0; j<bs; j++,row++ ) { 5800bdbc534SSatish Balay for ( k=0; k<n; k++ ) { 5810bdbc534SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 5820bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5830bdbc534SSatish Balay } 5840bdbc534SSatish Balay } 5850bdbc534SSatish Balay } 5860bdbc534SSatish Balay } else { 5870bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5880bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 5890bdbc534SSatish Balay col = in[j]*bs; 5900bdbc534SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 5910bdbc534SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 5920bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5930bdbc534SSatish Balay } 5940bdbc534SSatish Balay } 5950bdbc534SSatish Balay } 5960bdbc534SSatish Balay } 5970bdbc534SSatish Balay } 5980bdbc534SSatish Balay } 5990bdbc534SSatish Balay } 600187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 601187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 602187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 603187ce0cbSSatish Balay #endif 6040bdbc534SSatish Balay PetscFunctionReturn(0); 6050bdbc534SSatish Balay } 606133cdb44SSatish Balay 6070bdbc534SSatish Balay #undef __FUNC__ 6085615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 609ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 610d6de1c52SSatish Balay { 611d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 612d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 613*48e59246SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 614d6de1c52SSatish Balay 615133cdb44SSatish Balay PetscFunctionBegin; 616d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 617a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 618a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 619d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 620d6de1c52SSatish Balay row = idxm[i] - bsrstart; 621d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 622a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 623a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 624d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 625d6de1c52SSatish Balay col = idxn[j] - bscstart; 62698dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 627d64ed03dSBarry Smith } else { 628905e6a2fSBarry Smith if (!baij->colmap) { 629905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 630905e6a2fSBarry Smith } 631*48e59246SSatish Balay #if defined (USE_CTABLE) 632*48e59246SSatish Balay data = TableFind( baij->colmap, idxn[j]/bs + 1 ) - 1; 633*48e59246SSatish Balay #else 634*48e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 635*48e59246SSatish Balay #endif 636*48e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 637d9d09a02SSatish Balay else { 638*48e59246SSatish Balay col = data + idxn[j]%bs; 63998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 640d6de1c52SSatish Balay } 641d6de1c52SSatish Balay } 642d6de1c52SSatish Balay } 643d64ed03dSBarry Smith } else { 644a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 645d6de1c52SSatish Balay } 646d6de1c52SSatish Balay } 6473a40ed3dSBarry Smith PetscFunctionReturn(0); 648d6de1c52SSatish Balay } 649d6de1c52SSatish Balay 6505615d1e5SSatish Balay #undef __FUNC__ 6515615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 652ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 653d6de1c52SSatish Balay { 654d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 655d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 656acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 657d6de1c52SSatish Balay double sum = 0.0; 658d6de1c52SSatish Balay Scalar *v; 659d6de1c52SSatish Balay 660d64ed03dSBarry Smith PetscFunctionBegin; 661d6de1c52SSatish Balay if (baij->size == 1) { 662d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 663d6de1c52SSatish Balay } else { 664d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 665d6de1c52SSatish Balay v = amat->a; 666d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 6673a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 668e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 669d6de1c52SSatish Balay #else 670d6de1c52SSatish Balay sum += (*v)*(*v); v++; 671d6de1c52SSatish Balay #endif 672d6de1c52SSatish Balay } 673d6de1c52SSatish Balay v = bmat->a; 674d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 6753a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 676e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 677d6de1c52SSatish Balay #else 678d6de1c52SSatish Balay sum += (*v)*(*v); v++; 679d6de1c52SSatish Balay #endif 680d6de1c52SSatish Balay } 681ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 682d6de1c52SSatish Balay *norm = sqrt(*norm); 683d64ed03dSBarry Smith } else { 684e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 685d6de1c52SSatish Balay } 686d64ed03dSBarry Smith } 6873a40ed3dSBarry Smith PetscFunctionReturn(0); 688d6de1c52SSatish Balay } 68957b952d6SSatish Balay 6905615d1e5SSatish Balay #undef __FUNC__ 6915615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 692ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 69357b952d6SSatish Balay { 69457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 69557b952d6SSatish Balay MPI_Comm comm = mat->comm; 69657b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 69757b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 69857b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 69957b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 70057b952d6SSatish Balay InsertMode addv; 70157b952d6SSatish Balay Scalar *rvalues,*svalues; 70257b952d6SSatish Balay 703d64ed03dSBarry Smith PetscFunctionBegin; 704570da906SBarry Smith if (baij->donotstash) { 705570da906SBarry Smith baij->svalues = 0; baij->rvalues = 0; 706570da906SBarry Smith baij->nsends = 0; baij->nrecvs = 0; 707570da906SBarry Smith baij->send_waits = 0; baij->recv_waits = 0; 708570da906SBarry Smith baij->rmax = 0; 709570da906SBarry Smith PetscFunctionReturn(0); 710570da906SBarry Smith } 711570da906SBarry Smith 71257b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 713ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 71457b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 715a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 71657b952d6SSatish Balay } 717e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 71857b952d6SSatish Balay 71957b952d6SSatish Balay /* first count number of contributors to each processor */ 72057b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 72157b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 72257b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 72357b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 72457b952d6SSatish Balay idx = baij->stash.idx[i]; 72557b952d6SSatish Balay for ( j=0; j<size; j++ ) { 72657b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 72757b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 72857b952d6SSatish Balay } 72957b952d6SSatish Balay } 73057b952d6SSatish Balay } 73157b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 73257b952d6SSatish Balay 73357b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 73457b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 735ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 73657b952d6SSatish Balay nreceives = work[rank]; 737ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 73857b952d6SSatish Balay nmax = work[rank]; 73957b952d6SSatish Balay PetscFree(work); 74057b952d6SSatish Balay 74157b952d6SSatish Balay /* post receives: 74257b952d6SSatish Balay 1) each message will consist of ordered pairs 74357b952d6SSatish Balay (global index,value) we store the global index as a double 74457b952d6SSatish Balay to simplify the message passing. 74557b952d6SSatish Balay 2) since we don't know how long each individual message is we 74657b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 74757b952d6SSatish Balay this is a lot of wasted space. 74857b952d6SSatish Balay 74957b952d6SSatish Balay 75057b952d6SSatish Balay This could be done better. 75157b952d6SSatish Balay */ 752f8abc2e8SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 753f8abc2e8SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 75457b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 755ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 75657b952d6SSatish Balay } 75757b952d6SSatish Balay 75857b952d6SSatish Balay /* do sends: 75957b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 76057b952d6SSatish Balay the ith processor 76157b952d6SSatish Balay */ 76257b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 763d64ed03dSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 76457b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 76557b952d6SSatish Balay starts[0] = 0; 76657b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 76757b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 76857b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 76957b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 77057b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 77157b952d6SSatish Balay } 77257b952d6SSatish Balay PetscFree(owner); 77357b952d6SSatish Balay starts[0] = 0; 77457b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 77557b952d6SSatish Balay count = 0; 77657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 77757b952d6SSatish Balay if (procs[i]) { 778ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 77957b952d6SSatish Balay } 78057b952d6SSatish Balay } 78157b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 78257b952d6SSatish Balay 78357b952d6SSatish Balay /* Free cache space */ 78410a665d1SBarry Smith PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 78557b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 78657b952d6SSatish Balay 78757b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 78857b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 78957b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 79057b952d6SSatish Balay baij->rmax = nmax; 79157b952d6SSatish Balay 7923a40ed3dSBarry Smith PetscFunctionReturn(0); 79357b952d6SSatish Balay } 794bd7f49f5SSatish Balay 795fef45726SSatish Balay /* 796fef45726SSatish Balay Creates the hash table, and sets the table 797fef45726SSatish Balay This table is created only once. 798fef45726SSatish Balay If new entried need to be added to the matrix 799fef45726SSatish Balay then the hash table has to be destroyed and 800fef45726SSatish Balay recreated. 801fef45726SSatish Balay */ 802fef45726SSatish Balay #undef __FUNC__ 803fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 804d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 805596b8d2eSBarry Smith { 806596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 807596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 808596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 8090bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8104a15367fSSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart; 811187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 812fef45726SSatish Balay int *HT,key; 8130bdbc534SSatish Balay Scalar **HD; 814c2760754SSatish Balay double tmp; 8154a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 8164a15367fSSatish Balay int ct=0,max=0; 8174a15367fSSatish Balay #endif 818fef45726SSatish Balay 819d64ed03dSBarry Smith PetscFunctionBegin; 8200bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8210bdbc534SSatish Balay size = baij->ht_size; 822fef45726SSatish Balay 8230bdbc534SSatish Balay if (baij->ht) { 8240bdbc534SSatish Balay PetscFunctionReturn(0); 825596b8d2eSBarry Smith } 8260bdbc534SSatish Balay 827fef45726SSatish Balay /* Allocate Memory for Hash Table */ 828b9e4cc15SSatish Balay baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 829b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 830b9e4cc15SSatish Balay HD = baij->hd; 831a07cd24cSSatish Balay HT = baij->ht; 832b9e4cc15SSatish Balay 833b9e4cc15SSatish Balay 834c2760754SSatish Balay PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 8350bdbc534SSatish Balay 836596b8d2eSBarry Smith 837596b8d2eSBarry Smith /* Loop Over A */ 8380bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 839596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 8400bdbc534SSatish Balay row = i+rstart; 8410bdbc534SSatish Balay col = aj[j]+cstart; 842596b8d2eSBarry Smith 843187ce0cbSSatish Balay key = row*Nbs + col + 1; 844c2760754SSatish Balay h1 = HASH(size,key,tmp); 8450bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 8460bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8470bdbc534SSatish Balay HT[(h1+k)%size] = key; 8480bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 849596b8d2eSBarry Smith break; 850187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 851187ce0cbSSatish Balay } else { 852187ce0cbSSatish Balay ct++; 853187ce0cbSSatish Balay #endif 854596b8d2eSBarry Smith } 855187ce0cbSSatish Balay } 856187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 857187ce0cbSSatish Balay if (k> max) max = k; 858187ce0cbSSatish Balay #endif 859596b8d2eSBarry Smith } 860596b8d2eSBarry Smith } 861596b8d2eSBarry Smith /* Loop Over B */ 8620bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 863596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 8640bdbc534SSatish Balay row = i+rstart; 8650bdbc534SSatish Balay col = garray[bj[j]]; 866187ce0cbSSatish Balay key = row*Nbs + col + 1; 867c2760754SSatish Balay h1 = HASH(size,key,tmp); 8680bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 8690bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8700bdbc534SSatish Balay HT[(h1+k)%size] = key; 8710bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 872596b8d2eSBarry Smith break; 873187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 874187ce0cbSSatish Balay } else { 875187ce0cbSSatish Balay ct++; 876187ce0cbSSatish Balay #endif 877596b8d2eSBarry Smith } 878187ce0cbSSatish Balay } 879187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 880187ce0cbSSatish Balay if (k> max) max = k; 881187ce0cbSSatish Balay #endif 882596b8d2eSBarry Smith } 883596b8d2eSBarry Smith } 884596b8d2eSBarry Smith 885596b8d2eSBarry Smith /* Print Summary */ 886187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 887c2760754SSatish Balay for ( i=0,j=0; i<size; i++) 888596b8d2eSBarry Smith if (HT[i]) {j++;} 889187ce0cbSSatish Balay PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 890187ce0cbSSatish Balay (j== 0)? 0.0:((double)(ct+j))/j,max); 891187ce0cbSSatish Balay #endif 8923a40ed3dSBarry Smith PetscFunctionReturn(0); 893596b8d2eSBarry Smith } 89457b952d6SSatish Balay 8955615d1e5SSatish Balay #undef __FUNC__ 8965615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 897ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 89857b952d6SSatish Balay { 89957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 90057b952d6SSatish Balay MPI_Status *send_status,recv_status; 90157b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 902b7029e64SSatish Balay int bs=baij->bs,row,col,other_disassembled; 90357b952d6SSatish Balay Scalar *values,val; 904e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 90557b952d6SSatish Balay 906d64ed03dSBarry Smith PetscFunctionBegin; 90757b952d6SSatish Balay /* wait on receives */ 90857b952d6SSatish Balay while (count) { 909ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 91057b952d6SSatish Balay /* unpack receives into our local space */ 91157b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 912ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 91357b952d6SSatish Balay n = n/3; 91457b952d6SSatish Balay for ( i=0; i<n; i++ ) { 91557b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 91657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 91757b952d6SSatish Balay val = values[3*i+2]; 91857b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 91957b952d6SSatish Balay col -= baij->cstart*bs; 9206fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 921d64ed03dSBarry Smith } else { 92257b952d6SSatish Balay if (mat->was_assembled) { 923905e6a2fSBarry Smith if (!baij->colmap) { 924905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 925905e6a2fSBarry Smith } 926*48e59246SSatish Balay #if defined (USE_CTABLE) 927*48e59246SSatish Balay col = TableFind( baij->colmap, col/bs + 1 ) - 1 + col%bs; 928*48e59246SSatish Balay #else 929a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 930*48e59246SSatish Balay #endif 93157b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 93257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 93357b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 93457b952d6SSatish Balay } 93557b952d6SSatish Balay } 9366fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 93757b952d6SSatish Balay } 93857b952d6SSatish Balay } 93957b952d6SSatish Balay count--; 94057b952d6SSatish Balay } 941570da906SBarry Smith if (baij->recv_waits) PetscFree(baij->recv_waits); 942570da906SBarry Smith if (baij->rvalues) PetscFree(baij->rvalues); 94357b952d6SSatish Balay 94457b952d6SSatish Balay /* wait on sends */ 94557b952d6SSatish Balay if (baij->nsends) { 946d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 947ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 94857b952d6SSatish Balay PetscFree(send_status); 94957b952d6SSatish Balay } 950570da906SBarry Smith if (baij->send_waits) PetscFree(baij->send_waits); 951570da906SBarry Smith if (baij->svalues) PetscFree(baij->svalues); 95257b952d6SSatish Balay 95357b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 95457b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 95557b952d6SSatish Balay 95657b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 95757b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 9586e713f22SBarry Smith /* 9596e713f22SBarry Smith if nonzero structure of submatrix B cannot change then we know that 9606e713f22SBarry Smith no processor disassembled thus we can skip this stuff 9616e713f22SBarry Smith */ 9626e713f22SBarry Smith if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 963ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 96457b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 96557b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 96657b952d6SSatish Balay } 9676e713f22SBarry Smith } 96857b952d6SSatish Balay 9696d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 97057b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 97157b952d6SSatish Balay } 97257b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 97357b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 97457b952d6SSatish Balay 975187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 976187ce0cbSSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 977187ce0cbSSatish Balay PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 978187ce0cbSSatish Balay ((double)baij->ht_total_ct)/baij->ht_insert_ct); 979187ce0cbSSatish Balay baij->ht_total_ct = 0; 980187ce0cbSSatish Balay baij->ht_insert_ct = 0; 981187ce0cbSSatish Balay } 982187ce0cbSSatish Balay #endif 983133cdb44SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 984133cdb44SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 985f830108cSBarry Smith mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 986f830108cSBarry Smith mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 987bd7f49f5SSatish Balay } 988187ce0cbSSatish Balay 98957b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 9903a40ed3dSBarry Smith PetscFunctionReturn(0); 99157b952d6SSatish Balay } 99257b952d6SSatish Balay 9935615d1e5SSatish Balay #undef __FUNC__ 9945615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 99557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 99657b952d6SSatish Balay { 99757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 99857b952d6SSatish Balay int ierr; 99957b952d6SSatish Balay 1000d64ed03dSBarry Smith PetscFunctionBegin; 100157b952d6SSatish Balay if (baij->size == 1) { 100257b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1003a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 10043a40ed3dSBarry Smith PetscFunctionReturn(0); 100557b952d6SSatish Balay } 100657b952d6SSatish Balay 10075615d1e5SSatish Balay #undef __FUNC__ 10085615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 100957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 101057b952d6SSatish Balay { 101157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 101277ed5343SBarry Smith int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 101357b952d6SSatish Balay FILE *fd; 101457b952d6SSatish Balay ViewerType vtype; 101557b952d6SSatish Balay 1016d64ed03dSBarry Smith PetscFunctionBegin; 101757b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 10183f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 101957b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 1020639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 10214e220ebcSLois Curfman McInnes MatInfo info; 102257b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 102357b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 10244e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 102557b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 102657b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 10274e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10284e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 10294e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 10304e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 10314e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 10324e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 103357b952d6SSatish Balay fflush(fd); 103457b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 103557b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 10363a40ed3dSBarry Smith PetscFunctionReturn(0); 1037d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1038bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 10393a40ed3dSBarry Smith PetscFunctionReturn(0); 104057b952d6SSatish Balay } 104157b952d6SSatish Balay } 104257b952d6SSatish Balay 10433f1db9ecSBarry Smith if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 104457b952d6SSatish Balay Draw draw; 104557b952d6SSatish Balay PetscTruth isnull; 104677ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 10473a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 104857b952d6SSatish Balay } 104957b952d6SSatish Balay 105057b952d6SSatish Balay if (size == 1) { 105157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1052d64ed03dSBarry Smith } else { 105357b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 105457b952d6SSatish Balay Mat A; 105557b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 105640011551SBarry Smith int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 105757b952d6SSatish Balay int mbs = baij->mbs; 105857b952d6SSatish Balay Scalar *a; 105957b952d6SSatish Balay 106057b952d6SSatish Balay if (!rank) { 106155843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1062d64ed03dSBarry Smith } else { 106355843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 106457b952d6SSatish Balay } 106557b952d6SSatish Balay PLogObjectParent(mat,A); 106657b952d6SSatish Balay 106757b952d6SSatish Balay /* copy over the A part */ 106857b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 106957b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 107057b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 107157b952d6SSatish Balay 107257b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 107357b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 107457b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 107557b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 107657b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 107757b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1078cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1079cee3aa6bSSatish Balay col++; a += bs; 108057b952d6SSatish Balay } 108157b952d6SSatish Balay } 108257b952d6SSatish Balay } 108357b952d6SSatish Balay /* copy over the B part */ 108457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 108557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 108657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 108757b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 108857b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 108957b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 109057b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 109157b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1092cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1093cee3aa6bSSatish Balay col++; a += bs; 109457b952d6SSatish Balay } 109557b952d6SSatish Balay } 109657b952d6SSatish Balay } 109757b952d6SSatish Balay PetscFree(rvals); 10986d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10996d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 110055843e3eSBarry Smith /* 110155843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 110255843e3eSBarry Smith synchronized across all processors that share the Draw object 110355843e3eSBarry Smith */ 11043f1db9ecSBarry Smith if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 110557b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 110657b952d6SSatish Balay } 110757b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 110857b952d6SSatish Balay } 11093a40ed3dSBarry Smith PetscFunctionReturn(0); 111057b952d6SSatish Balay } 111157b952d6SSatish Balay 111257b952d6SSatish Balay 111357b952d6SSatish Balay 11145615d1e5SSatish Balay #undef __FUNC__ 11155615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 1116e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 111757b952d6SSatish Balay { 111857b952d6SSatish Balay int ierr; 111957b952d6SSatish Balay ViewerType vtype; 112057b952d6SSatish Balay 1121d64ed03dSBarry Smith PetscFunctionBegin; 112257b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 11233f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 11243f1db9ecSBarry Smith PetscTypeCompare(vtype,MATLAB_VIEWER)) { 112557b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 11263f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 11273a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 11285cd90555SBarry Smith } else { 11295cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 113057b952d6SSatish Balay } 11313a40ed3dSBarry Smith PetscFunctionReturn(0); 113257b952d6SSatish Balay } 113357b952d6SSatish Balay 11345615d1e5SSatish Balay #undef __FUNC__ 11355615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1136e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 113779bdfe76SSatish Balay { 113879bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 113979bdfe76SSatish Balay int ierr; 114079bdfe76SSatish Balay 1141d64ed03dSBarry Smith PetscFunctionBegin; 114298dd23e9SBarry Smith if (--mat->refct > 0) PetscFunctionReturn(0); 114398dd23e9SBarry Smith 114498dd23e9SBarry Smith if (mat->mapping) { 114598dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 114698dd23e9SBarry Smith } 114798dd23e9SBarry Smith if (mat->bmapping) { 114898dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 114998dd23e9SBarry Smith } 115061b13de0SBarry Smith if (mat->rmap) { 115161b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 115261b13de0SBarry Smith } 115361b13de0SBarry Smith if (mat->cmap) { 115461b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 115561b13de0SBarry Smith } 11563a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 1157e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 115879bdfe76SSatish Balay #endif 115979bdfe76SSatish Balay 116083e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 116179bdfe76SSatish Balay PetscFree(baij->rowners); 116279bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 116379bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1164*48e59246SSatish Balay #if defined (USE_CTABLE) 1165*48e59246SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 1166*48e59246SSatish Balay #else 116779bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 1168*48e59246SSatish Balay #endif 116979bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 117079bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 117179bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 117279bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 117330793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 1174b9e4cc15SSatish Balay if (baij->hd) PetscFree(baij->hd); 117579bdfe76SSatish Balay PetscFree(baij); 117679bdfe76SSatish Balay PLogObjectDestroy(mat); 117779bdfe76SSatish Balay PetscHeaderDestroy(mat); 11783a40ed3dSBarry Smith PetscFunctionReturn(0); 117979bdfe76SSatish Balay } 118079bdfe76SSatish Balay 11815615d1e5SSatish Balay #undef __FUNC__ 11825615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1183ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1184cee3aa6bSSatish Balay { 1185cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 118647b4a8eaSLois Curfman McInnes int ierr, nt; 1187cee3aa6bSSatish Balay 1188d64ed03dSBarry Smith PetscFunctionBegin; 1189e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 119047b4a8eaSLois Curfman McInnes if (nt != a->n) { 1191a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 119247b4a8eaSLois Curfman McInnes } 1193e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 119447b4a8eaSLois Curfman McInnes if (nt != a->m) { 1195a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 119647b4a8eaSLois Curfman McInnes } 119743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1198f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 119943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1200f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 120143a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12023a40ed3dSBarry Smith PetscFunctionReturn(0); 1203cee3aa6bSSatish Balay } 1204cee3aa6bSSatish Balay 12055615d1e5SSatish Balay #undef __FUNC__ 12065615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1207ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1208cee3aa6bSSatish Balay { 1209cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1210cee3aa6bSSatish Balay int ierr; 1211d64ed03dSBarry Smith 1212d64ed03dSBarry Smith PetscFunctionBegin; 121343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1214f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 121543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1216f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 12173a40ed3dSBarry Smith PetscFunctionReturn(0); 1218cee3aa6bSSatish Balay } 1219cee3aa6bSSatish Balay 12205615d1e5SSatish Balay #undef __FUNC__ 12215615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1222ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1223cee3aa6bSSatish Balay { 1224cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1225cee3aa6bSSatish Balay int ierr; 1226cee3aa6bSSatish Balay 1227d64ed03dSBarry Smith PetscFunctionBegin; 1228cee3aa6bSSatish Balay /* do nondiagonal part */ 1229f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1230cee3aa6bSSatish Balay /* send it on its way */ 1231537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1232cee3aa6bSSatish Balay /* do local part */ 1233f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1234cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1235cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1236cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1237639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12383a40ed3dSBarry Smith PetscFunctionReturn(0); 1239cee3aa6bSSatish Balay } 1240cee3aa6bSSatish Balay 12415615d1e5SSatish Balay #undef __FUNC__ 12425615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1243ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1244cee3aa6bSSatish Balay { 1245cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1246cee3aa6bSSatish Balay int ierr; 1247cee3aa6bSSatish Balay 1248d64ed03dSBarry Smith PetscFunctionBegin; 1249cee3aa6bSSatish Balay /* do nondiagonal part */ 1250f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1251cee3aa6bSSatish Balay /* send it on its way */ 1252537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1253cee3aa6bSSatish Balay /* do local part */ 1254f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1255cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1256cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1257cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1258537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 12593a40ed3dSBarry Smith PetscFunctionReturn(0); 1260cee3aa6bSSatish Balay } 1261cee3aa6bSSatish Balay 1262cee3aa6bSSatish Balay /* 1263cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1264cee3aa6bSSatish Balay diagonal block 1265cee3aa6bSSatish Balay */ 12665615d1e5SSatish Balay #undef __FUNC__ 12675615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1268ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1269cee3aa6bSSatish Balay { 1270cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 12713a40ed3dSBarry Smith int ierr; 1272d64ed03dSBarry Smith 1273d64ed03dSBarry Smith PetscFunctionBegin; 1274a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 12753a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 12763a40ed3dSBarry Smith PetscFunctionReturn(0); 1277cee3aa6bSSatish Balay } 1278cee3aa6bSSatish Balay 12795615d1e5SSatish Balay #undef __FUNC__ 12805615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1281ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1282cee3aa6bSSatish Balay { 1283cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1284cee3aa6bSSatish Balay int ierr; 1285d64ed03dSBarry Smith 1286d64ed03dSBarry Smith PetscFunctionBegin; 1287cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1288cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 12893a40ed3dSBarry Smith PetscFunctionReturn(0); 1290cee3aa6bSSatish Balay } 1291026e39d0SSatish Balay 12925615d1e5SSatish Balay #undef __FUNC__ 12935615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1294ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 129557b952d6SSatish Balay { 129657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1297d64ed03dSBarry Smith 1298d64ed03dSBarry Smith PetscFunctionBegin; 1299bd7f49f5SSatish Balay if (m) *m = mat->M; 1300bd7f49f5SSatish Balay if (n) *n = mat->N; 13013a40ed3dSBarry Smith PetscFunctionReturn(0); 130257b952d6SSatish Balay } 130357b952d6SSatish Balay 13045615d1e5SSatish Balay #undef __FUNC__ 13055615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1306ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 130757b952d6SSatish Balay { 130857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1309d64ed03dSBarry Smith 1310d64ed03dSBarry Smith PetscFunctionBegin; 1311f830108cSBarry Smith *m = mat->m; *n = mat->n; 13123a40ed3dSBarry Smith PetscFunctionReturn(0); 131357b952d6SSatish Balay } 131457b952d6SSatish Balay 13155615d1e5SSatish Balay #undef __FUNC__ 13165615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1317ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 131857b952d6SSatish Balay { 131957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1320d64ed03dSBarry Smith 1321d64ed03dSBarry Smith PetscFunctionBegin; 132257b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 13233a40ed3dSBarry Smith PetscFunctionReturn(0); 132457b952d6SSatish Balay } 132557b952d6SSatish Balay 1326acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1327acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1328acdf5bf4SSatish Balay 13295615d1e5SSatish Balay #undef __FUNC__ 13305615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1331acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1332acdf5bf4SSatish Balay { 1333acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1334acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1335acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1336d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1337d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1338acdf5bf4SSatish Balay 1339d64ed03dSBarry Smith PetscFunctionBegin; 1340a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1341acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1342acdf5bf4SSatish Balay 1343acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1344acdf5bf4SSatish Balay /* 1345acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1346acdf5bf4SSatish Balay */ 1347acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1348bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1349bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1350acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1351acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1352acdf5bf4SSatish Balay } 1353acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1354acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1355acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1356acdf5bf4SSatish Balay } 1357acdf5bf4SSatish Balay 1358a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1359d9d09a02SSatish Balay lrow = row - brstart; 1360acdf5bf4SSatish Balay 1361acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1362acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1363acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1364f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1365f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1366acdf5bf4SSatish Balay nztot = nzA + nzB; 1367acdf5bf4SSatish Balay 1368acdf5bf4SSatish Balay cmap = mat->garray; 1369acdf5bf4SSatish Balay if (v || idx) { 1370acdf5bf4SSatish Balay if (nztot) { 1371acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1372acdf5bf4SSatish Balay int imark = -1; 1373acdf5bf4SSatish Balay if (v) { 1374acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1375acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1376d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1377acdf5bf4SSatish Balay else break; 1378acdf5bf4SSatish Balay } 1379acdf5bf4SSatish Balay imark = i; 1380acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1381acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1382acdf5bf4SSatish Balay } 1383acdf5bf4SSatish Balay if (idx) { 1384acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1385acdf5bf4SSatish Balay if (imark > -1) { 1386acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1387bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1388acdf5bf4SSatish Balay } 1389acdf5bf4SSatish Balay } else { 1390acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1391d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1392d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1393acdf5bf4SSatish Balay else break; 1394acdf5bf4SSatish Balay } 1395acdf5bf4SSatish Balay imark = i; 1396acdf5bf4SSatish Balay } 1397d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1398d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1399acdf5bf4SSatish Balay } 1400d64ed03dSBarry Smith } else { 1401d212a18eSSatish Balay if (idx) *idx = 0; 1402d212a18eSSatish Balay if (v) *v = 0; 1403d212a18eSSatish Balay } 1404acdf5bf4SSatish Balay } 1405acdf5bf4SSatish Balay *nz = nztot; 1406f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1407f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 14083a40ed3dSBarry Smith PetscFunctionReturn(0); 1409acdf5bf4SSatish Balay } 1410acdf5bf4SSatish Balay 14115615d1e5SSatish Balay #undef __FUNC__ 14125615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1413acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1414acdf5bf4SSatish Balay { 1415acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1416d64ed03dSBarry Smith 1417d64ed03dSBarry Smith PetscFunctionBegin; 1418acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1419a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1420acdf5bf4SSatish Balay } 1421acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14223a40ed3dSBarry Smith PetscFunctionReturn(0); 1423acdf5bf4SSatish Balay } 1424acdf5bf4SSatish Balay 14255615d1e5SSatish Balay #undef __FUNC__ 14265615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1427ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14285a838052SSatish Balay { 14295a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1430d64ed03dSBarry Smith 1431d64ed03dSBarry Smith PetscFunctionBegin; 14325a838052SSatish Balay *bs = baij->bs; 14333a40ed3dSBarry Smith PetscFunctionReturn(0); 14345a838052SSatish Balay } 14355a838052SSatish Balay 14365615d1e5SSatish Balay #undef __FUNC__ 14375615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1438ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 143958667388SSatish Balay { 144058667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 144158667388SSatish Balay int ierr; 1442d64ed03dSBarry Smith 1443d64ed03dSBarry Smith PetscFunctionBegin; 144458667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 144558667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 14463a40ed3dSBarry Smith PetscFunctionReturn(0); 144758667388SSatish Balay } 14480ac07820SSatish Balay 14495615d1e5SSatish Balay #undef __FUNC__ 14505615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1451ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14520ac07820SSatish Balay { 14534e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 14544e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 14557d57db60SLois Curfman McInnes int ierr; 14567d57db60SLois Curfman McInnes double isend[5], irecv[5]; 14570ac07820SSatish Balay 1458d64ed03dSBarry Smith PetscFunctionBegin; 14594e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 14604e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 14610e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1462de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14634e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 14640e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1465de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14660ac07820SSatish Balay if (flag == MAT_LOCAL) { 14674e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14684e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14694e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14704e220ebcSLois Curfman McInnes info->memory = isend[3]; 14714e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14720ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1473f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 14744e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14754e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14764e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14774e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14784e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14790ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1480f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 14814e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14824e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14834e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14844e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14854e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14860ac07820SSatish Balay } 14875a5d4f66SBarry Smith info->rows_global = (double)a->M; 14885a5d4f66SBarry Smith info->columns_global = (double)a->N; 14895a5d4f66SBarry Smith info->rows_local = (double)a->m; 14905a5d4f66SBarry Smith info->columns_local = (double)a->N; 14914e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14924e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14934e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14943a40ed3dSBarry Smith PetscFunctionReturn(0); 14950ac07820SSatish Balay } 14960ac07820SSatish Balay 14975615d1e5SSatish Balay #undef __FUNC__ 14985615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1499ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 150058667388SSatish Balay { 150158667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 150258667388SSatish Balay 1503d64ed03dSBarry Smith PetscFunctionBegin; 150458667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 150558667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 15066da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1507c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 150896854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1509c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1510b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1511b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1512b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1513aeafbbfcSLois Curfman McInnes a->roworiented = 1; 151458667388SSatish Balay MatSetOption(a->A,op); 151558667388SSatish Balay MatSetOption(a->B,op); 1516b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 15176da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 151858667388SSatish Balay op == MAT_SYMMETRIC || 151958667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1520b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 1521b51ba29fSSatish Balay op == MAT_USE_HASH_TABLE) 152258667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 152358667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 152458667388SSatish Balay a->roworiented = 0; 152558667388SSatish Balay MatSetOption(a->A,op); 152658667388SSatish Balay MatSetOption(a->B,op); 15272b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 152890f02eecSBarry Smith a->donotstash = 1; 1529d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1530d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1531133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 1532133cdb44SSatish Balay a->ht_flag = 1; 1533d64ed03dSBarry Smith } else { 1534d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1535d64ed03dSBarry Smith } 15363a40ed3dSBarry Smith PetscFunctionReturn(0); 153758667388SSatish Balay } 153858667388SSatish Balay 15395615d1e5SSatish Balay #undef __FUNC__ 15405615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1541ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15420ac07820SSatish Balay { 15430ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 15440ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15450ac07820SSatish Balay Mat B; 154640011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 15470ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15480ac07820SSatish Balay Scalar *a; 15490ac07820SSatish Balay 1550d64ed03dSBarry Smith PetscFunctionBegin; 1551a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 15520ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 15530ac07820SSatish Balay CHKERRQ(ierr); 15540ac07820SSatish Balay 15550ac07820SSatish Balay /* copy over the A part */ 15560ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 15570ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15580ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 15590ac07820SSatish Balay 15600ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 15610ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15620ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 15630ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 15640ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15650ac07820SSatish Balay for (k=0; k<bs; k++ ) { 15660ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15670ac07820SSatish Balay col++; a += bs; 15680ac07820SSatish Balay } 15690ac07820SSatish Balay } 15700ac07820SSatish Balay } 15710ac07820SSatish Balay /* copy over the B part */ 15720ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 15730ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15740ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 15750ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15760ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 15770ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 15780ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 15790ac07820SSatish Balay for (k=0; k<bs; k++ ) { 15800ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15810ac07820SSatish Balay col++; a += bs; 15820ac07820SSatish Balay } 15830ac07820SSatish Balay } 15840ac07820SSatish Balay } 15850ac07820SSatish Balay PetscFree(rvals); 15860ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15870ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15880ac07820SSatish Balay 15890ac07820SSatish Balay if (matout != PETSC_NULL) { 15900ac07820SSatish Balay *matout = B; 15910ac07820SSatish Balay } else { 1592f830108cSBarry Smith PetscOps *Abops; 1593cc2dc46cSBarry Smith MatOps Aops; 1594f830108cSBarry Smith 15950ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 15960ac07820SSatish Balay PetscFree(baij->rowners); 15970ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 15980ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 15990ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 16000ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 16010ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 16020ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 16030ac07820SSatish Balay PetscFree(baij); 1604f830108cSBarry Smith 1605f830108cSBarry Smith /* 1606f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1607f830108cSBarry Smith A pointers for the bops and ops but copy everything 1608f830108cSBarry Smith else from C. 1609f830108cSBarry Smith */ 1610f830108cSBarry Smith Abops = A->bops; 1611f830108cSBarry Smith Aops = A->ops; 1612f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1613f830108cSBarry Smith A->bops = Abops; 1614f830108cSBarry Smith A->ops = Aops; 1615f830108cSBarry Smith 16160ac07820SSatish Balay PetscHeaderDestroy(B); 16170ac07820SSatish Balay } 16183a40ed3dSBarry Smith PetscFunctionReturn(0); 16190ac07820SSatish Balay } 16200e95ebc0SSatish Balay 16215615d1e5SSatish Balay #undef __FUNC__ 16225615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 16230e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 16240e95ebc0SSatish Balay { 16250e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 16260e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 16270e95ebc0SSatish Balay int ierr,s1,s2,s3; 16280e95ebc0SSatish Balay 1629d64ed03dSBarry Smith PetscFunctionBegin; 16300e95ebc0SSatish Balay if (ll) { 16310e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 16320e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1633a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 16340e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 16350e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 16360e95ebc0SSatish Balay } 1637a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 16383a40ed3dSBarry Smith PetscFunctionReturn(0); 16390e95ebc0SSatish Balay } 16400e95ebc0SSatish Balay 16415615d1e5SSatish Balay #undef __FUNC__ 16425615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1643ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 16440ac07820SSatish Balay { 16450ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 16460ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1647a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 16480ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16490ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1650a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16510ac07820SSatish Balay MPI_Comm comm = A->comm; 16520ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16530ac07820SSatish Balay MPI_Status recv_status,*send_status; 16540ac07820SSatish Balay IS istmp; 165572dacd9aSBarry Smith PetscTruth localdiag; 16560ac07820SSatish Balay 1657d64ed03dSBarry Smith PetscFunctionBegin; 16580ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 16590ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 16600ac07820SSatish Balay 16610ac07820SSatish Balay /* first count number of contributors to each processor */ 16620ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 16630ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 16640ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 16650ac07820SSatish Balay for ( i=0; i<N; i++ ) { 16660ac07820SSatish Balay idx = rows[i]; 16670ac07820SSatish Balay found = 0; 16680ac07820SSatish Balay for ( j=0; j<size; j++ ) { 16690ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 16700ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 16710ac07820SSatish Balay } 16720ac07820SSatish Balay } 1673a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 16740ac07820SSatish Balay } 16750ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 16760ac07820SSatish Balay 16770ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 16780ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1679ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 16800ac07820SSatish Balay nrecvs = work[rank]; 1681ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 16820ac07820SSatish Balay nmax = work[rank]; 16830ac07820SSatish Balay PetscFree(work); 16840ac07820SSatish Balay 16850ac07820SSatish Balay /* post receives: */ 1686d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1687d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 16880ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1689ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 16900ac07820SSatish Balay } 16910ac07820SSatish Balay 16920ac07820SSatish Balay /* do sends: 16930ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 16940ac07820SSatish Balay the ith processor 16950ac07820SSatish Balay */ 16960ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1697ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 16980ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 16990ac07820SSatish Balay starts[0] = 0; 17000ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 17010ac07820SSatish Balay for ( i=0; i<N; i++ ) { 17020ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17030ac07820SSatish Balay } 17040ac07820SSatish Balay ISRestoreIndices(is,&rows); 17050ac07820SSatish Balay 17060ac07820SSatish Balay starts[0] = 0; 17070ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 17080ac07820SSatish Balay count = 0; 17090ac07820SSatish Balay for ( i=0; i<size; i++ ) { 17100ac07820SSatish Balay if (procs[i]) { 1711ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17120ac07820SSatish Balay } 17130ac07820SSatish Balay } 17140ac07820SSatish Balay PetscFree(starts); 17150ac07820SSatish Balay 17160ac07820SSatish Balay base = owners[rank]*bs; 17170ac07820SSatish Balay 17180ac07820SSatish Balay /* wait on receives */ 17190ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 17200ac07820SSatish Balay source = lens + nrecvs; 17210ac07820SSatish Balay count = nrecvs; slen = 0; 17220ac07820SSatish Balay while (count) { 1723ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17240ac07820SSatish Balay /* unpack receives into our local space */ 1725ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17260ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17270ac07820SSatish Balay lens[imdex] = n; 17280ac07820SSatish Balay slen += n; 17290ac07820SSatish Balay count--; 17300ac07820SSatish Balay } 17310ac07820SSatish Balay PetscFree(recv_waits); 17320ac07820SSatish Balay 17330ac07820SSatish Balay /* move the data into the send scatter */ 17340ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 17350ac07820SSatish Balay count = 0; 17360ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 17370ac07820SSatish Balay values = rvalues + i*nmax; 17380ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 17390ac07820SSatish Balay lrows[count++] = values[j] - base; 17400ac07820SSatish Balay } 17410ac07820SSatish Balay } 17420ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 17430ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 17440ac07820SSatish Balay 17450ac07820SSatish Balay /* actually zap the local rows */ 1746029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 17470ac07820SSatish Balay PLogObjectParent(A,istmp); 1748a07cd24cSSatish Balay 174972dacd9aSBarry Smith /* 175072dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 175172dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 175272dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 175372dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 175472dacd9aSBarry Smith 175572dacd9aSBarry Smith Contributed by: Mathew Knepley 175672dacd9aSBarry Smith */ 175772dacd9aSBarry Smith localdiag = PETSC_FALSE; 175872dacd9aSBarry Smith if (diag && (l->A->M == l->A->N)) { 175972dacd9aSBarry Smith localdiag = PETSC_TRUE; 176072dacd9aSBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 176172dacd9aSBarry Smith } else { 1762a07cd24cSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 176372dacd9aSBarry Smith } 17640ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 17650ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 17660ac07820SSatish Balay 176772dacd9aSBarry Smith if (diag && (localdiag == PETSC_FALSE)) { 1768a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1769a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1770a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1771a07cd24cSSatish Balay } 1772a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1773a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1774a07cd24cSSatish Balay } 1775a07cd24cSSatish Balay PetscFree(lrows); 1776a07cd24cSSatish Balay 17770ac07820SSatish Balay /* wait on sends */ 17780ac07820SSatish Balay if (nsends) { 1779d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1780ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 17810ac07820SSatish Balay PetscFree(send_status); 17820ac07820SSatish Balay } 17830ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 17840ac07820SSatish Balay 17853a40ed3dSBarry Smith PetscFunctionReturn(0); 17860ac07820SSatish Balay } 178772dacd9aSBarry Smith 1788ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 17895615d1e5SSatish Balay #undef __FUNC__ 17905615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1791ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1792ba4ca20aSSatish Balay { 1793ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 179425fdafccSSatish Balay MPI_Comm comm = A->comm; 1795133cdb44SSatish Balay static int called = 0; 17963a40ed3dSBarry Smith int ierr; 1797ba4ca20aSSatish Balay 1798d64ed03dSBarry Smith PetscFunctionBegin; 17993a40ed3dSBarry Smith if (!a->rank) { 18003a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 180125fdafccSSatish Balay } 180225fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1803133cdb44SSatish Balay (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1804133cdb44SSatish Balay (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 18053a40ed3dSBarry Smith PetscFunctionReturn(0); 1806ba4ca20aSSatish Balay } 18070ac07820SSatish Balay 18085615d1e5SSatish Balay #undef __FUNC__ 18095615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1810ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1811bb5a7306SBarry Smith { 1812bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1813bb5a7306SBarry Smith int ierr; 1814d64ed03dSBarry Smith 1815d64ed03dSBarry Smith PetscFunctionBegin; 1816bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 18173a40ed3dSBarry Smith PetscFunctionReturn(0); 1818bb5a7306SBarry Smith } 1819bb5a7306SBarry Smith 18202e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18210ac07820SSatish Balay 182279bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1823cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1824cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1825cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1826cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1827cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1828cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 1829cc2dc46cSBarry Smith MatMultTrans_MPIBAIJ, 1830cc2dc46cSBarry Smith MatMultTransAdd_MPIBAIJ, 1831cc2dc46cSBarry Smith 0, 1832cc2dc46cSBarry Smith 0, 1833cc2dc46cSBarry Smith 0, 1834cc2dc46cSBarry Smith 0, 1835cc2dc46cSBarry Smith 0, 1836cc2dc46cSBarry Smith 0, 1837cc2dc46cSBarry Smith 0, 1838cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1839cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 1840cc2dc46cSBarry Smith 0, 1841cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1842cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1843cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1844cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1845cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1846cc2dc46cSBarry Smith 0, 1847cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1848cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1849cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1850cc2dc46cSBarry Smith 0, 1851cc2dc46cSBarry Smith 0, 1852cc2dc46cSBarry Smith 0, 1853cc2dc46cSBarry Smith 0, 1854cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1855cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1856cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1857cc2dc46cSBarry Smith 0, 1858cc2dc46cSBarry Smith 0, 1859cc2dc46cSBarry Smith 0, 1860cc2dc46cSBarry Smith 0, 18612e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1862cc2dc46cSBarry Smith 0, 1863cc2dc46cSBarry Smith 0, 1864cc2dc46cSBarry Smith 0, 1865cc2dc46cSBarry Smith 0, 1866cc2dc46cSBarry Smith 0, 1867cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1868cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1869cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1870cc2dc46cSBarry Smith 0, 1871cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1872cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1873cc2dc46cSBarry Smith 0, 1874cc2dc46cSBarry Smith 0, 1875cc2dc46cSBarry Smith 0, 1876cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1877cc2dc46cSBarry Smith 0, 1878cc2dc46cSBarry Smith 0, 1879cc2dc46cSBarry Smith 0, 1880cc2dc46cSBarry Smith 0, 1881cc2dc46cSBarry Smith 0, 1882cc2dc46cSBarry Smith 0, 1883cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1884cc2dc46cSBarry Smith 0, 1885cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1886cc2dc46cSBarry Smith 0, 1887cc2dc46cSBarry Smith 0, 1888cc2dc46cSBarry Smith 0, 1889cc2dc46cSBarry Smith MatGetMaps_Petsc}; 189079bdfe76SSatish Balay 1891e18c124aSSatish Balay #include "pc.h" 1892e18c124aSSatish Balay EXTERN_C_BEGIN 1893e18c124aSSatish Balay extern int PCSetUp_BJacobi_BAIJ(PC); 1894e18c124aSSatish Balay EXTERN_C_END 189579bdfe76SSatish Balay 18965615d1e5SSatish Balay #undef __FUNC__ 18975615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 189879bdfe76SSatish Balay /*@C 189979bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 190079bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 190179bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 190279bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 190379bdfe76SSatish Balay performance can be increased by more than a factor of 50. 190479bdfe76SSatish Balay 1905db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1906db81eaa0SLois Curfman McInnes 190779bdfe76SSatish Balay Input Parameters: 1908db81eaa0SLois Curfman McInnes + comm - MPI communicator 190979bdfe76SSatish Balay . bs - size of blockk 191079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 191192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 191292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 191392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 191492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 191592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1916be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1917be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 191879bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 191979bdfe76SSatish Balay submatrix (same for all local rows) 192092e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 192192e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 1922db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 192392e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 192479bdfe76SSatish Balay submatrix (same for all local rows). 1925db81eaa0SLois Curfman McInnes - o_nzz - array containing the number of nonzeros in the various block rows of the 192692e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 192792e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 192879bdfe76SSatish Balay 192979bdfe76SSatish Balay Output Parameter: 193079bdfe76SSatish Balay . A - the matrix 193179bdfe76SSatish Balay 1932db81eaa0SLois Curfman McInnes Options Database Keys: 1933db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 1934db81eaa0SLois Curfman McInnes block calculations (much slower) 1935db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 1936494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1937494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 19383ffaccefSLois Curfman McInnes 1939b259b22eSLois Curfman McInnes Notes: 194079bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 194179bdfe76SSatish Balay (possibly both). 194279bdfe76SSatish Balay 1943be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1944be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 1945be79a94dSBarry Smith 194679bdfe76SSatish Balay Storage Information: 194779bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 194879bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 194979bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 195079bdfe76SSatish Balay local matrix (a rectangular submatrix). 195179bdfe76SSatish Balay 195279bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 195379bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 195479bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 195579bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 195679bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 195779bdfe76SSatish Balay 195879bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 195979bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 196079bdfe76SSatish Balay 1961db81eaa0SLois Curfman McInnes .vb 1962db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 1963db81eaa0SLois Curfman McInnes ------------------- 1964db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 1965db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 1966db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 1967db81eaa0SLois Curfman McInnes ------------------- 1968db81eaa0SLois Curfman McInnes .ve 196979bdfe76SSatish Balay 197079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 197179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 197279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 197357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 197479bdfe76SSatish Balay 1975d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1976d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 197779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 197892e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 197992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 19806da5968aSLois Curfman McInnes matrices. 198179bdfe76SSatish Balay 198292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 198379bdfe76SSatish Balay 1984db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 198579bdfe76SSatish Balay @*/ 198679bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 198779bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 198879bdfe76SSatish Balay { 198979bdfe76SSatish Balay Mat B; 199079bdfe76SSatish Balay Mat_MPIBAIJ *b; 1991133cdb44SSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1992a2ab621fSBarry Smith int flag1 = 0,flag2 = 0; 199379bdfe76SSatish Balay 1994d64ed03dSBarry Smith PetscFunctionBegin; 1995a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 19963914022bSBarry Smith 19973914022bSBarry Smith MPI_Comm_size(comm,&size); 1998494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1999494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 2000494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 20013914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 20023914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 20033914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 20043a40ed3dSBarry Smith PetscFunctionReturn(0); 20053914022bSBarry Smith } 20063914022bSBarry Smith 200779bdfe76SSatish Balay *A = 0; 20083f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 200979bdfe76SSatish Balay PLogObjectCreate(B); 201079bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 201179bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 2012cc2dc46cSBarry Smith PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 20134c50302cSBarry Smith 2014e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 2015e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 201690f02eecSBarry Smith B->mapping = 0; 201779bdfe76SSatish Balay B->factor = 0; 201879bdfe76SSatish Balay B->assembled = PETSC_FALSE; 201979bdfe76SSatish Balay 2020e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 202179bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 202279bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 202379bdfe76SSatish Balay 2024d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2025a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2026d64ed03dSBarry Smith } 2027a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2028a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2029a8c6a408SBarry Smith } 2030a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2031a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2032a8c6a408SBarry Smith } 2033cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2034cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 203579bdfe76SSatish Balay 203679bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 203779bdfe76SSatish Balay work[0] = m; work[1] = n; 203879bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 2039ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 204079bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 204179bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 204279bdfe76SSatish Balay } 204379bdfe76SSatish Balay if (m == PETSC_DECIDE) { 204479bdfe76SSatish Balay Mbs = M/bs; 2045a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 204679bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 204779bdfe76SSatish Balay m = mbs*bs; 204879bdfe76SSatish Balay } 204979bdfe76SSatish Balay if (n == PETSC_DECIDE) { 205079bdfe76SSatish Balay Nbs = N/bs; 2051a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 205279bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 205379bdfe76SSatish Balay n = nbs*bs; 205479bdfe76SSatish Balay } 2055a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 2056a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2057a8c6a408SBarry Smith } 205879bdfe76SSatish Balay 205979bdfe76SSatish Balay b->m = m; B->m = m; 206079bdfe76SSatish Balay b->n = n; B->n = n; 206179bdfe76SSatish Balay b->N = N; B->N = N; 206279bdfe76SSatish Balay b->M = M; B->M = M; 206379bdfe76SSatish Balay b->bs = bs; 206479bdfe76SSatish Balay b->bs2 = bs*bs; 206579bdfe76SSatish Balay b->mbs = mbs; 206679bdfe76SSatish Balay b->nbs = nbs; 206779bdfe76SSatish Balay b->Mbs = Mbs; 206879bdfe76SSatish Balay b->Nbs = Nbs; 206979bdfe76SSatish Balay 2070c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2071c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 2072488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2073488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2074c7fcc2eaSBarry Smith 207579bdfe76SSatish Balay /* build local table of row and column ownerships */ 207679bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2077f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 20780ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2079ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 208079bdfe76SSatish Balay b->rowners[0] = 0; 208179bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 208279bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 208379bdfe76SSatish Balay } 208479bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 208579bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 20864fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 20874fa0d573SSatish Balay b->rend_bs = b->rend * bs; 20884fa0d573SSatish Balay 2089ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 209079bdfe76SSatish Balay b->cowners[0] = 0; 209179bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 209279bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 209379bdfe76SSatish Balay } 209479bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 209579bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 20964fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 20974fa0d573SSatish Balay b->cend_bs = b->cend * bs; 209879bdfe76SSatish Balay 2099a07cd24cSSatish Balay 210079bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2101029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 210279bdfe76SSatish Balay PLogObjectParent(B,b->A); 210379bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2104029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 210579bdfe76SSatish Balay PLogObjectParent(B,b->B); 210679bdfe76SSatish Balay 210779bdfe76SSatish Balay /* build cache for off array entries formed */ 210879bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 210990f02eecSBarry Smith b->donotstash = 0; 211079bdfe76SSatish Balay b->colmap = 0; 211179bdfe76SSatish Balay b->garray = 0; 211279bdfe76SSatish Balay b->roworiented = 1; 211379bdfe76SSatish Balay 211430793edcSSatish Balay /* stuff used in block assembly */ 211530793edcSSatish Balay b->barray = 0; 211630793edcSSatish Balay 211779bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 211879bdfe76SSatish Balay b->lvec = 0; 211979bdfe76SSatish Balay b->Mvctx = 0; 212079bdfe76SSatish Balay 212179bdfe76SSatish Balay /* stuff for MatGetRow() */ 212279bdfe76SSatish Balay b->rowindices = 0; 212379bdfe76SSatish Balay b->rowvalues = 0; 212479bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 212579bdfe76SSatish Balay 2126a07cd24cSSatish Balay /* hash table stuff */ 2127a07cd24cSSatish Balay b->ht = 0; 2128187ce0cbSSatish Balay b->hd = 0; 21290bdbc534SSatish Balay b->ht_size = 0; 2130133cdb44SSatish Balay b->ht_flag = 0; 213125fdafccSSatish Balay b->ht_fact = 0; 2132187ce0cbSSatish Balay b->ht_total_ct = 0; 2133187ce0cbSSatish Balay b->ht_insert_ct = 0; 2134a07cd24cSSatish Balay 213579bdfe76SSatish Balay *A = B; 2136133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2137133cdb44SSatish Balay if (flg) { 2138133cdb44SSatish Balay double fact = 1.39; 2139133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2140133cdb44SSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2141133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2142133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2143133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2144133cdb44SSatish Balay } 2145e18c124aSSatish Balay ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C", 2146e18c124aSSatish Balay "PCSetUp_BJacobi_BAIJ", 2147e18c124aSSatish Balay (void*)PCSetUp_BJacobi_BAIJ);CHKERRQ(ierr); 21483a40ed3dSBarry Smith PetscFunctionReturn(0); 214979bdfe76SSatish Balay } 2150026e39d0SSatish Balay 21515615d1e5SSatish Balay #undef __FUNC__ 21522e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ" 21532e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 21540ac07820SSatish Balay { 21550ac07820SSatish Balay Mat mat; 21560ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 21570ac07820SSatish Balay int ierr, len=0, flg; 21580ac07820SSatish Balay 2159d64ed03dSBarry Smith PetscFunctionBegin; 21600ac07820SSatish Balay *newmat = 0; 21613f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 21620ac07820SSatish Balay PLogObjectCreate(mat); 21630ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2164cc2dc46cSBarry Smith PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2165e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2166e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 21670ac07820SSatish Balay mat->factor = matin->factor; 21680ac07820SSatish Balay mat->assembled = PETSC_TRUE; 21690ac07820SSatish Balay 21700ac07820SSatish Balay a->m = mat->m = oldmat->m; 21710ac07820SSatish Balay a->n = mat->n = oldmat->n; 21720ac07820SSatish Balay a->M = mat->M = oldmat->M; 21730ac07820SSatish Balay a->N = mat->N = oldmat->N; 21740ac07820SSatish Balay 21750ac07820SSatish Balay a->bs = oldmat->bs; 21760ac07820SSatish Balay a->bs2 = oldmat->bs2; 21770ac07820SSatish Balay a->mbs = oldmat->mbs; 21780ac07820SSatish Balay a->nbs = oldmat->nbs; 21790ac07820SSatish Balay a->Mbs = oldmat->Mbs; 21800ac07820SSatish Balay a->Nbs = oldmat->Nbs; 21810ac07820SSatish Balay 21820ac07820SSatish Balay a->rstart = oldmat->rstart; 21830ac07820SSatish Balay a->rend = oldmat->rend; 21840ac07820SSatish Balay a->cstart = oldmat->cstart; 21850ac07820SSatish Balay a->cend = oldmat->cend; 21860ac07820SSatish Balay a->size = oldmat->size; 21870ac07820SSatish Balay a->rank = oldmat->rank; 2188e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 21890ac07820SSatish Balay a->rowvalues = 0; 21900ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 219130793edcSSatish Balay a->barray = 0; 21920ac07820SSatish Balay 2193133cdb44SSatish Balay /* hash table stuff */ 2194133cdb44SSatish Balay a->ht = 0; 2195133cdb44SSatish Balay a->hd = 0; 2196133cdb44SSatish Balay a->ht_size = 0; 2197133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 219825fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2199133cdb44SSatish Balay a->ht_total_ct = 0; 2200133cdb44SSatish Balay a->ht_insert_ct = 0; 2201133cdb44SSatish Balay 2202133cdb44SSatish Balay 22030ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2204f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 22050ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 22060ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 22070ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 22080ac07820SSatish Balay if (oldmat->colmap) { 2209*48e59246SSatish Balay #if defined (USE_CTABLE) 2210*48e59246SSatish Balay ierr = TableCreateCopy( &a->colmap,oldmat->colmap ); CHKERRQ(ierr); 2211*48e59246SSatish Balay #else 22120ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 22130ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 22140ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2215*48e59246SSatish Balay #endif 22160ac07820SSatish Balay } else a->colmap = 0; 22170ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 22180ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 22190ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 22200ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 22210ac07820SSatish Balay } else a->garray = 0; 22220ac07820SSatish Balay 22230ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 22240ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 22250ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2226e18c124aSSatish Balay 22270ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 22282e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 22290ac07820SSatish Balay PLogObjectParent(mat,a->A); 22302e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 22310ac07820SSatish Balay PLogObjectParent(mat,a->B); 22320ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2233e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 22340ac07820SSatish Balay if (flg) { 22350ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 22360ac07820SSatish Balay } 22370ac07820SSatish Balay *newmat = mat; 22383a40ed3dSBarry Smith PetscFunctionReturn(0); 22390ac07820SSatish Balay } 224057b952d6SSatish Balay 224157b952d6SSatish Balay #include "sys.h" 224257b952d6SSatish Balay 22435615d1e5SSatish Balay #undef __FUNC__ 22445615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 224557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 224657b952d6SSatish Balay { 224757b952d6SSatish Balay Mat A; 224857b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 224957b952d6SSatish Balay Scalar *vals,*buf; 225057b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 225157b952d6SSatish Balay MPI_Status status; 2252cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 225357b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 225440011551SBarry Smith int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 225557b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 225657b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 225757b952d6SSatish Balay 2258d64ed03dSBarry Smith PetscFunctionBegin; 225957b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 226057b952d6SSatish Balay 226157b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 226257b952d6SSatish Balay if (!rank) { 226357b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2264e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2265a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2266d64ed03dSBarry Smith if (header[3] < 0) { 2267a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2268d64ed03dSBarry Smith } 22696c5fab8fSBarry Smith } 2270d64ed03dSBarry Smith 2271ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 227257b952d6SSatish Balay M = header[1]; N = header[2]; 227357b952d6SSatish Balay 2274a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 227557b952d6SSatish Balay 227657b952d6SSatish Balay /* 227757b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 227857b952d6SSatish Balay divisible by the blocksize 227957b952d6SSatish Balay */ 228057b952d6SSatish Balay Mbs = M/bs; 228157b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 228257b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 228357b952d6SSatish Balay else Mbs++; 228457b952d6SSatish Balay if (extra_rows &&!rank) { 2285b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 228657b952d6SSatish Balay } 2287537820f0SBarry Smith 228857b952d6SSatish Balay /* determine ownership of all rows */ 228957b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 229057b952d6SSatish Balay m = mbs * bs; 2291cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2292cee3aa6bSSatish Balay browners = rowners + size + 1; 2293ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 229457b952d6SSatish Balay rowners[0] = 0; 2295cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2296cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 229757b952d6SSatish Balay rstart = rowners[rank]; 229857b952d6SSatish Balay rend = rowners[rank+1]; 229957b952d6SSatish Balay 230057b952d6SSatish Balay /* distribute row lengths to all processors */ 230157b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 230257b952d6SSatish Balay if (!rank) { 230357b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2304e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 230557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 230657b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2307cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2308ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 230957b952d6SSatish Balay PetscFree(sndcounts); 2310d64ed03dSBarry Smith } else { 2311ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 231257b952d6SSatish Balay } 231357b952d6SSatish Balay 231457b952d6SSatish Balay if (!rank) { 231557b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 231657b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 231757b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 231857b952d6SSatish Balay for ( i=0; i<size; i++ ) { 231957b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 232057b952d6SSatish Balay procsnz[i] += rowlengths[j]; 232157b952d6SSatish Balay } 232257b952d6SSatish Balay } 232357b952d6SSatish Balay PetscFree(rowlengths); 232457b952d6SSatish Balay 232557b952d6SSatish Balay /* determine max buffer needed and allocate it */ 232657b952d6SSatish Balay maxnz = 0; 232757b952d6SSatish Balay for ( i=0; i<size; i++ ) { 232857b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 232957b952d6SSatish Balay } 233057b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 233157b952d6SSatish Balay 233257b952d6SSatish Balay /* read in my part of the matrix column indices */ 233357b952d6SSatish Balay nz = procsnz[0]; 233457b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 233557b952d6SSatish Balay mycols = ibuf; 2336cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2337e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2338cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2339cee3aa6bSSatish Balay 234057b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 234157b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 234257b952d6SSatish Balay nz = procsnz[i]; 2343e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2344ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 234557b952d6SSatish Balay } 234657b952d6SSatish Balay /* read in the stuff for the last proc */ 234757b952d6SSatish Balay if ( size != 1 ) { 234857b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2349e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 235057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2351ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 235257b952d6SSatish Balay } 235357b952d6SSatish Balay PetscFree(cols); 2354d64ed03dSBarry Smith } else { 235557b952d6SSatish Balay /* determine buffer space needed for message */ 235657b952d6SSatish Balay nz = 0; 235757b952d6SSatish Balay for ( i=0; i<m; i++ ) { 235857b952d6SSatish Balay nz += locrowlens[i]; 235957b952d6SSatish Balay } 236057b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 236157b952d6SSatish Balay mycols = ibuf; 236257b952d6SSatish Balay /* receive message of column indices*/ 2363ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2364ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2365a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 236657b952d6SSatish Balay } 236757b952d6SSatish Balay 236857b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2369cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2370cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 237157b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2372cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 237357b952d6SSatish Balay masked1 = mask + Mbs; 237457b952d6SSatish Balay masked2 = masked1 + Mbs; 237557b952d6SSatish Balay rowcount = 0; nzcount = 0; 237657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 237757b952d6SSatish Balay dcount = 0; 237857b952d6SSatish Balay odcount = 0; 237957b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 238057b952d6SSatish Balay kmax = locrowlens[rowcount]; 238157b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 238257b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 238357b952d6SSatish Balay if (!mask[tmp]) { 238457b952d6SSatish Balay mask[tmp] = 1; 238557b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 238657b952d6SSatish Balay else masked1[dcount++] = tmp; 238757b952d6SSatish Balay } 238857b952d6SSatish Balay } 238957b952d6SSatish Balay rowcount++; 239057b952d6SSatish Balay } 2391cee3aa6bSSatish Balay 239257b952d6SSatish Balay dlens[i] = dcount; 239357b952d6SSatish Balay odlens[i] = odcount; 2394cee3aa6bSSatish Balay 239557b952d6SSatish Balay /* zero out the mask elements we set */ 239657b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 239757b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 239857b952d6SSatish Balay } 2399cee3aa6bSSatish Balay 240057b952d6SSatish Balay /* create our matrix */ 2401537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2402537820f0SBarry Smith CHKERRQ(ierr); 240357b952d6SSatish Balay A = *newmat; 24046d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 240557b952d6SSatish Balay 240657b952d6SSatish Balay if (!rank) { 240757b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 240857b952d6SSatish Balay /* read in my part of the matrix numerical values */ 240957b952d6SSatish Balay nz = procsnz[0]; 241057b952d6SSatish Balay vals = buf; 2411cee3aa6bSSatish Balay mycols = ibuf; 2412cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2413e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2414cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2415537820f0SBarry Smith 241657b952d6SSatish Balay /* insert into matrix */ 241757b952d6SSatish Balay jj = rstart*bs; 241857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 241957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 242057b952d6SSatish Balay mycols += locrowlens[i]; 242157b952d6SSatish Balay vals += locrowlens[i]; 242257b952d6SSatish Balay jj++; 242357b952d6SSatish Balay } 242457b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 242557b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 242657b952d6SSatish Balay nz = procsnz[i]; 242757b952d6SSatish Balay vals = buf; 2428e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2429ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 243057b952d6SSatish Balay } 243157b952d6SSatish Balay /* the last proc */ 243257b952d6SSatish Balay if ( size != 1 ){ 243357b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2434cee3aa6bSSatish Balay vals = buf; 2435e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 243657b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2437ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 243857b952d6SSatish Balay } 243957b952d6SSatish Balay PetscFree(procsnz); 2440d64ed03dSBarry Smith } else { 244157b952d6SSatish Balay /* receive numeric values */ 244257b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 244357b952d6SSatish Balay 244457b952d6SSatish Balay /* receive message of values*/ 244557b952d6SSatish Balay vals = buf; 2446cee3aa6bSSatish Balay mycols = ibuf; 2447ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2448ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2449a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 245057b952d6SSatish Balay 245157b952d6SSatish Balay /* insert into matrix */ 245257b952d6SSatish Balay jj = rstart*bs; 2453cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 245457b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 245557b952d6SSatish Balay mycols += locrowlens[i]; 245657b952d6SSatish Balay vals += locrowlens[i]; 245757b952d6SSatish Balay jj++; 245857b952d6SSatish Balay } 245957b952d6SSatish Balay } 246057b952d6SSatish Balay PetscFree(locrowlens); 246157b952d6SSatish Balay PetscFree(buf); 246257b952d6SSatish Balay PetscFree(ibuf); 246357b952d6SSatish Balay PetscFree(rowners); 246457b952d6SSatish Balay PetscFree(dlens); 2465cee3aa6bSSatish Balay PetscFree(mask); 24666d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 24676d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 24683a40ed3dSBarry Smith PetscFunctionReturn(0); 246957b952d6SSatish Balay } 247057b952d6SSatish Balay 247157b952d6SSatish Balay 2472133cdb44SSatish Balay 2473133cdb44SSatish Balay #undef __FUNC__ 2474133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2475133cdb44SSatish Balay /*@ 2476133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2477133cdb44SSatish Balay 2478133cdb44SSatish Balay Input Parameters: 2479133cdb44SSatish Balay . mat - the matrix 2480133cdb44SSatish Balay . fact - factor 2481133cdb44SSatish Balay 2482fee21e36SBarry Smith Collective on Mat 2483fee21e36SBarry Smith 2484133cdb44SSatish Balay Notes: 2485133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2486133cdb44SSatish Balay 2487133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2488133cdb44SSatish Balay 2489133cdb44SSatish Balay .seealso: MatSetOption() 2490133cdb44SSatish Balay @*/ 2491133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2492133cdb44SSatish Balay { 249325fdafccSSatish Balay Mat_MPIBAIJ *baij; 2494133cdb44SSatish Balay 2495133cdb44SSatish Balay PetscFunctionBegin; 2496133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 249725fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2498133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2499133cdb44SSatish Balay } 2500133cdb44SSatish Balay baij = (Mat_MPIBAIJ*) mat->data; 2501133cdb44SSatish Balay baij->ht_fact = fact; 2502133cdb44SSatish Balay PetscFunctionReturn(0); 2503133cdb44SSatish Balay } 2504