1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*73959e64SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.159 1999/03/11 23:21:46 balay Exp bsmith $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 117b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 183b2fbd54SBarry Smith 19537820f0SBarry Smith /* 20537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2157b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2257b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2357b952d6SSatish Balay length of colmap equals the global matrix length. 2457b952d6SSatish Balay */ 255615d1e5SSatish Balay #undef __FUNC__ 265615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2857b952d6SSatish Balay { 2957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3057b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 316dee3f7eSBarry Smith int nbs = B->nbs,i,bs=B->bs; 326dee3f7eSBarry Smith #if defined (USE_CTABLE) 336dee3f7eSBarry Smith int ierr; 346dee3f7eSBarry Smith #endif 3557b952d6SSatish Balay 36d64ed03dSBarry Smith PetscFunctionBegin; 3748e59246SSatish Balay #if defined (USE_CTABLE) 38fa46199cSSatish Balay ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr); 3948e59246SSatish Balay for ( i=0; i<nbs; i++ ){ 4048e59246SSatish Balay ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 4148e59246SSatish Balay } 4248e59246SSatish Balay #else 43758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 4457b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4557b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 46928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4748e59246SSatish Balay #endif 483a40ed3dSBarry Smith PetscFunctionReturn(0); 4957b952d6SSatish Balay } 5057b952d6SSatish Balay 5180c1aa95SSatish Balay #define CHUNKSIZE 10 5280c1aa95SSatish Balay 53f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 5480c1aa95SSatish Balay { \ 5580c1aa95SSatish Balay \ 5680c1aa95SSatish Balay brow = row/bs; \ 5780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 58ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 5980c1aa95SSatish Balay bcol = col/bs; \ 6080c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 61ab26458aSBarry Smith low = 0; high = nrow; \ 62ab26458aSBarry Smith while (high-low > 3) { \ 63ab26458aSBarry Smith t = (low+high)/2; \ 64ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 65ab26458aSBarry Smith else low = t; \ 66ab26458aSBarry Smith } \ 67ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 6880c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 6980c1aa95SSatish Balay if (rp[_i] == bcol) { \ 7080c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 71eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 72eada6651SSatish Balay else *bap = value; \ 73ac7a638eSSatish Balay goto a_noinsert; \ 7480c1aa95SSatish Balay } \ 7580c1aa95SSatish Balay } \ 7689280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 77a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 7880c1aa95SSatish Balay if (nrow >= rmax) { \ 7980c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 8080c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 8180c1aa95SSatish Balay Scalar *new_a; \ 8280c1aa95SSatish Balay \ 83a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 8489280ab3SLois Curfman McInnes \ 8580c1aa95SSatish Balay /* malloc new storage space */ \ 8680c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 8780c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 8880c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 8980c1aa95SSatish Balay new_i = new_j + new_nz; \ 9080c1aa95SSatish Balay \ 9180c1aa95SSatish Balay /* copy over old data into new slots */ \ 9280c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 9380c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 9480c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 9580c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 9680c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 9780c1aa95SSatish Balay len*sizeof(int)); \ 9880c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 9980c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 10080c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 10180c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 10280c1aa95SSatish Balay /* free up old matrix storage */ \ 10380c1aa95SSatish Balay PetscFree(a->a); \ 10480c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 10580c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 10680c1aa95SSatish Balay a->singlemalloc = 1; \ 10780c1aa95SSatish Balay \ 10880c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 11080c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 11180c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 11280c1aa95SSatish Balay a->reallocs++; \ 11380c1aa95SSatish Balay a->nz++; \ 11480c1aa95SSatish Balay } \ 11580c1aa95SSatish Balay N = nrow++ - 1; \ 11680c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 11780c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 11880c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 11980c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 12080c1aa95SSatish Balay } \ 12180c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 12280c1aa95SSatish Balay rp[_i] = bcol; \ 12380c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 124ac7a638eSSatish Balay a_noinsert:; \ 12580c1aa95SSatish Balay ailen[brow] = nrow; \ 12680c1aa95SSatish Balay } 12757b952d6SSatish Balay 128ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 129ac7a638eSSatish Balay { \ 130ac7a638eSSatish Balay \ 131ac7a638eSSatish Balay brow = row/bs; \ 132ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 133ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 134ac7a638eSSatish Balay bcol = col/bs; \ 135ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 136ac7a638eSSatish Balay low = 0; high = nrow; \ 137ac7a638eSSatish Balay while (high-low > 3) { \ 138ac7a638eSSatish Balay t = (low+high)/2; \ 139ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 140ac7a638eSSatish Balay else low = t; \ 141ac7a638eSSatish Balay } \ 142ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 143ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 144ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 145ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 146ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 147ac7a638eSSatish Balay else *bap = value; \ 148ac7a638eSSatish Balay goto b_noinsert; \ 149ac7a638eSSatish Balay } \ 150ac7a638eSSatish Balay } \ 15189280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 152a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 153ac7a638eSSatish Balay if (nrow >= rmax) { \ 154ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15574c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156ac7a638eSSatish Balay Scalar *new_a; \ 157ac7a638eSSatish Balay \ 158a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 15989280ab3SLois Curfman McInnes \ 160ac7a638eSSatish Balay /* malloc new storage space */ \ 16174c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 162ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 164ac7a638eSSatish Balay new_i = new_j + new_nz; \ 165ac7a638eSSatish Balay \ 166ac7a638eSSatish Balay /* copy over old data into new slots */ \ 167ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 16874c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 170ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 171ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 172ac7a638eSSatish Balay len*sizeof(int)); \ 173ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 174ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 175ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 176ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 177ac7a638eSSatish Balay /* free up old matrix storage */ \ 17874c639caSSatish Balay PetscFree(b->a); \ 17974c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 18074c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 18174c639caSSatish Balay b->singlemalloc = 1; \ 182ac7a638eSSatish Balay \ 183ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 184ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 18574c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 18674c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 18774c639caSSatish Balay b->reallocs++; \ 18874c639caSSatish Balay b->nz++; \ 189ac7a638eSSatish Balay } \ 190ac7a638eSSatish Balay N = nrow++ - 1; \ 191ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 192ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 193ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 194ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 195ac7a638eSSatish Balay } \ 196ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 197ac7a638eSSatish Balay rp[_i] = bcol; \ 198ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 199ac7a638eSSatish Balay b_noinsert:; \ 200ac7a638eSSatish Balay bilen[brow] = nrow; \ 201ac7a638eSSatish Balay } 202ac7a638eSSatish Balay 2035615d1e5SSatish Balay #undef __FUNC__ 2045615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 205ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 20657b952d6SSatish Balay { 20757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 20857b952d6SSatish Balay Scalar value; 2094fa0d573SSatish Balay int ierr,i,j,row,col; 2104fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2114fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2124fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 21357b952d6SSatish Balay 214eada6651SSatish Balay /* Some Variables required in the macro */ 21580c1aa95SSatish Balay Mat A = baij->A; 21680c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 217ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 218ac7a638eSSatish Balay Scalar *aa=a->a; 219ac7a638eSSatish Balay 220ac7a638eSSatish Balay Mat B = baij->B; 221ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 222ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 223ac7a638eSSatish Balay Scalar *ba=b->a; 224ac7a638eSSatish Balay 225ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 226ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 227ac7a638eSSatish Balay Scalar *ap,*bap; 22880c1aa95SSatish Balay 229d64ed03dSBarry Smith PetscFunctionBegin; 23057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2315ef9f2a5SBarry Smith if (im[i] < 0) continue; 2323a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 233a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 234639f9d9dSBarry Smith #endif 23557b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 23657b952d6SSatish Balay row = im[i] - rstart_orig; 23757b952d6SSatish Balay for ( j=0; j<n; j++ ) { 23857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 23957b952d6SSatish Balay col = in[j] - cstart_orig; 24057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 241f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 24280c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 243*73959e64SBarry Smith } else if (in[j] < 0) continue; 2443a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 245a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 246639f9d9dSBarry Smith #endif 24757b952d6SSatish Balay else { 24857b952d6SSatish Balay if (mat->was_assembled) { 249905e6a2fSBarry Smith if (!baij->colmap) { 250905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 251905e6a2fSBarry Smith } 25248e59246SSatish Balay #if defined (USE_CTABLE) 253fa46199cSSatish Balay ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr); 254fa46199cSSatish Balay col = col - 1 + in[j]%bs; 25548e59246SSatish Balay #else 256905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 25748e59246SSatish Balay #endif 25857b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 25957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 26057b952d6SSatish Balay col = in[j]; 2619bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2629bf004c3SSatish Balay B = baij->B; 2639bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2649bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2659bf004c3SSatish Balay ba=b->a; 26657b952d6SSatish Balay } 267d64ed03dSBarry Smith } else col = in[j]; 26857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 269ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 270ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 27157b952d6SSatish Balay } 27257b952d6SSatish Balay } 273d64ed03dSBarry Smith } else { 27490f02eecSBarry Smith if (roworiented && !baij->donotstash) { 275a2d1c673SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 276d64ed03dSBarry Smith } else { 27790f02eecSBarry Smith if (!baij->donotstash) { 27857b952d6SSatish Balay row = im[i]; 27957b952d6SSatish Balay for ( j=0; j<n; j++ ) { 280a2d1c673SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr); 28157b952d6SSatish Balay } 28257b952d6SSatish Balay } 28357b952d6SSatish Balay } 28457b952d6SSatish Balay } 28590f02eecSBarry Smith } 2863a40ed3dSBarry Smith PetscFunctionReturn(0); 28757b952d6SSatish Balay } 28857b952d6SSatish Balay 289ab26458aSBarry Smith #undef __FUNC__ 290ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 291ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 292ab26458aSBarry Smith { 293ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 29430793edcSSatish Balay Scalar *value,*barray=baij->barray; 2957ef1d9bdSSatish Balay int ierr,i,j,ii,jj,row,col; 296ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 297ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 298ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 299ab26458aSBarry Smith 30030793edcSSatish Balay if(!barray) { 30147513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 30230793edcSSatish Balay } 30330793edcSSatish Balay 304ab26458aSBarry Smith if (roworiented) { 305ab26458aSBarry Smith stepval = (n-1)*bs; 306ab26458aSBarry Smith } else { 307ab26458aSBarry Smith stepval = (m-1)*bs; 308ab26458aSBarry Smith } 309ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 3105ef9f2a5SBarry Smith if (im[i] < 0) continue; 3113a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3125ef9f2a5SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 313ab26458aSBarry Smith #endif 314ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 315ab26458aSBarry Smith row = im[i] - rstart; 316ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 31715b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 31815b57d14SSatish Balay if ((roworiented) && (n == 1)) { 31915b57d14SSatish Balay barray = v + i*bs2; 32015b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 32115b57d14SSatish Balay barray = v + j*bs2; 32215b57d14SSatish Balay } else { /* Here a copy is required */ 323ab26458aSBarry Smith if (roworiented) { 324ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 325ab26458aSBarry Smith } else { 326ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 327abef11f7SSatish Balay } 32847513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 32947513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 33030793edcSSatish Balay *barray++ = *value++; 33147513183SBarry Smith } 33247513183SBarry Smith } 33330793edcSSatish Balay barray -=bs2; 33415b57d14SSatish Balay } 335abef11f7SSatish Balay 336abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 337abef11f7SSatish Balay col = in[j] - cstart; 33830793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 339ab26458aSBarry Smith } 3405ef9f2a5SBarry Smith else if (in[j] < 0) continue; 3413a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3425ef9f2a5SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 343ab26458aSBarry Smith #endif 344ab26458aSBarry Smith else { 345ab26458aSBarry Smith if (mat->was_assembled) { 346ab26458aSBarry Smith if (!baij->colmap) { 347ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 348ab26458aSBarry Smith } 349a5eb4965SSatish Balay 3503a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 35148e59246SSatish Balay #if defined (USE_CTABLE) 352fa46199cSSatish Balay { int data; 353fa46199cSSatish Balay ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr); 354fa46199cSSatish Balay if((data - 1) % bs) 35548e59246SSatish Balay {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 356fa46199cSSatish Balay } 35748e59246SSatish Balay #else 358a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 359a5eb4965SSatish Balay #endif 36048e59246SSatish Balay #endif 36148e59246SSatish Balay #if defined (USE_CTABLE) 362fa46199cSSatish Balay ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr); 363fa46199cSSatish Balay col = (col - 1)/bs; 36448e59246SSatish Balay #else 365a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 36648e59246SSatish Balay #endif 367ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 368ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 369ab26458aSBarry Smith col = in[j]; 370ab26458aSBarry Smith } 371ab26458aSBarry Smith } 372ab26458aSBarry Smith else col = in[j]; 37330793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 374ab26458aSBarry Smith } 375ab26458aSBarry Smith } 376d64ed03dSBarry Smith } else { 377ab26458aSBarry Smith if (!baij->donotstash) { 378a2d1c673SSatish Balay /* ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v+i*bs2*n, 379a2d1c673SSatish Balay roworiented,stepval,addv);CHKERRQ(ierr); */ 380a2d1c673SSatish Balay ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v,m,n, 381a2d1c673SSatish Balay roworiented,i);CHKERRQ(ierr); 382abef11f7SSatish Balay } 383ab26458aSBarry Smith } 384ab26458aSBarry Smith } 3853a40ed3dSBarry Smith PetscFunctionReturn(0); 386ab26458aSBarry Smith } 3870bdbc534SSatish Balay #define HASH_KEY 0.6180339887 388c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 389c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 390c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 3915615d1e5SSatish Balay #undef __FUNC__ 3920bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 3930bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 3940bdbc534SSatish Balay { 3950bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3960bdbc534SSatish Balay int ierr,i,j,row,col; 3970bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 398c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 399c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 400c2760754SSatish Balay double tmp; 401b9e4cc15SSatish Balay Scalar ** HD = baij->hd,value; 4024a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4034a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4044a15367fSSatish Balay #endif 4050bdbc534SSatish Balay 4060bdbc534SSatish Balay PetscFunctionBegin; 4070bdbc534SSatish Balay 4080bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4090bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4100bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4110bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4120bdbc534SSatish Balay #endif 4130bdbc534SSatish Balay row = im[i]; 414c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4150bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4160bdbc534SSatish Balay col = in[j]; 4170bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4180bdbc534SSatish Balay /* Look up into the Hash Table */ 419c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 420c2760754SSatish Balay h1 = HASH(size,key,tmp); 4210bdbc534SSatish Balay 422c2760754SSatish Balay 423c2760754SSatish Balay idx = h1; 424187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 425187ce0cbSSatish Balay insert_ct++; 426187ce0cbSSatish Balay total_ct++; 427187ce0cbSSatish Balay if (HT[idx] != key) { 428187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 429187ce0cbSSatish Balay if (idx == size) { 430187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 431187ce0cbSSatish Balay if (idx == h1) { 432187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 433187ce0cbSSatish Balay } 434187ce0cbSSatish Balay } 435187ce0cbSSatish Balay } 436187ce0cbSSatish Balay #else 437c2760754SSatish Balay if (HT[idx] != key) { 438c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 439c2760754SSatish Balay if (idx == size) { 440c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 441c2760754SSatish Balay if (idx == h1) { 442c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 443c2760754SSatish Balay } 444c2760754SSatish Balay } 445c2760754SSatish Balay } 446187ce0cbSSatish Balay #endif 447c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 448c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 449c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4500bdbc534SSatish Balay } 4510bdbc534SSatish Balay } else { 4520bdbc534SSatish Balay if (roworiented && !baij->donotstash) { 453a2d1c673SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4540bdbc534SSatish Balay } else { 4550bdbc534SSatish Balay if (!baij->donotstash) { 4560bdbc534SSatish Balay row = im[i]; 4570bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 458a2d1c673SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr); 4590bdbc534SSatish Balay } 4600bdbc534SSatish Balay } 4610bdbc534SSatish Balay } 4620bdbc534SSatish Balay } 4630bdbc534SSatish Balay } 464187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 465187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 466187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 467187ce0cbSSatish Balay #endif 4680bdbc534SSatish Balay PetscFunctionReturn(0); 4690bdbc534SSatish Balay } 4700bdbc534SSatish Balay 4710bdbc534SSatish Balay #undef __FUNC__ 4720bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4730bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4740bdbc534SSatish Balay { 4750bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4760bdbc534SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 4770bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 478b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 479c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 480c2760754SSatish Balay double tmp; 481187ce0cbSSatish Balay Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 4824a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4834a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4844a15367fSSatish Balay #endif 4850bdbc534SSatish Balay 486d0a41580SSatish Balay PetscFunctionBegin; 487d0a41580SSatish Balay 4880bdbc534SSatish Balay if (roworiented) { 4890bdbc534SSatish Balay stepval = (n-1)*bs; 4900bdbc534SSatish Balay } else { 4910bdbc534SSatish Balay stepval = (m-1)*bs; 4920bdbc534SSatish Balay } 4930bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4940bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4950bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4960bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4970bdbc534SSatish Balay #endif 4980bdbc534SSatish Balay row = im[i]; 499187ce0cbSSatish Balay v_t = v + i*bs2; 500c2760754SSatish Balay if (row >= rstart && row < rend) { 5010bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5020bdbc534SSatish Balay col = in[j]; 5030bdbc534SSatish Balay 5040bdbc534SSatish Balay /* Look up into the Hash Table */ 505c2760754SSatish Balay key = row*Nbs+col+1; 506c2760754SSatish Balay h1 = HASH(size,key,tmp); 5070bdbc534SSatish Balay 508c2760754SSatish Balay idx = h1; 509187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 510187ce0cbSSatish Balay total_ct++; 511187ce0cbSSatish Balay insert_ct++; 512187ce0cbSSatish Balay if (HT[idx] != key) { 513187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 514187ce0cbSSatish Balay if (idx == size) { 515187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 516187ce0cbSSatish Balay if (idx == h1) { 517187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 518187ce0cbSSatish Balay } 519187ce0cbSSatish Balay } 520187ce0cbSSatish Balay } 521187ce0cbSSatish Balay #else 522c2760754SSatish Balay if (HT[idx] != key) { 523c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 524c2760754SSatish Balay if (idx == size) { 525c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 526c2760754SSatish Balay if (idx == h1) { 527c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 528c2760754SSatish Balay } 529c2760754SSatish Balay } 530c2760754SSatish Balay } 531187ce0cbSSatish Balay #endif 532c2760754SSatish Balay baij_a = HD[idx]; 5330bdbc534SSatish Balay if (roworiented) { 534c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 535187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 536187ce0cbSSatish Balay value = v_t; 537187ce0cbSSatish Balay v_t += bs; 538fef45726SSatish Balay if (addv == ADD_VALUES) { 539c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 540c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 541fef45726SSatish Balay baij_a[jj] += *value++; 542b4cc0f5aSSatish Balay } 543b4cc0f5aSSatish Balay } 544fef45726SSatish Balay } else { 545c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 546c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 547fef45726SSatish Balay baij_a[jj] = *value++; 548fef45726SSatish Balay } 549fef45726SSatish Balay } 550fef45726SSatish Balay } 5510bdbc534SSatish Balay } else { 5520bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 553fef45726SSatish Balay if (addv == ADD_VALUES) { 554b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 5550bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 556fef45726SSatish Balay baij_a[jj] += *value++; 557fef45726SSatish Balay } 558fef45726SSatish Balay } 559fef45726SSatish Balay } else { 560fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 561fef45726SSatish Balay for ( jj=0; jj<bs; jj++ ) { 562fef45726SSatish Balay baij_a[jj] = *value++; 563fef45726SSatish Balay } 564b4cc0f5aSSatish Balay } 5650bdbc534SSatish Balay } 5660bdbc534SSatish Balay } 5670bdbc534SSatish Balay } 5680bdbc534SSatish Balay } else { 5690bdbc534SSatish Balay if (!baij->donotstash) { 5700bdbc534SSatish Balay if (roworiented ) { 5710bdbc534SSatish Balay row = im[i]*bs; 5720bdbc534SSatish Balay value = v + i*(stepval+bs)*bs; 5730bdbc534SSatish Balay for ( j=0; j<bs; j++,row++ ) { 5740bdbc534SSatish Balay for ( k=0; k<n; k++ ) { 5750bdbc534SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 576a2d1c673SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr); 5770bdbc534SSatish Balay } 5780bdbc534SSatish Balay } 5790bdbc534SSatish Balay } 5800bdbc534SSatish Balay } else { 5810bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5820bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 5830bdbc534SSatish Balay col = in[j]*bs; 5840bdbc534SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 5850bdbc534SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 586a2d1c673SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr); 5870bdbc534SSatish Balay } 5880bdbc534SSatish Balay } 5890bdbc534SSatish Balay } 5900bdbc534SSatish Balay } 5910bdbc534SSatish Balay } 5920bdbc534SSatish Balay } 5930bdbc534SSatish Balay } 594187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 595187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 596187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 597187ce0cbSSatish Balay #endif 5980bdbc534SSatish Balay PetscFunctionReturn(0); 5990bdbc534SSatish Balay } 600133cdb44SSatish Balay 6010bdbc534SSatish Balay #undef __FUNC__ 6025615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 603ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 604d6de1c52SSatish Balay { 605d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 606d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 60748e59246SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 608d6de1c52SSatish Balay 609133cdb44SSatish Balay PetscFunctionBegin; 610d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 611a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 612a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 613d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 614d6de1c52SSatish Balay row = idxm[i] - bsrstart; 615d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 616a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 617a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 618d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 619d6de1c52SSatish Balay col = idxn[j] - bscstart; 62098dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 621d64ed03dSBarry Smith } else { 622905e6a2fSBarry Smith if (!baij->colmap) { 623905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 624905e6a2fSBarry Smith } 62548e59246SSatish Balay #if defined (USE_CTABLE) 626fa46199cSSatish Balay ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr); 627fa46199cSSatish Balay data --; 62848e59246SSatish Balay #else 62948e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 63048e59246SSatish Balay #endif 63148e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 632d9d09a02SSatish Balay else { 63348e59246SSatish Balay col = data + idxn[j]%bs; 63498dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 635d6de1c52SSatish Balay } 636d6de1c52SSatish Balay } 637d6de1c52SSatish Balay } 638d64ed03dSBarry Smith } else { 639a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 640d6de1c52SSatish Balay } 641d6de1c52SSatish Balay } 6423a40ed3dSBarry Smith PetscFunctionReturn(0); 643d6de1c52SSatish Balay } 644d6de1c52SSatish Balay 6455615d1e5SSatish Balay #undef __FUNC__ 6465615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 647ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 648d6de1c52SSatish Balay { 649d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 650d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 651acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 652d6de1c52SSatish Balay double sum = 0.0; 653d6de1c52SSatish Balay Scalar *v; 654d6de1c52SSatish Balay 655d64ed03dSBarry Smith PetscFunctionBegin; 656d6de1c52SSatish Balay if (baij->size == 1) { 657d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 658d6de1c52SSatish Balay } else { 659d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 660d6de1c52SSatish Balay v = amat->a; 661d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 6623a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 663e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 664d6de1c52SSatish Balay #else 665d6de1c52SSatish Balay sum += (*v)*(*v); v++; 666d6de1c52SSatish Balay #endif 667d6de1c52SSatish Balay } 668d6de1c52SSatish Balay v = bmat->a; 669d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 6703a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 671e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 672d6de1c52SSatish Balay #else 673d6de1c52SSatish Balay sum += (*v)*(*v); v++; 674d6de1c52SSatish Balay #endif 675d6de1c52SSatish Balay } 676ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 677d6de1c52SSatish Balay *norm = sqrt(*norm); 678d64ed03dSBarry Smith } else { 679e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 680d6de1c52SSatish Balay } 681d64ed03dSBarry Smith } 6823a40ed3dSBarry Smith PetscFunctionReturn(0); 683d6de1c52SSatish Balay } 68457b952d6SSatish Balay 685bd7f49f5SSatish Balay 686fef45726SSatish Balay /* 687fef45726SSatish Balay Creates the hash table, and sets the table 688fef45726SSatish Balay This table is created only once. 689fef45726SSatish Balay If new entried need to be added to the matrix 690fef45726SSatish Balay then the hash table has to be destroyed and 691fef45726SSatish Balay recreated. 692fef45726SSatish Balay */ 693fef45726SSatish Balay #undef __FUNC__ 694fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 695d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 696596b8d2eSBarry Smith { 697596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 698596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 699596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 7000bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7014a15367fSSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart; 702187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 703fef45726SSatish Balay int *HT,key; 7040bdbc534SSatish Balay Scalar **HD; 705c2760754SSatish Balay double tmp; 7064a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 7074a15367fSSatish Balay int ct=0,max=0; 7084a15367fSSatish Balay #endif 709fef45726SSatish Balay 710d64ed03dSBarry Smith PetscFunctionBegin; 7110bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 7120bdbc534SSatish Balay size = baij->ht_size; 713fef45726SSatish Balay 7140bdbc534SSatish Balay if (baij->ht) { 7150bdbc534SSatish Balay PetscFunctionReturn(0); 716596b8d2eSBarry Smith } 7170bdbc534SSatish Balay 718fef45726SSatish Balay /* Allocate Memory for Hash Table */ 719b9e4cc15SSatish Balay baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 720b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 721b9e4cc15SSatish Balay HD = baij->hd; 722a07cd24cSSatish Balay HT = baij->ht; 723b9e4cc15SSatish Balay 724b9e4cc15SSatish Balay 725c2760754SSatish Balay PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 7260bdbc534SSatish Balay 727596b8d2eSBarry Smith 728596b8d2eSBarry Smith /* Loop Over A */ 7290bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 730596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 7310bdbc534SSatish Balay row = i+rstart; 7320bdbc534SSatish Balay col = aj[j]+cstart; 733596b8d2eSBarry Smith 734187ce0cbSSatish Balay key = row*Nbs + col + 1; 735c2760754SSatish Balay h1 = HASH(size,key,tmp); 7360bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7370bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7380bdbc534SSatish Balay HT[(h1+k)%size] = key; 7390bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 740596b8d2eSBarry Smith break; 741187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 742187ce0cbSSatish Balay } else { 743187ce0cbSSatish Balay ct++; 744187ce0cbSSatish Balay #endif 745596b8d2eSBarry Smith } 746187ce0cbSSatish Balay } 747187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 748187ce0cbSSatish Balay if (k> max) max = k; 749187ce0cbSSatish Balay #endif 750596b8d2eSBarry Smith } 751596b8d2eSBarry Smith } 752596b8d2eSBarry Smith /* Loop Over B */ 7530bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 754596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 7550bdbc534SSatish Balay row = i+rstart; 7560bdbc534SSatish Balay col = garray[bj[j]]; 757187ce0cbSSatish Balay key = row*Nbs + col + 1; 758c2760754SSatish Balay h1 = HASH(size,key,tmp); 7590bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7600bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7610bdbc534SSatish Balay HT[(h1+k)%size] = key; 7620bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 763596b8d2eSBarry Smith break; 764187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 765187ce0cbSSatish Balay } else { 766187ce0cbSSatish Balay ct++; 767187ce0cbSSatish Balay #endif 768596b8d2eSBarry Smith } 769187ce0cbSSatish Balay } 770187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 771187ce0cbSSatish Balay if (k> max) max = k; 772187ce0cbSSatish Balay #endif 773596b8d2eSBarry Smith } 774596b8d2eSBarry Smith } 775596b8d2eSBarry Smith 776596b8d2eSBarry Smith /* Print Summary */ 777187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 778c2760754SSatish Balay for ( i=0,j=0; i<size; i++) 779596b8d2eSBarry Smith if (HT[i]) {j++;} 780187ce0cbSSatish Balay PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 781187ce0cbSSatish Balay (j== 0)? 0.0:((double)(ct+j))/j,max); 782187ce0cbSSatish Balay #endif 7833a40ed3dSBarry Smith PetscFunctionReturn(0); 784596b8d2eSBarry Smith } 78557b952d6SSatish Balay 786bbb85fb3SSatish Balay #undef __FUNC__ 787bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 788bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 789bbb85fb3SSatish Balay { 790bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 791bbb85fb3SSatish Balay int ierr; 792bbb85fb3SSatish Balay InsertMode addv; 793bbb85fb3SSatish Balay 794bbb85fb3SSatish Balay PetscFunctionBegin; 795bbb85fb3SSatish Balay if (baij->donotstash) { 796bbb85fb3SSatish Balay PetscFunctionReturn(0); 797bbb85fb3SSatish Balay } 798bbb85fb3SSatish Balay 799bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 800bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 801bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 802bbb85fb3SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 803bbb85fb3SSatish Balay } 804bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 805bbb85fb3SSatish Balay 806bbb85fb3SSatish Balay ierr = StashScatterBegin_Private(&baij->stash,baij->rowners); CHKERRQ(ierr); 807a2d1c673SSatish Balay ierr = StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr); 808bbb85fb3SSatish Balay 809bbb85fb3SSatish Balay /* Free cache space */ 810bbb85fb3SSatish Balay PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 811bbb85fb3SSatish Balay PetscFunctionReturn(0); 812bbb85fb3SSatish Balay } 813bbb85fb3SSatish Balay 814bbb85fb3SSatish Balay #undef __FUNC__ 815bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 816bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 817bbb85fb3SSatish Balay { 818bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 819a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 820a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 821a2d1c673SSatish Balay int *row,*col,other_disassembled,r1,r2,r3; 822bbb85fb3SSatish Balay Scalar *val; 823bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 824bbb85fb3SSatish Balay 825bbb85fb3SSatish Balay PetscFunctionBegin; 826bbb85fb3SSatish Balay if (!baij->donotstash) { 827a2d1c673SSatish Balay while (1) { 828a2d1c673SSatish Balay ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 829a2d1c673SSatish Balay if (!flg) break; 830a2d1c673SSatish Balay 831bbb85fb3SSatish Balay for ( i=0; i<n; ) { 832bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 833bbb85fb3SSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 834bbb85fb3SSatish Balay if (j < n) ncols = j-i; 835bbb85fb3SSatish Balay else ncols = n-i; 836bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 837bbb85fb3SSatish Balay ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 838bbb85fb3SSatish Balay i = j; 839bbb85fb3SSatish Balay } 840bbb85fb3SSatish Balay } 841a2d1c673SSatish Balay ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr); 842a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 843a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 844a2d1c673SSatish Balay restore the original flags */ 845a2d1c673SSatish Balay r1 = baij->roworiented; 846a2d1c673SSatish Balay r2 = a->roworiented; 847a2d1c673SSatish Balay r3 = b->roworiented; 848a2d1c673SSatish Balay baij->roworiented = 0; 849a2d1c673SSatish Balay a->roworiented = 0; 850a2d1c673SSatish Balay b->roworiented = 0; 851a2d1c673SSatish Balay while (1) { 852a2d1c673SSatish Balay ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 853a2d1c673SSatish Balay if (!flg) break; 854a2d1c673SSatish Balay 855a2d1c673SSatish Balay for ( i=0; i<n; ) { 856a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 857a2d1c673SSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 858a2d1c673SSatish Balay if (j < n) ncols = j-i; 859a2d1c673SSatish Balay else ncols = n-i; 860a2d1c673SSatish Balay ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr); 861a2d1c673SSatish Balay i = j; 862a2d1c673SSatish Balay } 863a2d1c673SSatish Balay } 864a2d1c673SSatish Balay ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr); 865a2d1c673SSatish Balay baij->roworiented = r1; 866a2d1c673SSatish Balay a->roworiented = r2; 867a2d1c673SSatish Balay b->roworiented = r3; 868bbb85fb3SSatish Balay } 869bbb85fb3SSatish Balay 870bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 871bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 872bbb85fb3SSatish Balay 873bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 874bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 875bbb85fb3SSatish Balay /* 876bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 877bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 878bbb85fb3SSatish Balay */ 879bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 880bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 881bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 882bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 883bbb85fb3SSatish Balay } 884bbb85fb3SSatish Balay } 885bbb85fb3SSatish Balay 886bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 887bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 888bbb85fb3SSatish Balay } 889bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 890bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 891bbb85fb3SSatish Balay 892bbb85fb3SSatish Balay #if defined(USE_PETSC_BOPT_g) 893bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 894bbb85fb3SSatish Balay PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 895bbb85fb3SSatish Balay ((double)baij->ht_total_ct)/baij->ht_insert_ct); 896bbb85fb3SSatish Balay baij->ht_total_ct = 0; 897bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 898bbb85fb3SSatish Balay } 899bbb85fb3SSatish Balay #endif 900bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 901bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 902bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 903bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 904bbb85fb3SSatish Balay } 905bbb85fb3SSatish Balay 906bbb85fb3SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 907bbb85fb3SSatish Balay PetscFunctionReturn(0); 908bbb85fb3SSatish Balay } 90957b952d6SSatish Balay 9105615d1e5SSatish Balay #undef __FUNC__ 9115615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 91257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 91357b952d6SSatish Balay { 91457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 91557b952d6SSatish Balay int ierr; 91657b952d6SSatish Balay 917d64ed03dSBarry Smith PetscFunctionBegin; 91857b952d6SSatish Balay if (baij->size == 1) { 91957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 920a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 9213a40ed3dSBarry Smith PetscFunctionReturn(0); 92257b952d6SSatish Balay } 92357b952d6SSatish Balay 9245615d1e5SSatish Balay #undef __FUNC__ 9257b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 9267b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 92757b952d6SSatish Balay { 92857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 92977ed5343SBarry Smith int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 93057b952d6SSatish Balay FILE *fd; 93157b952d6SSatish Balay ViewerType vtype; 93257b952d6SSatish Balay 933d64ed03dSBarry Smith PetscFunctionBegin; 93457b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 9353f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 936d41123aaSBarry Smith ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr); 937639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 9384e220ebcSLois Curfman McInnes MatInfo info; 93957b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 94057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 941d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 94257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 94357b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 9444e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 9454e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 9464e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 9474e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 9484e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 9494e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 95057b952d6SSatish Balay fflush(fd); 95157b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 95257b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 9533a40ed3dSBarry Smith PetscFunctionReturn(0); 954d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 955bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 9563a40ed3dSBarry Smith PetscFunctionReturn(0); 95757b952d6SSatish Balay } 95857b952d6SSatish Balay } 95957b952d6SSatish Balay 9603f1db9ecSBarry Smith if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 96157b952d6SSatish Balay Draw draw; 96257b952d6SSatish Balay PetscTruth isnull; 96377ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 9643a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 96557b952d6SSatish Balay } 96657b952d6SSatish Balay 96757b952d6SSatish Balay if (size == 1) { 96857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 969d64ed03dSBarry Smith } else { 97057b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 97157b952d6SSatish Balay Mat A; 97257b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 97340011551SBarry Smith int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 97457b952d6SSatish Balay int mbs = baij->mbs; 97557b952d6SSatish Balay Scalar *a; 97657b952d6SSatish Balay 97757b952d6SSatish Balay if (!rank) { 97855843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 979d64ed03dSBarry Smith } else { 98055843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 98157b952d6SSatish Balay } 98257b952d6SSatish Balay PLogObjectParent(mat,A); 98357b952d6SSatish Balay 98457b952d6SSatish Balay /* copy over the A part */ 98557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 98657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 98757b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 98857b952d6SSatish Balay 98957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 99057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 99157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 99257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 99357b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 99457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 995cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 996cee3aa6bSSatish Balay col++; a += bs; 99757b952d6SSatish Balay } 99857b952d6SSatish Balay } 99957b952d6SSatish Balay } 100057b952d6SSatish Balay /* copy over the B part */ 100157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 100257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 100357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 100457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 100557b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 100657b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 100757b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 100857b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1009cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1010cee3aa6bSSatish Balay col++; a += bs; 101157b952d6SSatish Balay } 101257b952d6SSatish Balay } 101357b952d6SSatish Balay } 101457b952d6SSatish Balay PetscFree(rvals); 10156d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10166d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 101755843e3eSBarry Smith /* 101855843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 101955843e3eSBarry Smith synchronized across all processors that share the Draw object 102055843e3eSBarry Smith */ 10213f1db9ecSBarry Smith if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 102257b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 102357b952d6SSatish Balay } 102457b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 102557b952d6SSatish Balay } 10263a40ed3dSBarry Smith PetscFunctionReturn(0); 102757b952d6SSatish Balay } 102857b952d6SSatish Balay 102957b952d6SSatish Balay 103057b952d6SSatish Balay 10315615d1e5SSatish Balay #undef __FUNC__ 10325615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 1033e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 103457b952d6SSatish Balay { 103557b952d6SSatish Balay int ierr; 103657b952d6SSatish Balay ViewerType vtype; 103757b952d6SSatish Balay 1038d64ed03dSBarry Smith PetscFunctionBegin; 103957b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 10403f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 10417b2a1423SBarry Smith PetscTypeCompare(vtype,SOCKET_VIEWER)) { 10427b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 10433f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 10443a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 10455cd90555SBarry Smith } else { 10465cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 104757b952d6SSatish Balay } 10483a40ed3dSBarry Smith PetscFunctionReturn(0); 104957b952d6SSatish Balay } 105057b952d6SSatish Balay 10515615d1e5SSatish Balay #undef __FUNC__ 10525615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1053e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 105479bdfe76SSatish Balay { 105579bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 105679bdfe76SSatish Balay int ierr; 105779bdfe76SSatish Balay 1058d64ed03dSBarry Smith PetscFunctionBegin; 105998dd23e9SBarry Smith if (--mat->refct > 0) PetscFunctionReturn(0); 106098dd23e9SBarry Smith 106198dd23e9SBarry Smith if (mat->mapping) { 106298dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 106398dd23e9SBarry Smith } 106498dd23e9SBarry Smith if (mat->bmapping) { 106598dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 106698dd23e9SBarry Smith } 106761b13de0SBarry Smith if (mat->rmap) { 106861b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 106961b13de0SBarry Smith } 107061b13de0SBarry Smith if (mat->cmap) { 107161b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 107261b13de0SBarry Smith } 10733a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 1074e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 107579bdfe76SSatish Balay #endif 107679bdfe76SSatish Balay 107783e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1078a2d1c673SSatish Balay ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr); 107979bdfe76SSatish Balay PetscFree(baij->rowners); 108079bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 108179bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 108248e59246SSatish Balay #if defined (USE_CTABLE) 108348e59246SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 108448e59246SSatish Balay #else 108579bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 108648e59246SSatish Balay #endif 108779bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 108879bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 108979bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 109079bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 109130793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 1092b9e4cc15SSatish Balay if (baij->hd) PetscFree(baij->hd); 109379bdfe76SSatish Balay PetscFree(baij); 109479bdfe76SSatish Balay PLogObjectDestroy(mat); 109579bdfe76SSatish Balay PetscHeaderDestroy(mat); 10963a40ed3dSBarry Smith PetscFunctionReturn(0); 109779bdfe76SSatish Balay } 109879bdfe76SSatish Balay 10995615d1e5SSatish Balay #undef __FUNC__ 11005615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1101ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1102cee3aa6bSSatish Balay { 1103cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 110447b4a8eaSLois Curfman McInnes int ierr, nt; 1105cee3aa6bSSatish Balay 1106d64ed03dSBarry Smith PetscFunctionBegin; 1107e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 110847b4a8eaSLois Curfman McInnes if (nt != a->n) { 1109a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 111047b4a8eaSLois Curfman McInnes } 1111e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 111247b4a8eaSLois Curfman McInnes if (nt != a->m) { 1113a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 111447b4a8eaSLois Curfman McInnes } 111543a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1116f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 111743a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1118f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 111943a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 11203a40ed3dSBarry Smith PetscFunctionReturn(0); 1121cee3aa6bSSatish Balay } 1122cee3aa6bSSatish Balay 11235615d1e5SSatish Balay #undef __FUNC__ 11245615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1125ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1126cee3aa6bSSatish Balay { 1127cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1128cee3aa6bSSatish Balay int ierr; 1129d64ed03dSBarry Smith 1130d64ed03dSBarry Smith PetscFunctionBegin; 113143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1132f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 113343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1134f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 11353a40ed3dSBarry Smith PetscFunctionReturn(0); 1136cee3aa6bSSatish Balay } 1137cee3aa6bSSatish Balay 11385615d1e5SSatish Balay #undef __FUNC__ 11395615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1140ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1141cee3aa6bSSatish Balay { 1142cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1143cee3aa6bSSatish Balay int ierr; 1144cee3aa6bSSatish Balay 1145d64ed03dSBarry Smith PetscFunctionBegin; 1146cee3aa6bSSatish Balay /* do nondiagonal part */ 1147f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1148cee3aa6bSSatish Balay /* send it on its way */ 1149537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1150cee3aa6bSSatish Balay /* do local part */ 1151f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1152cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1153cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1154cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1155639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 11563a40ed3dSBarry Smith PetscFunctionReturn(0); 1157cee3aa6bSSatish Balay } 1158cee3aa6bSSatish Balay 11595615d1e5SSatish Balay #undef __FUNC__ 11605615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1161ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1162cee3aa6bSSatish Balay { 1163cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1164cee3aa6bSSatish Balay int ierr; 1165cee3aa6bSSatish Balay 1166d64ed03dSBarry Smith PetscFunctionBegin; 1167cee3aa6bSSatish Balay /* do nondiagonal part */ 1168f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1169cee3aa6bSSatish Balay /* send it on its way */ 1170537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1171cee3aa6bSSatish Balay /* do local part */ 1172f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1173cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1174cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1175cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1176537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 11773a40ed3dSBarry Smith PetscFunctionReturn(0); 1178cee3aa6bSSatish Balay } 1179cee3aa6bSSatish Balay 1180cee3aa6bSSatish Balay /* 1181cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1182cee3aa6bSSatish Balay diagonal block 1183cee3aa6bSSatish Balay */ 11845615d1e5SSatish Balay #undef __FUNC__ 11855615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1186ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1187cee3aa6bSSatish Balay { 1188cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 11893a40ed3dSBarry Smith int ierr; 1190d64ed03dSBarry Smith 1191d64ed03dSBarry Smith PetscFunctionBegin; 1192a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 11933a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 11943a40ed3dSBarry Smith PetscFunctionReturn(0); 1195cee3aa6bSSatish Balay } 1196cee3aa6bSSatish Balay 11975615d1e5SSatish Balay #undef __FUNC__ 11985615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1199ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1200cee3aa6bSSatish Balay { 1201cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1202cee3aa6bSSatish Balay int ierr; 1203d64ed03dSBarry Smith 1204d64ed03dSBarry Smith PetscFunctionBegin; 1205cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1206cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 12073a40ed3dSBarry Smith PetscFunctionReturn(0); 1208cee3aa6bSSatish Balay } 1209026e39d0SSatish Balay 12105615d1e5SSatish Balay #undef __FUNC__ 12115615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1212ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 121357b952d6SSatish Balay { 121457b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1215d64ed03dSBarry Smith 1216d64ed03dSBarry Smith PetscFunctionBegin; 1217bd7f49f5SSatish Balay if (m) *m = mat->M; 1218bd7f49f5SSatish Balay if (n) *n = mat->N; 12193a40ed3dSBarry Smith PetscFunctionReturn(0); 122057b952d6SSatish Balay } 122157b952d6SSatish Balay 12225615d1e5SSatish Balay #undef __FUNC__ 12235615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1224ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 122557b952d6SSatish Balay { 122657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1227d64ed03dSBarry Smith 1228d64ed03dSBarry Smith PetscFunctionBegin; 1229f830108cSBarry Smith *m = mat->m; *n = mat->n; 12303a40ed3dSBarry Smith PetscFunctionReturn(0); 123157b952d6SSatish Balay } 123257b952d6SSatish Balay 12335615d1e5SSatish Balay #undef __FUNC__ 12345615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1235ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 123657b952d6SSatish Balay { 123757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1238d64ed03dSBarry Smith 1239d64ed03dSBarry Smith PetscFunctionBegin; 124057b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 12413a40ed3dSBarry Smith PetscFunctionReturn(0); 124257b952d6SSatish Balay } 124357b952d6SSatish Balay 12445615d1e5SSatish Balay #undef __FUNC__ 12455615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1246acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1247acdf5bf4SSatish Balay { 1248acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1249acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1250acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1251d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1252d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1253acdf5bf4SSatish Balay 1254d64ed03dSBarry Smith PetscFunctionBegin; 1255a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1256acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1257acdf5bf4SSatish Balay 1258acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1259acdf5bf4SSatish Balay /* 1260acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1261acdf5bf4SSatish Balay */ 1262acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1263bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1264bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1265acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1266acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1267acdf5bf4SSatish Balay } 1268acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1269acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1270acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1271acdf5bf4SSatish Balay } 1272acdf5bf4SSatish Balay 1273a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1274d9d09a02SSatish Balay lrow = row - brstart; 1275acdf5bf4SSatish Balay 1276acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1277acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1278acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1279f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1280f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1281acdf5bf4SSatish Balay nztot = nzA + nzB; 1282acdf5bf4SSatish Balay 1283acdf5bf4SSatish Balay cmap = mat->garray; 1284acdf5bf4SSatish Balay if (v || idx) { 1285acdf5bf4SSatish Balay if (nztot) { 1286acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1287acdf5bf4SSatish Balay int imark = -1; 1288acdf5bf4SSatish Balay if (v) { 1289acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1290acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1291d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1292acdf5bf4SSatish Balay else break; 1293acdf5bf4SSatish Balay } 1294acdf5bf4SSatish Balay imark = i; 1295acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1296acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1297acdf5bf4SSatish Balay } 1298acdf5bf4SSatish Balay if (idx) { 1299acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1300acdf5bf4SSatish Balay if (imark > -1) { 1301acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1302bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1303acdf5bf4SSatish Balay } 1304acdf5bf4SSatish Balay } else { 1305acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1306d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1307d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1308acdf5bf4SSatish Balay else break; 1309acdf5bf4SSatish Balay } 1310acdf5bf4SSatish Balay imark = i; 1311acdf5bf4SSatish Balay } 1312d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1313d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1314acdf5bf4SSatish Balay } 1315d64ed03dSBarry Smith } else { 1316d212a18eSSatish Balay if (idx) *idx = 0; 1317d212a18eSSatish Balay if (v) *v = 0; 1318d212a18eSSatish Balay } 1319acdf5bf4SSatish Balay } 1320acdf5bf4SSatish Balay *nz = nztot; 1321f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1322f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 13233a40ed3dSBarry Smith PetscFunctionReturn(0); 1324acdf5bf4SSatish Balay } 1325acdf5bf4SSatish Balay 13265615d1e5SSatish Balay #undef __FUNC__ 13275615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1328acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1329acdf5bf4SSatish Balay { 1330acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1331d64ed03dSBarry Smith 1332d64ed03dSBarry Smith PetscFunctionBegin; 1333acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1334a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1335acdf5bf4SSatish Balay } 1336acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 13373a40ed3dSBarry Smith PetscFunctionReturn(0); 1338acdf5bf4SSatish Balay } 1339acdf5bf4SSatish Balay 13405615d1e5SSatish Balay #undef __FUNC__ 13415615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1342ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 13435a838052SSatish Balay { 13445a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1345d64ed03dSBarry Smith 1346d64ed03dSBarry Smith PetscFunctionBegin; 13475a838052SSatish Balay *bs = baij->bs; 13483a40ed3dSBarry Smith PetscFunctionReturn(0); 13495a838052SSatish Balay } 13505a838052SSatish Balay 13515615d1e5SSatish Balay #undef __FUNC__ 13525615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1353ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 135458667388SSatish Balay { 135558667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 135658667388SSatish Balay int ierr; 1357d64ed03dSBarry Smith 1358d64ed03dSBarry Smith PetscFunctionBegin; 135958667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 136058667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 13613a40ed3dSBarry Smith PetscFunctionReturn(0); 136258667388SSatish Balay } 13630ac07820SSatish Balay 13645615d1e5SSatish Balay #undef __FUNC__ 13655615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1366ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 13670ac07820SSatish Balay { 13684e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 13694e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 13707d57db60SLois Curfman McInnes int ierr; 13717d57db60SLois Curfman McInnes double isend[5], irecv[5]; 13720ac07820SSatish Balay 1373d64ed03dSBarry Smith PetscFunctionBegin; 13744e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 13754e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 13760e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1377de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 13784e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 13790e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1380de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 13810ac07820SSatish Balay if (flag == MAT_LOCAL) { 13824e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 13834e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 13844e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 13854e220ebcSLois Curfman McInnes info->memory = isend[3]; 13864e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 13870ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1388f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 13894e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13904e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13914e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13924e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13934e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13940ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1395f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 13964e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13974e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13984e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13994e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14004e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1401d41123aaSBarry Smith } else { 1402d41123aaSBarry Smith SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 14030ac07820SSatish Balay } 14045a5d4f66SBarry Smith info->rows_global = (double)a->M; 14055a5d4f66SBarry Smith info->columns_global = (double)a->N; 14065a5d4f66SBarry Smith info->rows_local = (double)a->m; 14075a5d4f66SBarry Smith info->columns_local = (double)a->N; 14084e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14094e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14104e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14113a40ed3dSBarry Smith PetscFunctionReturn(0); 14120ac07820SSatish Balay } 14130ac07820SSatish Balay 14145615d1e5SSatish Balay #undef __FUNC__ 14155615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1416ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 141758667388SSatish Balay { 141858667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 141998305bb5SBarry Smith int ierr; 142058667388SSatish Balay 1421d64ed03dSBarry Smith PetscFunctionBegin; 142258667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 142358667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 14246da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1425c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 14264787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 14274787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 142898305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 142998305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1430b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1431aeafbbfcSLois Curfman McInnes a->roworiented = 1; 143298305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 143398305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1434b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 14356da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 143658667388SSatish Balay op == MAT_SYMMETRIC || 143758667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1438b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 143998305bb5SBarry Smith op == MAT_USE_HASH_TABLE) { 144058667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 144198305bb5SBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 144258667388SSatish Balay a->roworiented = 0; 144398305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 144498305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 14452b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 144690f02eecSBarry Smith a->donotstash = 1; 1447d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1448d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1449133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 1450133cdb44SSatish Balay a->ht_flag = 1; 1451d64ed03dSBarry Smith } else { 1452d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1453d64ed03dSBarry Smith } 14543a40ed3dSBarry Smith PetscFunctionReturn(0); 145558667388SSatish Balay } 145658667388SSatish Balay 14575615d1e5SSatish Balay #undef __FUNC__ 14585615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1459ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 14600ac07820SSatish Balay { 14610ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 14620ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 14630ac07820SSatish Balay Mat B; 146440011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 14650ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 14660ac07820SSatish Balay Scalar *a; 14670ac07820SSatish Balay 1468d64ed03dSBarry Smith PetscFunctionBegin; 1469a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 147028937c7bSBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 14710ac07820SSatish Balay CHKERRQ(ierr); 14720ac07820SSatish Balay 14730ac07820SSatish Balay /* copy over the A part */ 14740ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 14750ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14760ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 14770ac07820SSatish Balay 14780ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14790ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14800ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14810ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14820ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 14830ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14840ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14850ac07820SSatish Balay col++; a += bs; 14860ac07820SSatish Balay } 14870ac07820SSatish Balay } 14880ac07820SSatish Balay } 14890ac07820SSatish Balay /* copy over the B part */ 14900ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 14910ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14920ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14930ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14940ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14950ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14960ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 14970ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14980ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14990ac07820SSatish Balay col++; a += bs; 15000ac07820SSatish Balay } 15010ac07820SSatish Balay } 15020ac07820SSatish Balay } 15030ac07820SSatish Balay PetscFree(rvals); 15040ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15050ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15060ac07820SSatish Balay 15070ac07820SSatish Balay if (matout != PETSC_NULL) { 15080ac07820SSatish Balay *matout = B; 15090ac07820SSatish Balay } else { 1510f830108cSBarry Smith PetscOps *Abops; 1511cc2dc46cSBarry Smith MatOps Aops; 1512f830108cSBarry Smith 15130ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 15140ac07820SSatish Balay PetscFree(baij->rowners); 15150ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 15160ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1517b1fc9764SSatish Balay #if defined (USE_CTABLE) 1518b1fc9764SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 1519b1fc9764SSatish Balay #else 15200ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 1521b1fc9764SSatish Balay #endif 15220ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 15230ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 15240ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 15250ac07820SSatish Balay PetscFree(baij); 1526f830108cSBarry Smith 1527f830108cSBarry Smith /* 1528f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1529f830108cSBarry Smith A pointers for the bops and ops but copy everything 1530f830108cSBarry Smith else from C. 1531f830108cSBarry Smith */ 1532f830108cSBarry Smith Abops = A->bops; 1533f830108cSBarry Smith Aops = A->ops; 1534f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1535f830108cSBarry Smith A->bops = Abops; 1536f830108cSBarry Smith A->ops = Aops; 1537f830108cSBarry Smith 15380ac07820SSatish Balay PetscHeaderDestroy(B); 15390ac07820SSatish Balay } 15403a40ed3dSBarry Smith PetscFunctionReturn(0); 15410ac07820SSatish Balay } 15420e95ebc0SSatish Balay 15435615d1e5SSatish Balay #undef __FUNC__ 15445615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 15450e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 15460e95ebc0SSatish Balay { 15470e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 15480e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 15490e95ebc0SSatish Balay int ierr,s1,s2,s3; 15500e95ebc0SSatish Balay 1551d64ed03dSBarry Smith PetscFunctionBegin; 15520e95ebc0SSatish Balay if (ll) { 15530e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 15540e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1555a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 15560e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 15570e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 15580e95ebc0SSatish Balay } 1559a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 15603a40ed3dSBarry Smith PetscFunctionReturn(0); 15610e95ebc0SSatish Balay } 15620e95ebc0SSatish Balay 15635615d1e5SSatish Balay #undef __FUNC__ 15645615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1565ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 15660ac07820SSatish Balay { 15670ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 15680ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1569a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 15700ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 15710ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1572a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 15730ac07820SSatish Balay MPI_Comm comm = A->comm; 15740ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 15750ac07820SSatish Balay MPI_Status recv_status,*send_status; 15760ac07820SSatish Balay IS istmp; 15770ac07820SSatish Balay 1578d64ed03dSBarry Smith PetscFunctionBegin; 15790ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 15800ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 15810ac07820SSatish Balay 15820ac07820SSatish Balay /* first count number of contributors to each processor */ 15830ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 15840ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 15850ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 15860ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15870ac07820SSatish Balay idx = rows[i]; 15880ac07820SSatish Balay found = 0; 15890ac07820SSatish Balay for ( j=0; j<size; j++ ) { 15900ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 15910ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 15920ac07820SSatish Balay } 15930ac07820SSatish Balay } 1594a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 15950ac07820SSatish Balay } 15960ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 15970ac07820SSatish Balay 15980ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 15990ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1600ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 16010ac07820SSatish Balay nrecvs = work[rank]; 1602ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 16030ac07820SSatish Balay nmax = work[rank]; 16040ac07820SSatish Balay PetscFree(work); 16050ac07820SSatish Balay 16060ac07820SSatish Balay /* post receives: */ 1607d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1608d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 16090ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1610ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 16110ac07820SSatish Balay } 16120ac07820SSatish Balay 16130ac07820SSatish Balay /* do sends: 16140ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 16150ac07820SSatish Balay the ith processor 16160ac07820SSatish Balay */ 16170ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1618ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 16190ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 16200ac07820SSatish Balay starts[0] = 0; 16210ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 16220ac07820SSatish Balay for ( i=0; i<N; i++ ) { 16230ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 16240ac07820SSatish Balay } 16250ac07820SSatish Balay ISRestoreIndices(is,&rows); 16260ac07820SSatish Balay 16270ac07820SSatish Balay starts[0] = 0; 16280ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 16290ac07820SSatish Balay count = 0; 16300ac07820SSatish Balay for ( i=0; i<size; i++ ) { 16310ac07820SSatish Balay if (procs[i]) { 1632ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 16330ac07820SSatish Balay } 16340ac07820SSatish Balay } 16350ac07820SSatish Balay PetscFree(starts); 16360ac07820SSatish Balay 16370ac07820SSatish Balay base = owners[rank]*bs; 16380ac07820SSatish Balay 16390ac07820SSatish Balay /* wait on receives */ 16400ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 16410ac07820SSatish Balay source = lens + nrecvs; 16420ac07820SSatish Balay count = nrecvs; slen = 0; 16430ac07820SSatish Balay while (count) { 1644ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 16450ac07820SSatish Balay /* unpack receives into our local space */ 1646ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 16470ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 16480ac07820SSatish Balay lens[imdex] = n; 16490ac07820SSatish Balay slen += n; 16500ac07820SSatish Balay count--; 16510ac07820SSatish Balay } 16520ac07820SSatish Balay PetscFree(recv_waits); 16530ac07820SSatish Balay 16540ac07820SSatish Balay /* move the data into the send scatter */ 16550ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 16560ac07820SSatish Balay count = 0; 16570ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 16580ac07820SSatish Balay values = rvalues + i*nmax; 16590ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 16600ac07820SSatish Balay lrows[count++] = values[j] - base; 16610ac07820SSatish Balay } 16620ac07820SSatish Balay } 16630ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 16640ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 16650ac07820SSatish Balay 16660ac07820SSatish Balay /* actually zap the local rows */ 1667029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 16680ac07820SSatish Balay PLogObjectParent(A,istmp); 1669a07cd24cSSatish Balay 167072dacd9aSBarry Smith /* 167172dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 167272dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 167372dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 167472dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 167572dacd9aSBarry Smith 167672dacd9aSBarry Smith Contributed by: Mathew Knepley 167772dacd9aSBarry Smith */ 16789c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 16790ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 16809c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 16819c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 16829c957beeSSatish Balay } else if (diag) { 16839c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1684fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1685fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1686fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 16876525c446SSatish Balay } 1688a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1689a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1690a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1691a07cd24cSSatish Balay } 1692a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16949c957beeSSatish Balay } else { 16959c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1696a07cd24cSSatish Balay } 16979c957beeSSatish Balay 16989c957beeSSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 1699a07cd24cSSatish Balay PetscFree(lrows); 1700a07cd24cSSatish Balay 17010ac07820SSatish Balay /* wait on sends */ 17020ac07820SSatish Balay if (nsends) { 1703d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1704ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 17050ac07820SSatish Balay PetscFree(send_status); 17060ac07820SSatish Balay } 17070ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 17080ac07820SSatish Balay 17093a40ed3dSBarry Smith PetscFunctionReturn(0); 17100ac07820SSatish Balay } 171172dacd9aSBarry Smith 17125615d1e5SSatish Balay #undef __FUNC__ 17135615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1714ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1715ba4ca20aSSatish Balay { 1716ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 171725fdafccSSatish Balay MPI_Comm comm = A->comm; 1718133cdb44SSatish Balay static int called = 0; 17193a40ed3dSBarry Smith int ierr; 1720ba4ca20aSSatish Balay 1721d64ed03dSBarry Smith PetscFunctionBegin; 17223a40ed3dSBarry Smith if (!a->rank) { 17233a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 172425fdafccSSatish Balay } 172525fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1726133cdb44SSatish Balay (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1727133cdb44SSatish Balay (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 17283a40ed3dSBarry Smith PetscFunctionReturn(0); 1729ba4ca20aSSatish Balay } 17300ac07820SSatish Balay 17315615d1e5SSatish Balay #undef __FUNC__ 17325615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1733ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1734bb5a7306SBarry Smith { 1735bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1736bb5a7306SBarry Smith int ierr; 1737d64ed03dSBarry Smith 1738d64ed03dSBarry Smith PetscFunctionBegin; 1739bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 17403a40ed3dSBarry Smith PetscFunctionReturn(0); 1741bb5a7306SBarry Smith } 1742bb5a7306SBarry Smith 17432e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 17440ac07820SSatish Balay 174579bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1746cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1747cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1748cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1749cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1750cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1751cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 1752cc2dc46cSBarry Smith MatMultTrans_MPIBAIJ, 1753cc2dc46cSBarry Smith MatMultTransAdd_MPIBAIJ, 1754cc2dc46cSBarry Smith 0, 1755cc2dc46cSBarry Smith 0, 1756cc2dc46cSBarry Smith 0, 1757cc2dc46cSBarry Smith 0, 1758cc2dc46cSBarry Smith 0, 1759cc2dc46cSBarry Smith 0, 1760cc2dc46cSBarry Smith 0, 1761cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1762cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 1763cc2dc46cSBarry Smith 0, 1764cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1765cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1766cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1767cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1768cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1769cc2dc46cSBarry Smith 0, 1770cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1771cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1772cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1773cc2dc46cSBarry Smith 0, 1774cc2dc46cSBarry Smith 0, 1775cc2dc46cSBarry Smith 0, 1776cc2dc46cSBarry Smith 0, 1777cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1778cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1779cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1780cc2dc46cSBarry Smith 0, 1781cc2dc46cSBarry Smith 0, 1782cc2dc46cSBarry Smith 0, 1783cc2dc46cSBarry Smith 0, 17842e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1785cc2dc46cSBarry Smith 0, 1786cc2dc46cSBarry Smith 0, 1787cc2dc46cSBarry Smith 0, 1788cc2dc46cSBarry Smith 0, 1789cc2dc46cSBarry Smith 0, 1790cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1791cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1792cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1793cc2dc46cSBarry Smith 0, 1794cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1795cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1796cc2dc46cSBarry Smith 0, 1797cc2dc46cSBarry Smith 0, 1798cc2dc46cSBarry Smith 0, 1799cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1800cc2dc46cSBarry Smith 0, 1801cc2dc46cSBarry Smith 0, 1802cc2dc46cSBarry Smith 0, 1803cc2dc46cSBarry Smith 0, 1804cc2dc46cSBarry Smith 0, 1805cc2dc46cSBarry Smith 0, 1806cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1807cc2dc46cSBarry Smith 0, 1808cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1809cc2dc46cSBarry Smith 0, 1810cc2dc46cSBarry Smith 0, 1811cc2dc46cSBarry Smith 0, 1812cc2dc46cSBarry Smith MatGetMaps_Petsc}; 181379bdfe76SSatish Balay 18145ef9f2a5SBarry Smith 1815e18c124aSSatish Balay EXTERN_C_BEGIN 18165ef9f2a5SBarry Smith #undef __FUNC__ 18175ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 18185ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 18195ef9f2a5SBarry Smith { 18205ef9f2a5SBarry Smith PetscFunctionBegin; 18215ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 18225ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 18235ef9f2a5SBarry Smith PetscFunctionReturn(0); 18245ef9f2a5SBarry Smith } 1825e18c124aSSatish Balay EXTERN_C_END 182679bdfe76SSatish Balay 18275615d1e5SSatish Balay #undef __FUNC__ 18285615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 182979bdfe76SSatish Balay /*@C 183079bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 183179bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 183279bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 183379bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 183479bdfe76SSatish Balay performance can be increased by more than a factor of 50. 183579bdfe76SSatish Balay 1836db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1837db81eaa0SLois Curfman McInnes 183879bdfe76SSatish Balay Input Parameters: 1839db81eaa0SLois Curfman McInnes + comm - MPI communicator 184079bdfe76SSatish Balay . bs - size of blockk 184179bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 184292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 184392e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 184492e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 184592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 184692e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1847be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1848be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 184979bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 185079bdfe76SSatish Balay submatrix (same for all local rows) 185192e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 185292e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 1853db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 185492e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 185579bdfe76SSatish Balay submatrix (same for all local rows). 1856db81eaa0SLois Curfman McInnes - o_nzz - array containing the number of nonzeros in the various block rows of the 185792e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 185892e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 185979bdfe76SSatish Balay 186079bdfe76SSatish Balay Output Parameter: 186179bdfe76SSatish Balay . A - the matrix 186279bdfe76SSatish Balay 1863db81eaa0SLois Curfman McInnes Options Database Keys: 1864db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 1865db81eaa0SLois Curfman McInnes block calculations (much slower) 1866db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 1867494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1868494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 18693ffaccefSLois Curfman McInnes 1870b259b22eSLois Curfman McInnes Notes: 187179bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 187279bdfe76SSatish Balay (possibly both). 187379bdfe76SSatish Balay 1874be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1875be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 1876be79a94dSBarry Smith 187779bdfe76SSatish Balay Storage Information: 187879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 187979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 188079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 188179bdfe76SSatish Balay local matrix (a rectangular submatrix). 188279bdfe76SSatish Balay 188379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 188479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 188579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 188679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 188779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 188879bdfe76SSatish Balay 188979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 189079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 189179bdfe76SSatish Balay 1892db81eaa0SLois Curfman McInnes .vb 1893db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 1894db81eaa0SLois Curfman McInnes ------------------- 1895db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 1896db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 1897db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 1898db81eaa0SLois Curfman McInnes ------------------- 1899db81eaa0SLois Curfman McInnes .ve 190079bdfe76SSatish Balay 190179bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 190279bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 190379bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 190457b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 190579bdfe76SSatish Balay 1906d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1907d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 190879bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 190992e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 191092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 19116da5968aSLois Curfman McInnes matrices. 191279bdfe76SSatish Balay 1913027ccd11SLois Curfman McInnes Level: intermediate 1914027ccd11SLois Curfman McInnes 191592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 191679bdfe76SSatish Balay 1917db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 191879bdfe76SSatish Balay @*/ 191979bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 192079bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 192179bdfe76SSatish Balay { 192279bdfe76SSatish Balay Mat B; 192379bdfe76SSatish Balay Mat_MPIBAIJ *b; 1924133cdb44SSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1925a2ab621fSBarry Smith int flag1 = 0,flag2 = 0; 192679bdfe76SSatish Balay 1927d64ed03dSBarry Smith PetscFunctionBegin; 1928a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 19293914022bSBarry Smith 19303914022bSBarry Smith MPI_Comm_size(comm,&size); 1931494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1932494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1933494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 19343914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 19353914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 19363914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 19373a40ed3dSBarry Smith PetscFunctionReturn(0); 19383914022bSBarry Smith } 19393914022bSBarry Smith 194079bdfe76SSatish Balay *A = 0; 19413f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 194279bdfe76SSatish Balay PLogObjectCreate(B); 194379bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 194479bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1945cc2dc46cSBarry Smith PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 19464c50302cSBarry Smith 1947e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 1948e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 194990f02eecSBarry Smith B->mapping = 0; 195079bdfe76SSatish Balay B->factor = 0; 195179bdfe76SSatish Balay B->assembled = PETSC_FALSE; 195279bdfe76SSatish Balay 1953e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 195479bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 195579bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 195679bdfe76SSatish Balay 1957d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1958a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1959d64ed03dSBarry Smith } 1960a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1961a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1962a8c6a408SBarry Smith } 1963a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1964a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1965a8c6a408SBarry Smith } 1966cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1967cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 196879bdfe76SSatish Balay 196979bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 197079bdfe76SSatish Balay work[0] = m; work[1] = n; 197179bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 1972ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 197379bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 197479bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 197579bdfe76SSatish Balay } 197679bdfe76SSatish Balay if (m == PETSC_DECIDE) { 197779bdfe76SSatish Balay Mbs = M/bs; 1978a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 197979bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 198079bdfe76SSatish Balay m = mbs*bs; 198179bdfe76SSatish Balay } 198279bdfe76SSatish Balay if (n == PETSC_DECIDE) { 198379bdfe76SSatish Balay Nbs = N/bs; 1984a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 198579bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 198679bdfe76SSatish Balay n = nbs*bs; 198779bdfe76SSatish Balay } 1988a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 1989a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1990a8c6a408SBarry Smith } 199179bdfe76SSatish Balay 199279bdfe76SSatish Balay b->m = m; B->m = m; 199379bdfe76SSatish Balay b->n = n; B->n = n; 199479bdfe76SSatish Balay b->N = N; B->N = N; 199579bdfe76SSatish Balay b->M = M; B->M = M; 199679bdfe76SSatish Balay b->bs = bs; 199779bdfe76SSatish Balay b->bs2 = bs*bs; 199879bdfe76SSatish Balay b->mbs = mbs; 199979bdfe76SSatish Balay b->nbs = nbs; 200079bdfe76SSatish Balay b->Mbs = Mbs; 200179bdfe76SSatish Balay b->Nbs = Nbs; 200279bdfe76SSatish Balay 2003c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2004c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 2005488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2006488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2007c7fcc2eaSBarry Smith 200879bdfe76SSatish Balay /* build local table of row and column ownerships */ 200979bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2010f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 20110ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2012ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 201379bdfe76SSatish Balay b->rowners[0] = 0; 201479bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 201579bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 201679bdfe76SSatish Balay } 201779bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 201879bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 20194fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 20204fa0d573SSatish Balay b->rend_bs = b->rend * bs; 20214fa0d573SSatish Balay 2022ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 202379bdfe76SSatish Balay b->cowners[0] = 0; 202479bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 202579bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 202679bdfe76SSatish Balay } 202779bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 202879bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 20294fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 20304fa0d573SSatish Balay b->cend_bs = b->cend * bs; 203179bdfe76SSatish Balay 2032a07cd24cSSatish Balay 203379bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2034029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 203579bdfe76SSatish Balay PLogObjectParent(B,b->A); 203679bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2037029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 203879bdfe76SSatish Balay PLogObjectParent(B,b->B); 203979bdfe76SSatish Balay 204079bdfe76SSatish Balay /* build cache for off array entries formed */ 2041a2d1c673SSatish Balay ierr = StashCreate_Private(comm,1,bs,&b->stash); CHKERRQ(ierr); 2042a2d1c673SSatish Balay ierr = StashCreate_Private(comm,bs,bs,&b->bstash); CHKERRQ(ierr); 204390f02eecSBarry Smith b->donotstash = 0; 204479bdfe76SSatish Balay b->colmap = 0; 204579bdfe76SSatish Balay b->garray = 0; 204679bdfe76SSatish Balay b->roworiented = 1; 204779bdfe76SSatish Balay 204830793edcSSatish Balay /* stuff used in block assembly */ 204930793edcSSatish Balay b->barray = 0; 205030793edcSSatish Balay 205179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 205279bdfe76SSatish Balay b->lvec = 0; 205379bdfe76SSatish Balay b->Mvctx = 0; 205479bdfe76SSatish Balay 205579bdfe76SSatish Balay /* stuff for MatGetRow() */ 205679bdfe76SSatish Balay b->rowindices = 0; 205779bdfe76SSatish Balay b->rowvalues = 0; 205879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 205979bdfe76SSatish Balay 2060a07cd24cSSatish Balay /* hash table stuff */ 2061a07cd24cSSatish Balay b->ht = 0; 2062187ce0cbSSatish Balay b->hd = 0; 20630bdbc534SSatish Balay b->ht_size = 0; 2064133cdb44SSatish Balay b->ht_flag = 0; 206525fdafccSSatish Balay b->ht_fact = 0; 2066187ce0cbSSatish Balay b->ht_total_ct = 0; 2067187ce0cbSSatish Balay b->ht_insert_ct = 0; 2068a07cd24cSSatish Balay 206979bdfe76SSatish Balay *A = B; 2070133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2071133cdb44SSatish Balay if (flg) { 2072133cdb44SSatish Balay double fact = 1.39; 2073133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2074133cdb44SSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2075133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2076133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2077133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2078133cdb44SSatish Balay } 20795ef9f2a5SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 20805ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 20815ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 20823a40ed3dSBarry Smith PetscFunctionReturn(0); 208379bdfe76SSatish Balay } 2084026e39d0SSatish Balay 20855615d1e5SSatish Balay #undef __FUNC__ 20862e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ" 20872e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 20880ac07820SSatish Balay { 20890ac07820SSatish Balay Mat mat; 20900ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 20910ac07820SSatish Balay int ierr, len=0, flg; 20920ac07820SSatish Balay 2093d64ed03dSBarry Smith PetscFunctionBegin; 20940ac07820SSatish Balay *newmat = 0; 20953f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 20960ac07820SSatish Balay PLogObjectCreate(mat); 20970ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2098cc2dc46cSBarry Smith PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2099e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2100e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 21010ac07820SSatish Balay mat->factor = matin->factor; 21020ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2103aef5e8e0SSatish Balay mat->insertmode = NOT_SET_VALUES; 21040ac07820SSatish Balay 21050ac07820SSatish Balay a->m = mat->m = oldmat->m; 21060ac07820SSatish Balay a->n = mat->n = oldmat->n; 21070ac07820SSatish Balay a->M = mat->M = oldmat->M; 21080ac07820SSatish Balay a->N = mat->N = oldmat->N; 21090ac07820SSatish Balay 21100ac07820SSatish Balay a->bs = oldmat->bs; 21110ac07820SSatish Balay a->bs2 = oldmat->bs2; 21120ac07820SSatish Balay a->mbs = oldmat->mbs; 21130ac07820SSatish Balay a->nbs = oldmat->nbs; 21140ac07820SSatish Balay a->Mbs = oldmat->Mbs; 21150ac07820SSatish Balay a->Nbs = oldmat->Nbs; 21160ac07820SSatish Balay 21170ac07820SSatish Balay a->rstart = oldmat->rstart; 21180ac07820SSatish Balay a->rend = oldmat->rend; 21190ac07820SSatish Balay a->cstart = oldmat->cstart; 21200ac07820SSatish Balay a->cend = oldmat->cend; 21210ac07820SSatish Balay a->size = oldmat->size; 21220ac07820SSatish Balay a->rank = oldmat->rank; 2123aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2124aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2125aef5e8e0SSatish Balay a->rowindices = 0; 21260ac07820SSatish Balay a->rowvalues = 0; 21270ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 212830793edcSSatish Balay a->barray = 0; 21293123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 21303123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 21313123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 21323123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 21330ac07820SSatish Balay 2134133cdb44SSatish Balay /* hash table stuff */ 2135133cdb44SSatish Balay a->ht = 0; 2136133cdb44SSatish Balay a->hd = 0; 2137133cdb44SSatish Balay a->ht_size = 0; 2138133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 213925fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2140133cdb44SSatish Balay a->ht_total_ct = 0; 2141133cdb44SSatish Balay a->ht_insert_ct = 0; 2142133cdb44SSatish Balay 2143133cdb44SSatish Balay 21440ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2145f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 21460ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 21470ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2148a2d1c673SSatish Balay ierr = StashCreate_Private(matin->comm,1,oldmat->bs,&a->stash); CHKERRQ(ierr); 2149a2d1c673SSatish Balay ierr = StashCreate_Private(matin->comm,oldmat->bs,oldmat->bs,&a->bstash); CHKERRQ(ierr); 21500ac07820SSatish Balay if (oldmat->colmap) { 215148e59246SSatish Balay #if defined (USE_CTABLE) 2152fa46199cSSatish Balay ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 215348e59246SSatish Balay #else 21540ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 21550ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 21560ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 215748e59246SSatish Balay #endif 21580ac07820SSatish Balay } else a->colmap = 0; 21590ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 21600ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 21610ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 21620ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 21630ac07820SSatish Balay } else a->garray = 0; 21640ac07820SSatish Balay 21650ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 21660ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 21670ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2168e18c124aSSatish Balay 21690ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 21702e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 21710ac07820SSatish Balay PLogObjectParent(mat,a->A); 21722e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 21730ac07820SSatish Balay PLogObjectParent(mat,a->B); 21740ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2175e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 21760ac07820SSatish Balay if (flg) { 21770ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 21780ac07820SSatish Balay } 21790ac07820SSatish Balay *newmat = mat; 21803a40ed3dSBarry Smith PetscFunctionReturn(0); 21810ac07820SSatish Balay } 218257b952d6SSatish Balay 218357b952d6SSatish Balay #include "sys.h" 218457b952d6SSatish Balay 21855615d1e5SSatish Balay #undef __FUNC__ 21865615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 218757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 218857b952d6SSatish Balay { 218957b952d6SSatish Balay Mat A; 219057b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 219157b952d6SSatish Balay Scalar *vals,*buf; 219257b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 219357b952d6SSatish Balay MPI_Status status; 2194cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 219557b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 219640011551SBarry Smith int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 219757b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 219857b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 219957b952d6SSatish Balay 2200d64ed03dSBarry Smith PetscFunctionBegin; 220157b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 220257b952d6SSatish Balay 220357b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 220457b952d6SSatish Balay if (!rank) { 220557b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2206e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2207a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2208d64ed03dSBarry Smith if (header[3] < 0) { 2209a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2210d64ed03dSBarry Smith } 22116c5fab8fSBarry Smith } 2212d64ed03dSBarry Smith 2213ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 221457b952d6SSatish Balay M = header[1]; N = header[2]; 221557b952d6SSatish Balay 2216a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 221757b952d6SSatish Balay 221857b952d6SSatish Balay /* 221957b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 222057b952d6SSatish Balay divisible by the blocksize 222157b952d6SSatish Balay */ 222257b952d6SSatish Balay Mbs = M/bs; 222357b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 222457b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 222557b952d6SSatish Balay else Mbs++; 222657b952d6SSatish Balay if (extra_rows &&!rank) { 2227b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 222857b952d6SSatish Balay } 2229537820f0SBarry Smith 223057b952d6SSatish Balay /* determine ownership of all rows */ 223157b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 223257b952d6SSatish Balay m = mbs * bs; 2233cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2234cee3aa6bSSatish Balay browners = rowners + size + 1; 2235ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 223657b952d6SSatish Balay rowners[0] = 0; 2237cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2238cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 223957b952d6SSatish Balay rstart = rowners[rank]; 224057b952d6SSatish Balay rend = rowners[rank+1]; 224157b952d6SSatish Balay 224257b952d6SSatish Balay /* distribute row lengths to all processors */ 224357b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 224457b952d6SSatish Balay if (!rank) { 224557b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2246e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 224757b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 224857b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2249cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2250ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 225157b952d6SSatish Balay PetscFree(sndcounts); 2252d64ed03dSBarry Smith } else { 2253ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 225457b952d6SSatish Balay } 225557b952d6SSatish Balay 225657b952d6SSatish Balay if (!rank) { 225757b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 225857b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 225957b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 226057b952d6SSatish Balay for ( i=0; i<size; i++ ) { 226157b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 226257b952d6SSatish Balay procsnz[i] += rowlengths[j]; 226357b952d6SSatish Balay } 226457b952d6SSatish Balay } 226557b952d6SSatish Balay PetscFree(rowlengths); 226657b952d6SSatish Balay 226757b952d6SSatish Balay /* determine max buffer needed and allocate it */ 226857b952d6SSatish Balay maxnz = 0; 226957b952d6SSatish Balay for ( i=0; i<size; i++ ) { 227057b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 227157b952d6SSatish Balay } 227257b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 227357b952d6SSatish Balay 227457b952d6SSatish Balay /* read in my part of the matrix column indices */ 227557b952d6SSatish Balay nz = procsnz[0]; 227657b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 227757b952d6SSatish Balay mycols = ibuf; 2278cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2279e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2280cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2281cee3aa6bSSatish Balay 228257b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 228357b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 228457b952d6SSatish Balay nz = procsnz[i]; 2285e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2286ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 228757b952d6SSatish Balay } 228857b952d6SSatish Balay /* read in the stuff for the last proc */ 228957b952d6SSatish Balay if ( size != 1 ) { 229057b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2291e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 229257b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2293ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 229457b952d6SSatish Balay } 229557b952d6SSatish Balay PetscFree(cols); 2296d64ed03dSBarry Smith } else { 229757b952d6SSatish Balay /* determine buffer space needed for message */ 229857b952d6SSatish Balay nz = 0; 229957b952d6SSatish Balay for ( i=0; i<m; i++ ) { 230057b952d6SSatish Balay nz += locrowlens[i]; 230157b952d6SSatish Balay } 230257b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 230357b952d6SSatish Balay mycols = ibuf; 230457b952d6SSatish Balay /* receive message of column indices*/ 2305ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2306ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2307a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 230857b952d6SSatish Balay } 230957b952d6SSatish Balay 231057b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2311cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2312cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 231357b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2314cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 231557b952d6SSatish Balay masked1 = mask + Mbs; 231657b952d6SSatish Balay masked2 = masked1 + Mbs; 231757b952d6SSatish Balay rowcount = 0; nzcount = 0; 231857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 231957b952d6SSatish Balay dcount = 0; 232057b952d6SSatish Balay odcount = 0; 232157b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 232257b952d6SSatish Balay kmax = locrowlens[rowcount]; 232357b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 232457b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 232557b952d6SSatish Balay if (!mask[tmp]) { 232657b952d6SSatish Balay mask[tmp] = 1; 232757b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 232857b952d6SSatish Balay else masked1[dcount++] = tmp; 232957b952d6SSatish Balay } 233057b952d6SSatish Balay } 233157b952d6SSatish Balay rowcount++; 233257b952d6SSatish Balay } 2333cee3aa6bSSatish Balay 233457b952d6SSatish Balay dlens[i] = dcount; 233557b952d6SSatish Balay odlens[i] = odcount; 2336cee3aa6bSSatish Balay 233757b952d6SSatish Balay /* zero out the mask elements we set */ 233857b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 233957b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 234057b952d6SSatish Balay } 2341cee3aa6bSSatish Balay 234257b952d6SSatish Balay /* create our matrix */ 2343537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2344537820f0SBarry Smith CHKERRQ(ierr); 234557b952d6SSatish Balay A = *newmat; 23466d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 234757b952d6SSatish Balay 234857b952d6SSatish Balay if (!rank) { 234957b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 235057b952d6SSatish Balay /* read in my part of the matrix numerical values */ 235157b952d6SSatish Balay nz = procsnz[0]; 235257b952d6SSatish Balay vals = buf; 2353cee3aa6bSSatish Balay mycols = ibuf; 2354cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2355e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2356cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2357537820f0SBarry Smith 235857b952d6SSatish Balay /* insert into matrix */ 235957b952d6SSatish Balay jj = rstart*bs; 236057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 236157b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 236257b952d6SSatish Balay mycols += locrowlens[i]; 236357b952d6SSatish Balay vals += locrowlens[i]; 236457b952d6SSatish Balay jj++; 236557b952d6SSatish Balay } 236657b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 236757b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 236857b952d6SSatish Balay nz = procsnz[i]; 236957b952d6SSatish Balay vals = buf; 2370e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2371ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 237257b952d6SSatish Balay } 237357b952d6SSatish Balay /* the last proc */ 237457b952d6SSatish Balay if ( size != 1 ){ 237557b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2376cee3aa6bSSatish Balay vals = buf; 2377e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 237857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2379ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 238057b952d6SSatish Balay } 238157b952d6SSatish Balay PetscFree(procsnz); 2382d64ed03dSBarry Smith } else { 238357b952d6SSatish Balay /* receive numeric values */ 238457b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 238557b952d6SSatish Balay 238657b952d6SSatish Balay /* receive message of values*/ 238757b952d6SSatish Balay vals = buf; 2388cee3aa6bSSatish Balay mycols = ibuf; 2389ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2390ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2391a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 239257b952d6SSatish Balay 239357b952d6SSatish Balay /* insert into matrix */ 239457b952d6SSatish Balay jj = rstart*bs; 2395cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 239657b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 239757b952d6SSatish Balay mycols += locrowlens[i]; 239857b952d6SSatish Balay vals += locrowlens[i]; 239957b952d6SSatish Balay jj++; 240057b952d6SSatish Balay } 240157b952d6SSatish Balay } 240257b952d6SSatish Balay PetscFree(locrowlens); 240357b952d6SSatish Balay PetscFree(buf); 240457b952d6SSatish Balay PetscFree(ibuf); 240557b952d6SSatish Balay PetscFree(rowners); 240657b952d6SSatish Balay PetscFree(dlens); 2407cee3aa6bSSatish Balay PetscFree(mask); 24086d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 24096d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 24103a40ed3dSBarry Smith PetscFunctionReturn(0); 241157b952d6SSatish Balay } 241257b952d6SSatish Balay 241357b952d6SSatish Balay 2414133cdb44SSatish Balay 2415133cdb44SSatish Balay #undef __FUNC__ 2416133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2417133cdb44SSatish Balay /*@ 2418133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2419133cdb44SSatish Balay 2420133cdb44SSatish Balay Input Parameters: 2421133cdb44SSatish Balay . mat - the matrix 2422133cdb44SSatish Balay . fact - factor 2423133cdb44SSatish Balay 2424fee21e36SBarry Smith Collective on Mat 2425fee21e36SBarry Smith 2426133cdb44SSatish Balay Notes: 2427133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2428133cdb44SSatish Balay 2429133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2430133cdb44SSatish Balay 2431133cdb44SSatish Balay .seealso: MatSetOption() 2432133cdb44SSatish Balay @*/ 2433133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2434133cdb44SSatish Balay { 243525fdafccSSatish Balay Mat_MPIBAIJ *baij; 2436133cdb44SSatish Balay 2437133cdb44SSatish Balay PetscFunctionBegin; 2438133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 243925fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2440133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2441133cdb44SSatish Balay } 2442133cdb44SSatish Balay baij = (Mat_MPIBAIJ*) mat->data; 2443133cdb44SSatish Balay baij->ht_fact = fact; 2444133cdb44SSatish Balay PetscFunctionReturn(0); 2445133cdb44SSatish Balay } 2446